# Case Study of Confusion Matrices

In [None]:
import sys
import os

import polars as pl
import pandas as pd
import numpy as np

In [None]:
# Extend the module search path with two directories relative to this
# notebook so the project-local imports in the following cells resolve
# (presumably hsds_em / hsds_stan / io_utils live there -- confirm).
# Plain string literals: there is nothing to interpolate, so no f-prefix.
sys.path.append("../../methods")
sys.path.append("../")

In [None]:
from hsds_em import HSDS_EM
from crowdkit.aggregation import DawidSkene, OneCoinDawidSkene

In [None]:
from hsds_stan import HSDS_Stan, SeparatedBDS

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
from io_utils import get_accuracy, get_recall

## Set up the specific condition
face, homo, r=5, AI_acc=mean, num_ai=15

In [None]:
# Ground-truth table of the face dataset; it has at least the columns
# "task" and "gt".
gt = pd.read_csv("../human_responses/face_gt.csv")
# Ids of the tasks whose ground-truth label is 0. They are passed to
# get_recall in the evaluation cells further down.
biased_index = gt.loc[gt["gt"] == 0, "task"].unique()
# From here on the table is keyed by task id.
gt = gt.set_index("task")

In [None]:
# Human worker responses for the face dataset under the r=5 condition
# (the condition is encoded in the file name only).
human = pd.read_csv("../human_responses/face_r=5.csv")

In [None]:
# Load the AI responses of all 15 runs (run=0..14) and stack them into one
# frame with a fresh 0..n-1 index. Every file is read first and the frames are
# concatenated once, which avoids re-copying the accumulated frame on each
# iteration and needs no special case for the first run.
ai_frames = [
    pd.read_csv(f"../ai_responses/face_r=5_ai=mean_target=1_run={run}.csv")
    for run in range(15)
]
ai_df = pd.concat(ai_frames, ignore_index=True)

In [None]:
# Pool human and AI responses into a single table (fresh 0..n-1 index) for
# the methods below that take one input frame (DawidSkene, OneCoinDawidSkene).
df = pd.concat([human, ai_df], ignore_index=True)

## Utils

In [None]:
def ds_errors_to_numpy_dict(errors_df):
    """Split a stacked per-worker table into one NumPy matrix per worker.

    Args:
        errors_df: DataFrame with a two-level row index. Level 0 identifies
            the worker, level 1 holds a label value. In this notebook it is
            the ``errors_`` table of a fitted Dawid-Skene model; presumably
            level 1 is the label the worker gave and the columns are the true
            labels -- confirm against the crowd-kit documentation.

    Returns:
        dict mapping each worker (in order of first appearance) to a 2-D
        array. The worker's rows are first aligned to every level-1 value
        that occurs anywhere in ``errors_df`` and sorted ascending, then the
        table is transposed: array rows follow the columns of ``errors_df``
        and array columns follow the level-1 values. A level-1 value the
        worker has no row for becomes an all-NaN array column.
    """
    workers = errors_df.index.get_level_values(0).unique()
    label_values = errors_df.index.get_level_values(1).unique()

    result = {}
    for worker in workers:
        # Reindexing gives every worker the same set of rows, so all returned
        # matrices share one shape even if a worker never used some label.
        worker_df = errors_df.loc[worker].reindex(label_values).sort_index()
        result[worker] = worker_df.values.T
    return result

In [None]:
def calc_avg_mat(mats, ai_prefix="AI_"):
    """Average per-worker matrices separately for human and AI workers.

    Args:
        mats: dict mapping a worker id to that worker's 2-D matrix. All
            matrices must share one shape; the shape is taken from the first
            entry (4x4 is assumed only when ``mats`` is empty).
        ai_prefix: a worker whose id, converted to ``str``, starts with this
            prefix is counted as an AI worker; every other worker is counted
            as human.

    Returns:
        Tuple ``(human_mat, ai_mat)`` holding the element-wise mean matrix of
        each group. NaN entries of the inputs are counted as 0. A group
        without any worker yields an all-NaN matrix.
    """
    first = next(iter(mats.values()), None)
    shape = (4, 4) if first is None else np.shape(first)

    sums = {"human": np.zeros(shape), "ai": np.zeros(shape)}
    counts = {"human": 0, "ai": 0}
    for key, mat in mats.items():
        # str() keeps non-string worker ids (e.g. ints) from raising here.
        group = "ai" if str(key).startswith(ai_prefix) else "human"
        sums[group] += np.nan_to_num(mat)
        counts[group] += 1

    def _group_mean(group):
        # An empty group has no defined mean: return NaN explicitly instead of
        # dividing 0 by 0 and triggering a NumPy RuntimeWarning.
        if counts[group] == 0:
            return np.full(shape, np.nan)
        return sums[group] / counts[group]

    return _group_mean("human"), _group_mean("ai")

## Algorithms

### DS

In [None]:
# Baseline: crowd-kit Dawid-Skene fitted on the pooled human + AI responses.
ds = DawidSkene()
ret = ds.fit_predict(df)

In [None]:
# Score the aggregated labels against the ground truth with the project
# helper from io_utils (presumably overall accuracy -- see io_utils).
get_accuracy(ret, gt)

In [None]:
# Same comparison, additionally given biased_index, i.e. the tasks whose
# ground-truth label is 0 (presumably recall on that subset -- see io_utils).
get_recall(ret, gt, biased_index)

In [None]:
# One matrix per worker, built from the fitted model's errors_ table.
ds_mats = ds_errors_to_numpy_dict(ds.errors_)

In [None]:
# Average the per-worker matrices within each group and display both
# (human first, AI second).
ds_h_cm, ds_a_cm = calc_avg_mat(ds_mats)
ds_h_cm, ds_a_cm

### HSDS-EM

In [None]:
# HSDS-EM (project-local, from hsds_em). It receives the human and the AI
# responses as two separate frames rather than the pooled table.
hds = HSDS_EM()
ret = hds.fit_predict(human,ai_df)

In [None]:
# Score the aggregated labels against the ground truth with the project
# helper from io_utils (presumably overall accuracy -- see io_utils).
get_accuracy(ret, gt)

In [None]:
# Same comparison, additionally given biased_index, i.e. the tasks whose
# ground-truth label is 0 (presumably recall on that subset -- see io_utils).
get_recall(ret, gt, biased_index)

In [None]:
# One matrix per worker, built from the errors_ table of the model stored
# in hds.step2_ds (presumably the second-stage Dawid-Skene fit -- confirm in
# hsds_em).
hds_mats = ds_errors_to_numpy_dict(hds.step2_ds.errors_)

In [None]:
# Average the per-worker matrices within each group and display both
# (human first, AI second).
hds_h_cm, hds_a_cm = calc_avg_mat(hds_mats)
hds_h_cm, hds_a_cm

### OneCoin

In [None]:
# crowd-kit one-coin Dawid-Skene fitted on the pooled human + AI responses.
ocds = OneCoinDawidSkene()
ret = ocds.fit_predict(df)

In [None]:
# Score the aggregated labels against the ground truth with the project
# helper from io_utils (presumably overall accuracy -- see io_utils).
get_accuracy(ret, gt)

In [None]:
# Same comparison, additionally given biased_index, i.e. the tasks whose
# ground-truth label is 0 (presumably recall on that subset -- see io_utils).
get_recall(ret, gt, biased_index)

In [None]:
# One matrix per worker, built from the fitted model's errors_ table.
ocmats = ds_errors_to_numpy_dict(ocds.errors_)

In [None]:
# Average the per-worker matrices within each group and display both
# (human first, AI second).
ocd_h_cm, ocd_a_cm = calc_avg_mat(ocmats)
ocd_h_cm, ocd_a_cm

### BDS

In [None]:
# Iteration counts handed to SeparatedBDS as infer_params (warm-up and
# sampling; presumably forwarded to the Stan sampler -- confirm in hsds_stan).
infer_params = {
        "iter_warmup": 1500,
        "iter_sampling": 3000,
    }

In [None]:
# Project-local SeparatedBDS model over the four labels 0..3, run with the
# "mcmc" algorithm and the settings defined in the previous cell.
bds = SeparatedBDS(labels=[0,1,2,3], algorithm="mcmc", infer_params=infer_params, init_worker_accuracy=0.75)

In [None]:
# Human and AI responses are passed as two separate frames.
bds_ret = bds.fit_predict(human, ai_df)

In [None]:
# Score the aggregated labels against the ground truth with the project
# helper from io_utils (presumably overall accuracy -- see io_utils).
get_accuracy(bds_ret, gt)

In [None]:
# Same comparison, additionally given biased_index, i.e. the tasks whose
# ground-truth label is 0 (presumably recall on that subset -- see io_utils).
get_recall(bds_ret, gt,biased_index)

In [None]:
# Draws of the Stan variable "pih" (presumably the human workers' confusion
# matrices; assumes step2_fit is a CmdStanPy fit, whose stan_variable()
# returns draws along the first axis -- confirm).
bds_pih = bds.step2_fit.stan_variable("pih")

In [None]:
# First mean: collapses axis 0 (the draws, under the assumption above).
bds_pih = bds_pih.mean(axis=0)

In [None]:
# Second mean: collapses the next leading axis (presumably the worker axis,
# leaving one label-by-label matrix -- TODO confirm the shape of "pih").
bds_h_cm = bds_pih.mean(axis=0)

In [None]:
# Display the averaged human confusion matrix.
bds_h_cm

In [None]:
# Same procedure for the Stan variable "pia" (presumably the AI workers).
bds_pia = bds.step2_fit.stan_variable("pia")

In [None]:
# First mean: collapses axis 0 (the draws, under the assumption above).
bds_pia = bds_pia.mean(axis=0)

In [None]:
# Second mean: collapses the next leading axis (presumably the worker axis).
bds_a_cm = bds_pia.mean(axis=0)

In [None]:
# Display the averaged AI confusion matrix.
bds_a_cm

### HS-DS MCMC

In [None]:
# Half the iteration counts used in the BDS section (750 warm-up, 1500
# sampling); the reason for halving is not stated in this notebook.
# NOTE: this rebinds the name infer_params, so re-running the BDS constructor
# cell after this one would pick up the halved values.
infer_params = {
        "iter_warmup": 1500//2,
        "iter_sampling": 3000//2,
    }

In [None]:
# Project-local HSDS_Stan model over the four labels 0..3, run with the
# "mcmc" algorithm and the settings defined in the previous cell.
hsbds = HSDS_Stan(labels=[0,1,2,3], algorithm="mcmc", infer_params=infer_params, init_worker_accuracy=0.75)

In [None]:
# Human and AI responses are passed as two separate frames.
ret = hsbds.fit_predict(human, ai_df)

In [None]:
# Score the aggregated labels against the ground truth with the project
# helper from io_utils (presumably overall accuracy -- see io_utils).
get_accuracy(ret, gt)

In [None]:
# Same comparison, additionally given biased_index, i.e. the tasks whose
# ground-truth label is 0 (presumably recall on that subset -- see io_utils).
get_recall(ret, gt, biased_index)

In [None]:
# Draws of the Stan variable "pih" (presumably the human workers' confusion
# matrices; assumes step2_fit is a CmdStanPy fit, whose stan_variable()
# returns draws along the first axis -- confirm).
hsbds_pih = hsbds.step2_fit.stan_variable("pih")

In [None]:
# First mean: collapses axis 0 (the draws, under the assumption above).
hsbds_pih = hsbds_pih.mean(axis=0)

In [None]:
# Second mean: collapses the next leading axis (presumably the worker axis,
# leaving one label-by-label matrix -- TODO confirm the shape of "pih").
hsbds_h_cm = hsbds_pih.mean(axis=0)

In [None]:
# Display the averaged human confusion matrix.
hsbds_h_cm

In [None]:
# Same procedure for the Stan variable "pia" (presumably the AI workers).
hsbds_pia = hsbds.step2_fit.stan_variable("pia")

In [None]:
# First mean: collapses axis 0 (the draws, under the assumption above).
hsbds_pia = hsbds_pia.mean(axis=0)

In [None]:
# Second mean: collapses the next leading axis (presumably the worker axis).
hsbds_a_cm = hsbds_pia.mean(axis=0)

In [None]:
# Display the averaged AI confusion matrix.
hsbds_a_cm

In [None]:
# Clean up: delete the ./outputs directory (presumably written by the Stan
# runs in the cells before this one -- confirm).
# shutil.rmtree replaces the `!rm -r` shell escape so the cell also runs
# outside IPython and on systems without an `rm` binary. ignore_errors=True
# keeps the cleanup best-effort: a missing directory is not an error.
import shutil

shutil.rmtree("./outputs", ignore_errors=True)

## Visualization

In [None]:
from matplotlib import pyplot as plt

In [None]:
# Draw the ten averaged confusion matrices in a 5x2 grid. Panels are filled
# row by row, so each row shows one method: human workers on the left, AI
# workers on the right.
fig, axes = plt.subplots(5, 2, figsize=(10, 8))
titles = ['HSDS_EM Human', 'HSDS_EM AI', 'DS Human', 'DS AI',
          'OneCoin Human', 'OneCoin AI', "BDS Human", "BDS AI", "HSDS_MCMC Human", "HSDS_MCMC AI"]
cms = [hds_h_cm, hds_a_cm, ds_h_cm, ds_a_cm, ocd_h_cm, ocd_a_cm, bds_h_cm, bds_a_cm, hsbds_h_cm, hsbds_a_cm]
# Every panel uses the same colormap.
cmaps = ["YlGnBu"] * len(cms)

# Matrix indices 0..3 are displayed as labels 1..4 on both axes.
tick_positions = [0, 1, 2, 3]
tick_labels = [1, 2, 3, 4]

for ax, cm, cmap, title in zip(axes.flat, cms, cmaps, titles):
    # Fixed colour range 0..1 so all panels are directly comparable.
    im = ax.imshow(cm, cmap=cmap, vmin=0, vmax=1)
    ax.set_title(title)
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(tick_labels)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels)
    fig.colorbar(im, ax=ax, orientation='vertical')

plt.tight_layout()
# Save the figure as SVG in the current directory, then display it.
plt.savefig("cm.svg", bbox_inches="tight")
plt.show()