## Calculate Human-only Results

In [1]:
import sys, os
import pandas as pd
import polars as pl

In [2]:
sys.path.append(f"../")

In [3]:
sys.path.append(f"../../methods")

In [4]:
from crowdkit.aggregation import DawidSkene, GLAD, OneCoinDawidSkene

In [5]:
from bds_stan_wrapper import BDS

In [6]:
from agg_methods import AggregationMethod

In [9]:
from io_utils import get_accuracy, get_recall, load_dataset_profile, load_gt, load_human_responses

In [None]:
!pwd

In [11]:
!mkdir ../results_human

In [12]:
def get_save_file_path(dataset_profile, r):
    """Create the result CSV for one dataset/``r`` pair and return its path.

    The file is created at ``../results_human/{dataset_name}_{r}.csv`` and
    initialised with a single header line: eleven fixed columns followed by
    one ``uc_pih_<i>`` and one ``uc_pia_<i>`` column per class.

    Args:
        dataset_profile: Mapping providing ``"dataset_name"`` and
            ``"n_classes"``.
        r: Value embedded in the file name.

    Returns:
        The path of the newly created CSV file.

    Raises:
        FileExistsError: If the target file already exists, so results of
            an earlier run are never overwritten.
    """
    dataset_name = dataset_profile["dataset_name"]
    n_classes = dataset_profile["n_classes"]

    file_name = f"../results_human/{dataset_name}_{r}.csv"

    columns = [
        "dataset", "scenario", "method", "ai_acc", "r", "num_ai", "iter",
        "convergence", "accuracy", "recall", "uc_p",
    ]
    # Build both per-class column groups over the same range so the header is
    # consistent for every class count (including 0 and 1).
    columns.extend(f"uc_pih_{i}" for i in range(n_classes))
    columns.extend(f"uc_pia_{i}" for i in range(n_classes))

    # Mode "x" (exclusive creation) refuses to open an existing file. Unlike
    # an assert, this check is atomic and is not stripped under ``python -O``.
    with open(file_name, "x") as f:
        f.write(",".join(columns) + "\n")
    return file_name

In [13]:
# Dataset identifiers processed by the experiment loop; each one is passed to
# load_dataset_profile, load_gt and load_human_responses.
# NOTE: the constant is spelled "DATSETS" (sic) and is referenced under that
# spelling elsewhere in this notebook, so rename both places together.
DATSETS = ["dog", "face", "tiny", "adult"]

In [14]:
def get_BDS_instance(labels, iter_warmup, iter_sampling, r):
    """Build a ``BDS`` model configured for MCMC inference.

    Args:
        labels: Forwarded to ``BDS`` as ``labels``.
        iter_warmup: Stored under ``"iter_warmup"`` in ``infer_params``.
        iter_sampling: Stored under ``"iter_sampling"`` in ``infer_params``.
        r: Forwarded to ``BDS`` as ``init_worker_accuracy``.

    Returns:
        A new ``BDS`` instance with ``algorithm="mcmc"``.
    """
    return BDS(
        labels=labels,
        algorithm="mcmc",
        init_worker_accuracy=r,
        infer_params=dict(iter_warmup=iter_warmup, iter_sampling=iter_sampling),
    )

In [15]:
def get_aggregation_methods(labels, r=0.75, n_iter=100000):
    """Return the list of aggregation methods to evaluate.

    The list holds three crowd-kit models (``DawidSkene``, ``GLAD``,
    ``OneCoinDawidSkene``) followed by three ``BDS`` configurations with
    1000, 2000 and 3000 sampling iterations; each BDS warm-up length is half
    of its sampling length. Every model is wrapped in ``AggregationMethod``
    with the positional flags ``False, True`` (their meaning is defined by
    ``AggregationMethod``, which is not visible here).

    Args:
        labels: Forwarded to ``get_BDS_instance``.
        r: Forwarded to ``get_BDS_instance``.
        n_iter: ``n_iter`` given to each crowd-kit model.

    Returns:
        A list of six ``AggregationMethod`` objects, crowd-kit models first.
    """
    crowdkit_models = (
        ("EMDS", DawidSkene),
        ("GLAD", GLAD),
        ("OneCoinDS", OneCoinDawidSkene),
    )
    methods = [
        AggregationMethod(name, False, True, model_cls(n_iter=n_iter))
        for name, model_cls in crowdkit_models
    ]

    for n_sampling in (1000, 2000, 3000):
        bds_model = get_BDS_instance(
            labels,
            iter_warmup=n_sampling // 2,
            iter_sampling=n_sampling,
            r=r,
        )
        methods.append(
            AggregationMethod(f"BDS(iter_sampling={n_sampling})", False, True, bds_model)
        )
    return methods

In [None]:
# Evaluate every aggregation method on human responses only and append one CSV
# row per (dataset, r, trial, method) to the result file for that dataset/r.
for dataset in DATSETS:
    print(f"Dataset: {dataset}")
    dataset_profile = load_dataset_profile(dataset)
    gt, biased_tasks = load_gt(dataset)
    n_classes = dataset_profile["n_classes"]

    # NOTE(review): the methods are built once per dataset with the default
    # r=0.75 and the same instances are reused for every r and every trial --
    # confirm that this is intended and that fit_predict keeps no state.
    methods = get_aggregation_methods(dataset_profile["labels"])
    for r in dataset_profile["r_range"]:
        file_name = get_save_file_path(dataset_profile, r)
        human_responses = load_human_responses(dataset, r)
        # Zero-row slice of the human responses: no AI responses are supplied.
        ai_responses = human_responses.iloc[0:0].copy()
        print(f"File will be saved to: {file_name}")
        # "trial" rather than "iter" so the builtin iter() is not shadowed in
        # the notebook's global namespace.
        for trial in range(5):
            for method in methods:
                print(f"Method: {method.name}")
                ret = method.fit_predict(human_responses, ai_responses)
                acc = get_accuracy(gt, ret)
                biased_acc = get_recall(gt, ret, biased_tasks)
                print(f"Accuracy: {acc}, Recall: {biased_acc}")
                uc_text = method.get_uc_text(n_classes)
                # Append row by row so finished results are already on disk if
                # a later fit fails. The scenario and ai_acc columns are "*",
                # num_ai is 0 and convergence is -1 in every row.
                with open(file_name, "a") as f:
                    f.write(f"{dataset},*,{method.name},*,{r},0,{trial},-1,{acc},{biased_acc},{uc_text}\n")