# Evaluate BDS, HS-DS

In [None]:
import sys

In [None]:
import polars as pl
import pandas as pd
import numpy as np

np.random.seed(777)

In [None]:
sys.path.append(f"../../main_experiment")
sys.path.append(f"../../methods")

In [None]:
from hsds_stan import HSDS_Stan, SeparatedBDS
from hsds_em import HSDS_EM
from bds_stan_wrapper import BDS

In [None]:
from io_utils import get_accuracy, get_recall, load_dataset_profile, load_gt, load_human_responses

In [None]:
# Load the human responses; the CSV also carries a ground-truth column "gt".
human_df = pd.read_csv("../human_responses_with_gt.csv")
# Keep only the "task" and "gt" columns and collapse repeated (task, gt) rows,
# retaining the last occurrence of each duplicate.
gt = human_df.filter(["task","gt"]).drop_duplicates(keep='last')
# Tasks whose ground-truth label equals 4; passed to get_recall further down.
# NOTE(review): what label 4 stands for is not visible in this notebook — confirm.
biased_tasks = gt[gt["gt"]==4]["task"].unique()
# Index the ground-truth table by task.
gt = gt.set_index("task")
# Remove the ground-truth column from the response table.
human_df = human_df.drop(["gt"], axis=1)

## Utils

In [None]:
def get_BDS_instance(labels, iter_warmup, iter_sampling, r):
    """Build a ``SeparatedBDS`` model set up for MCMC inference.

    ``iter_warmup`` and ``iter_sampling`` are forwarded together as the
    ``infer_params`` dictionary, and ``r`` is passed as
    ``init_worker_accuracy``.
    """
    mcmc_settings = dict(iter_warmup=iter_warmup, iter_sampling=iter_sampling)
    model = SeparatedBDS(
        labels=labels,
        algorithm="mcmc",
        init_worker_accuracy=r,
        infer_params=mcmc_settings,
    )
    return model

def get_HSDS_instance(labels, iter_warmup, iter_sampling, r):
    """Build an ``HSDS_Stan`` model set up for MCMC inference.

    ``iter_warmup`` and ``iter_sampling`` are forwarded together as the
    ``infer_params`` dictionary, and ``r`` is passed as
    ``init_worker_accuracy``.
    """
    mcmc_settings = dict(iter_warmup=iter_warmup, iter_sampling=iter_sampling)
    model = HSDS_Stan(
        labels=labels,
        algorithm="mcmc",
        init_worker_accuracy=r,
        infer_params=mcmc_settings,
    )
    return model

In [None]:
def generate_methods(n_iter=100000, r=0.75, random_state=0):
    """Return a fresh ``{display name: model}`` mapping of the methods to evaluate.

    Args:
        n_iter: Iteration count handed to ``HSDS_EM``.
        r: Initial worker accuracy handed to every method.
        random_state: Accepted so callers can pass a per-run value, but it is
            not forwarded to any model here.

    Returns:
        dict mapping a display name to a newly constructed model instance.
    """
    labels = [0, 1, 2, 3, 4, 5]
    return {
        "HSDS-EM": HSDS_EM(n_iter=n_iter, r=r),
        # Fixed: the previous call targeted `get_HSDSmcmc_instance`, which is
        # not defined in this notebook; `get_HSDS_instance` is the HSDS factory.
        # NOTE(review): the key says iter_sampling=3000 while the model receives
        # half of that (and half the warmup) — confirm this is intentional.
        "HSDS-MCMC(iter_sampling=3000)": get_HSDS_instance(
            labels, iter_warmup=1500 // 2, iter_sampling=3000 // 2, r=r
        ),
        "BDS(iter_sampling=3000)": get_BDS_instance(
            labels, iter_warmup=1500, iter_sampling=3000, r=r
        ),
    }

In [None]:
def get_record(num_ai, iter, method, acc, biased_acc, uc_count):
    """Pack one evaluation result into a flat dictionary (one result-table row).

    ``acc`` is stored under ``"accuracy"`` and ``biased_acc`` under
    ``"recall"``; ``iter`` is stored under ``"iteration"``.
    """
    return dict(
        num_ai=num_ai,
        iteration=iter,
        method=method,
        accuracy=acc,
        recall=biased_acc,
        uc_count=uc_count,
    )

In [None]:
def get_uc_text(obj, n_classes: int) -> str:
    """Render the unconverged counts of ``obj`` as one comma-separated string.

    The order is ``p_unconverged_count`` first, then every entry of
    ``pih_unconverged_count``, then every entry of ``pia_unconverged_count``.
    ``n_classes`` is accepted but not used.
    """
    counts = [obj.p_unconverged_count]
    counts.extend(obj.pih_unconverged_count)
    counts.extend(obj.pia_unconverged_count)
    return ",".join(f"{value}" for value in counts)

## Step1: Human and AI results

In [None]:
# Number of AI response files (AI_0.csv, AI_1.csv, ...) combined in each condition.
sp = [5,10]

In [None]:
# Stack the first sp[0] and the first sp[1] AI response files into one frame each.
# NOTE: the variable names do not match the counts — df10 is built from sp[0]
# files and df20 from sp[1] files.
df10 = pd.concat(
    [pd.read_csv(f"../ai_responses/AI_{i}.csv") for i in range(sp[0])],
    axis=0,
    ignore_index=True,
)
df20 = pd.concat(
    [pd.read_csv(f"../ai_responses/AI_{i}.csv") for i in range(sp[1])],
    axis=0,
    ignore_index=True,
)

In [None]:
# Run every human+AI method five times for each AI-worker condition and collect
# one record (accuracy, recall, unconverged count) per run.
records = []
for num_ai, aidf in zip(sp, [df10, df20]):
    print(f"=== {num_ai} AI Workers ===")
    for trial in range(5):
        # Fresh model instances for every repetition.
        for name, method in generate_methods(random_state=trial).items():
            print(f"--- {name} ---")
            ret = method.fit_predict(human_df, aidf)
            acc = get_accuracy(gt, ret)
            biased_acc = get_recall(gt, ret, biased_tasks)
            print(f"Accuracy: {acc}, Recall: {biased_acc}")
            try:
                uc_text = get_uc_text(method, 6)
            except AttributeError:
                # The method exposes no unconverged counters; record -1 instead.
                uc_text = "-1"
            uc_values = [int(token) for token in uc_text.split(",")]
            uc_count = np.array(uc_values).sum()
            record = get_record(num_ai, trial, name, acc, biased_acc, uc_count)
            records.append(record)
            print(record)
result_df = pd.DataFrame(records)

In [None]:
# Persist the Step 1 (human + AI) results, without the DataFrame index column.
result_df.to_csv("../results/bds_hsds.csv", index=False)

## Step2: Human-only Results (BDS)

In [None]:
def get_BDS_instance_human_only(labels, iter_warmup, iter_sampling, r):
    """Build a ``BDS`` model set up for MCMC inference.

    ``iter_warmup`` and ``iter_sampling`` are forwarded together as the
    ``infer_params`` dictionary, and ``r`` is passed as
    ``init_worker_accuracy``.
    """
    mcmc_settings = dict(iter_warmup=iter_warmup, iter_sampling=iter_sampling)
    model = BDS(
        labels=labels,
        algorithm="mcmc",
        init_worker_accuracy=r,
        infer_params=mcmc_settings,
    )
    return model

In [None]:
def generate_methods_human_only(n_iter=100000, r=0.75, random_state=0):
    """Return a fresh ``{display name: model}`` mapping with the single BDS baseline.

    ``r`` is handed to the model as its initial worker accuracy. ``n_iter``
    and ``random_state`` are accepted but not used.
    """
    class_labels = list(range(6))
    bds_model = get_BDS_instance_human_only(
        class_labels, iter_warmup=1500, iter_sampling=3000, r=r
    )
    return {"BDS(iter_sampling=3000)": bds_model}

In [None]:
# Run the human-only BDS baseline five times and collect one record per run.
# The number of AI workers is recorded as 0.
records = []
for trial in range(5):
    for name, method in generate_methods_human_only(random_state=trial).items():
        print(f"--- {name} ---")
        ret = method.fit_predict(human_df, check_rhat=True)
        acc = get_accuracy(gt, ret)
        biased_acc = get_recall(gt, ret, biased_tasks)
        print(f"Accuracy: {acc}, Recall: {biased_acc}")
        uc_count = method.uc_count_bds
        print("uc_count:", uc_count)
        records.append(get_record(0, trial, name, acc, biased_acc, uc_count))
result_df = pd.DataFrame(records)

In [None]:
# Persist the Step 2 (human-only) results, without the DataFrame index column.
result_df.to_csv("../results/bds_human.csv", index=False)

In [18]:
# Cleanup: recursively delete the local `outputs` directory.
!rm -r outputs