## Evaluate CATD, LFC, PM-CRH, ZC, LA, and Minmax
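Before running this notebook, place the input data where the cells below read it: the aggregated result files in `../scripts/results/` and the human responses with ground truth in `../human_responses_with_gt.csv` (both paths relative to this notebook).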

In [None]:
import sys

In [None]:
import polars as pl
import pandas as pd
import numpy as np

# Seed NumPy's global random number generator with a fixed value.
# NOTE(review): nothing visible in this notebook draws from np.random directly;
# presumably this is for reproducibility of imported code -- confirm.
np.random.seed(777)

In [None]:
# Put the main-experiment directory on the import path so that modules stored
# there can be imported (presumably where ``io_utils`` lives -- confirm).
# The path is relative and is resolved against the current working directory.
main_experiment_dir = "../../main_experiment"
# Guard against duplicates: re-running this cell would otherwise append the
# same entry to sys.path again on every execution.
if main_experiment_dir not in sys.path:
    sys.path.append(main_experiment_dir)

In [None]:
from io_utils import get_accuracy, get_recall, load_dataset_profile, load_gt, load_human_responses

In [None]:
# Numbers of AI workers for which result files are processed and scored.
sp = [0, 5, 10]

## Pre-step: Transform Minmax Results
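Please place the Minmax results in the same folder as this notebook, named `result_{num_ai}_{run}.csv` with `run` = 1–5. The cell below converts each of them to `../scripts/results/minmax_AI={num_ai}_{iteration}.csv`, where `iteration` = `run` − 1.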

In [None]:
# Convert every raw Minmax output file (a task column followed by six value
# columns, no header) into the ``task,label`` layout that the scoring step
# reads from ``../scripts/results``.
score_cols = ["1", "2", "3", "4", "5", "6"]

# The label is the 0-based position of the first value column that equals the
# row maximum (earlier columns win ties). If none of the first five columns
# matches, the last position is used. The expression does not depend on the
# file being processed, so it is built once up front.
is_row_max = [pl.col(col) == pl.col("max_val") for col in score_cols]
label_expr = pl.when(is_row_max[0]).then(pl.lit("0"))
for position in range(1, len(score_cols) - 1):
    label_expr = label_expr.when(is_row_max[position]).then(pl.lit(str(position)))
label_expr = label_expr.otherwise(pl.lit(str(len(score_cols) - 1)))

for num_ai in sp:
    # ``run_idx`` instead of ``iter`` so the builtin ``iter`` is not shadowed
    # for the rest of the notebook session.
    for run_idx in range(5):
        # Input files are numbered from 1, output files from 0.
        # NOTE(review): a positional dtype list presumably pins only the first
        # column (task) to string; newer polars releases rename ``dtypes`` to
        # ``schema_overrides`` -- confirm against the installed version.
        df = pl.read_csv(
            f"result_{num_ai}_{run_idx + 1}.csv",
            has_header=False,
            dtypes=[pl.Utf8],
            new_columns=["task"] + score_cols,
        )
        df = (
            df.with_columns(max_val=pl.max_horizontal(score_cols))
            .with_columns(label=label_expr)
            # Selecting only task/label also discards the helper ``max_val``.
            .select(["task", "label"])
        )
        df.write_csv(f"../scripts/results/minmax_AI={num_ai}_{run_idx}.csv")

## Main-step: Get Scores
Please put the results data in `../scripts/results` (relative to this notebook), one file per method, named `{method}_AI={num_ai}_{iteration}.csv`.

In [None]:
# Load the human responses; this CSV also carries a ground-truth column ``gt``.
human_df = pd.read_csv("../human_responses_with_gt.csv")

# Reduce to the distinct (task, gt) pairs, keeping the last occurrence of each.
gt = human_df.filter(items=["task", "gt"]).drop_duplicates(keep="last")

# Tasks whose ground-truth value is 4; they are passed to the recall metric.
# NOTE(review): presumably 4 marks the "biased" class -- confirm.
# Must be computed before ``task`` becomes the index below.
biased_tasks = gt.loc[gt["gt"] == 4, "task"].unique()

# Ground truth is looked up by task from here on.
gt = gt.set_index("task")

# The responses frame itself no longer needs the ground-truth column.
human_df = human_df.drop(columns=["gt"])

In [None]:
# Method names to score; each one is used as the file-name prefix of that
# method's result files.
METHODS = [
    "CATD",
    "LA1",
    "LA2",
    "LFC",
    "minmax",
    "PM-CRH",
    "ZC",
]

In [None]:
def get_record(num_ai, iter, method, acc, biased_acc):
    """Bundle one evaluation result into a flat dictionary.

    Args:
        num_ai: Number of AI workers; stored under ``"num_ai"``.
        iter: Iteration index; stored under ``"iteration"``.
        method: Method name; stored under ``"method"``.
        acc: Accuracy value; stored under ``"accuracy"``.
        biased_acc: Value stored under ``"recall"``.

    Returns:
        A dict with the keys ``num_ai``, ``iteration``, ``method``,
        ``accuracy`` and ``recall``, in that order.
    """
    return dict(
        num_ai=num_ai,
        iteration=iter,
        method=method,
        accuracy=acc,
        recall=biased_acc,
    )

# Score every (num_ai, iteration, method) result file against the ground truth
# and collect one record per file.
records = []
for num_ai in sp:
    print(f"=== {num_ai} AI Workers ===")
    # ``iteration`` instead of ``iter`` so the builtin ``iter`` is not shadowed
    # at notebook scope.
    for iteration in range(5):
        for name in METHODS:
            print(f"--- {name} ---")
            result_path = f"../scripts/results/{name}_AI={num_ai}_{iteration}.csv"
            # Keep only task/label and hand the predictions over as a pandas
            # frame indexed by task.
            ret = (
                pl.read_csv(result_path)
                .select(["task", "label"])
                .to_pandas()
                .set_index("task")
            )

            acc = get_accuracy(gt, ret)
            # Recall is evaluated on the ``biased_tasks`` subset only.
            biased_acc = get_recall(gt, ret, biased_tasks)
            print(f"Accuracy: {acc}, Recall: {biased_acc}")
            records.append(get_record(num_ai, iteration, name, acc, biased_acc))

# One row per scored file.
result_df = pd.DataFrame(records)

In [None]:
# Write the collected scores to a CSV file in the current working directory;
# index=False leaves the DataFrame's row index out of the file.
result_df.to_csv("truth_infer_results.csv", index=False)