In [71]:
import pandas as pd
import json
from sklearn.metrics import accuracy_score


In [26]:
# Find the common participants
def find_common_participants(json_file1, json_file2, json_file3):
    """Return the participant names that occur in all three response files.

    Each file is parsed with ``json.load`` and iterated as a sequence of
    response records; the ``"name"`` value of every record is collected.

    Args:
        json_file1: Path of the first JSON response file.
        json_file2: Path of the second JSON response file.
        json_file3: Path of the third JSON response file.

    Returns:
        list: Names present in every file, without duplicates, ordered by
        their first appearance in ``json_file1`` so the result is the same
        on every run.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file is not valid JSON.
        KeyError: If a record has no ``"name"`` entry.
    """
    # Load all three files first, then extract names, in a single loop
    # instead of three copy-pasted stanzas.
    loaded = []
    for path in (json_file1, json_file2, json_file3):
        with open(path) as f:
            loaded.append(json.load(f))

    first, second, third = (
        [response["name"] for response in data] for data in loaded
    )

    shared = set(first).intersection(second, third)

    # Iterating a set of strings yields a different order from one
    # interpreter run to the next; dict.fromkeys de-duplicates while keeping
    # the first file's order, which makes the returned list reproducible.
    return [name for name in dict.fromkeys(first) if name in shared]


In [30]:
def read_json(json_file, common_participants):
    """Flatten one response file into rows for the selected participants.

    NOTE: a later cell in this notebook defines another ``read_json`` that
    takes a list of files; once that cell runs it replaces this definition.

    Args:
        json_file: Path of a JSON file holding a sequence of response
            records. Each record is read for its ``"name"`` and for the
            entries keyed ``"1"`` .. ``"30"``.
        common_participants: Container of names to keep; records whose name
            is not in it are skipped.

    Returns:
        list: One list per kept record, laid out as
        ``[name, answer_1, dataset_1, ..., answer_30, dataset_30]`` where
        ``answer_q`` is the first element of that question's ``"q1"`` value
        and ``dataset_q`` is its ``"dataset"`` value.
    """
    with open(json_file) as handle:
        records = json.load(handle)

    rows = []
    for record in records:
        participant = record["name"]
        if participant in common_participants:
            row = [participant]
            for question in map(str, range(1, 31)):
                item = record[question]
                answer = item["q1"][0]
                dataset = item["dataset"]
                row.extend((answer, dataset))
            rows.append(row)

    return rows


In [84]:
def read_json(json_files):
    """Collect every common participant's answers across the response files.

    Only participants returned by ``find_common_participants(*json_files)``
    are kept. Questions ``1`` .. ``30`` of the file at position ``i`` are
    renumbered to ``q + 30 * i`` so that all files share one key space.

    Args:
        json_files: Sequence of JSON file paths; it is unpacked into
            ``find_common_participants``.

    Returns:
        dict: ``{name: {question_number: {"pred", "label", "dataset",
        "correct"}}}`` where ``pred`` is the first element of the question's
        ``"q1"`` value, ``label`` is the hard-coded ground truth for that
        question number, ``dataset`` is the question's ``"dataset"`` value
        and ``correct`` is ``pred == label``.
    """
    keep = frozenset(find_common_participants(*json_files))

    # Hard-coded ground truth: within each file the first 15 questions are
    # labelled 0 and the last 15 are labelled 1; repeated for three files.
    labels = ([0] * 15 + [1] * 15) * 3

    results = {}
    for file_index, path in enumerate(json_files):
        with open(path) as handle:
            records = json.load(handle)

        offset = 30 * file_index
        for record in records:
            participant = record["name"]
            if participant not in keep:
                continue

            per_question = {}
            for question in range(1, 31):
                item = record[str(question)]
                pred = item["q1"][0]
                dataset = item["dataset"]
                number = question + offset
                truth = labels[number - 1]
                per_question[number] = {
                    "pred": pred,
                    "label": truth,
                    "dataset": dataset,
                    "correct": pred == truth,
                }

            # Merge into whatever earlier files (or earlier records with the
            # same name) already contributed for this participant.
            results.setdefault(participant, {}).update(per_question)

    return results


In [85]:
# Gather the answers of the participants common to all three experiment
# files and save the nested dict as JSON (json.dump writes the integer
# question numbers as string keys).
experiment_files = ["./exp1.json", "./exp2.json", "./exp3.json"]
answers = read_json(experiment_files)
with open("answers.json", "w") as out_file:
    json.dump(answers, out_file)


In [86]:
# Flatten the per-participant dict into one long table: one row per
# (participant, question) with the participant's name as the first column.
# NOTE: `answers` is rebound from the dict to the DataFrame below, so this
# cell can only be re-run after the cell that builds the dict.
column_order = ["name", "pred", "label", "dataset", "correct"]
dfs = [
    pd.DataFrame.from_dict(per_question, orient="index").assign(
        name=participant
    )[column_order]
    for participant, per_question in answers.items()
]

answers = pd.concat(dfs, ignore_index=True)
answers.to_csv("answers.csv", index=False)


In [89]:
# Accuracy of every participant on every dataset: rows are participants,
# columns are datasets.
# A single two-key groupby + unstack always yields a DataFrame (a missing
# participant/dataset pair becomes NaN), whereas a nested groupby().apply()
# only does so when every participant covers the same datasets. Selecting
# ["label", "pred"] explicitly also keeps apply() off the grouping columns.
performance = (
    answers.groupby(["name", "dataset"])[["label", "pred"]]
    .apply(lambda g: accuracy_score(g["label"], g["pred"]))
    .unstack("dataset")
)

# Overall accuracy per participant across all of their answers; the
# assignment aligns on the participant index.
performance["avg"] = answers.groupby("name")[["label", "pred"]].apply(
    lambda g: accuracy_score(g["label"], g["pred"])
)

# Best participants first. Rebinding instead of inplace=True keeps the cell
# free of in-place mutation.
performance = performance.sort_values(by="avg", ascending=False)
performance.to_csv("performance.csv")
print(performance)


dataset             1         2         3     4     5       6       avg
name                                                                   
A3JVBCF6Q5TKCJ  1.000  1.000000  0.857143  0.95  0.95  0.8750  0.933333
AA1XATGGO13NO   1.000  1.000000  0.928571  0.95  0.80  0.7500  0.888889
A2KRVXXFOKZHOF  1.000  0.833333  0.857143  0.95  0.85  0.8125  0.877778
A21VU7GCDLU5J1  1.000  1.000000  0.714286  0.90  0.90  0.7500  0.866667
A1PC3O3OZEZ6MX  1.000  1.000000  0.785714  0.85  0.75  0.8125  0.844444
A21FSGB4C529FP  1.000  1.000000  0.785714  0.85  0.75  0.8125  0.844444
A1PV3B0AMIBE3R  1.000  1.000000  0.857143  0.85  0.75  0.6250  0.822222
A2T24XH5T1BTFI  0.875  0.916667  0.785714  0.95  0.75  0.6250  0.811111
A2T6B74AUOTKIT  0.875  1.000000  0.571429  0.90  0.80  0.6250  0.788889
A1I7H6RDJS4EKN  0.750  0.833333  0.785714  0.90  0.80  0.5625  0.777778
A217VFGUK0BPKU  0.875  1.000000  0.642857  0.75  0.80  0.6875  0.777778
A2ZJWAYA4LH5WX  0.500  0.750000  0.642857  0.65  0.65  0.5000  0