In [None]:
import os
import json
import csv
import pandas as pd
import numpy as np

In [None]:
# Path of the merged results CSV read by the cells below.
# Set the RESULTS_CSV_FILE environment variable to point the notebook at a
# different file; when it is unset (or empty) the original local path is used.
results_csv_file = os.environ.get("RESULTS_CSV_FILE") or (
    r"/Users/arvin/Documents/ucla research/active learning/results/results_10_2_2023_merged_edited.csv"
)

In [None]:
# Load the results table; later cells look experiments up by its 'exp' column.
exp_df = pd.read_csv(filepath_or_buffer=results_csv_file)

In [None]:
# Every experiment name present in the results table, in row order.
included_exp_names = exp_df.loc[:, 'exp'].to_list()

In [None]:
# Model and method identifiers used to assemble experiment names.
models = [
    "DMPLS",
    "DMPLS_MSHAH",
    "DMPLS_EM",
    "DMPLS_S2L",
    "strong",
]
methods = [
    "coreset",
    "db_bald",
    "db_entropy",
    "random",
    "coregcn",
]
# Number of repeated runs looked up per (model, method) pair.
exp_length = 5
# Number of rounds; each round has a "round<k>_test" column name.
num_rounds = 5
round_strings = [f"round{round_idx}_test" for round_idx in range(num_rounds)]

In [None]:
def _experiment_name(prefix, exp_num, suffix):
    """Build the 'exp' column value for one run of a configuration.

    Run 0 is named '<prefix>_exp_<suffix>'; run k > 0 is named
    '<prefix>_exp<k>_<suffix>'.
    """
    run_tag = "_exp" if exp_num == 0 else "_exp" + str(exp_num)
    return prefix + run_tag + "_" + suffix


def _collect_round_results(prefix, suffix):
    """Gather the per-round values of every available run of one configuration.

    Looks up runs 0 .. exp_length - 1 and keeps those whose name appears in
    `included_exp_names`.

    Returns:
        dict mapping each entry of `round_strings` to a list of floats, one
        per run found. The dict is empty when no run is present.

    Raises:
        ValueError: if an experiment name does not match exactly one row of
            `exp_df`, since a single value per round cannot be extracted.
    """
    collected = {}
    for exp_num in range(exp_length):
        exp_name = _experiment_name(prefix, exp_num, suffix)
        if exp_name not in included_exp_names:
            continue
        sub_df = exp_df.loc[exp_df['exp'] == exp_name]
        if len(sub_df) != 1:
            raise ValueError(
                f"expected exactly one row for experiment {exp_name!r}, found {len(sub_df)}"
            )
        for round_string in round_strings:
            collected.setdefault(round_string, []).append(
                float(sub_df[round_string].item())
            )
    return collected


# results_dict[model][method][round_string] -> list of per-run float values
results_dict = {}

# process weak supervision
# (any entry written here for "strong" is replaced by the strong-supervision
# pass below, which uses a different naming scheme)
for model in models:
    results_dict[model] = {
        method: _collect_round_results(model, method) for method in methods
    }

# process strong supervision
# Strong runs are stored under the "DMPLS" prefix with a "strong" suffix; the
# "random" method carries no method suffix at all.
results_dict["strong"] = {
    method: _collect_round_results(
        "DMPLS", "strong" if method == "random" else "strong_" + method
    )
    for method in methods
}

In [None]:
# Show the nested model -> method -> round results as this cell's output.
results_dict

In [None]:
# Row labels: one (model, method) pair per row, with the model varying slowest.
tuples_for_multiIndex = [
    (model_name, method_name)
    for model_name in models
    for method_name in methods
]
index = pd.MultiIndex.from_tuples(
    tuples_for_multiIndex,
    names=["model", "method"],
)
index

In [None]:
# Pair every round's test column with a matching "round<k>_count" column.
round_counts_strings = [
    [test_column, f"round{round_idx}_count"]
    for round_idx, test_column in enumerate(round_strings)
]
# Empty summary table: rows from `index`, columns are the flattened pairs.
df = pd.DataFrame(
    index=index,
    columns=[column for pair in round_counts_strings for column in pair],
)

In [None]:
# Fill the summary table: for every (model, method) row and every round that
# has data, store "mean+/-std" (3 decimals) and the number of runs behind it.
for model in models:
    for method in methods:
        results = results_dict[model][method]
        row_key = (model, method)
        for round_string, count_string in round_counts_strings:
            if round_string not in results:
                # No run recorded for this round; leave the cells empty.
                continue
            round_results = results[round_string]
            print(f"model: {model}, method: {method}, round_string: {round_string}, results: {results[round_string]}")
            num_results = len(round_results)
            mean_result = np.mean(round_results)
            # Sample std (ddof=1) is undefined for fewer than two runs; use NaN
            # directly, which formats as "nan" without a NumPy RuntimeWarning.
            if num_results > 1:
                std_results = np.std(round_results, ddof=1)
            else:
                std_results = float("nan")
            mean_string = '%.3f' % mean_result
            std_string = '%.3f' % std_results
            # Write straight into `df`. Assigning through an intermediate
            # `df.loc[model, method]` row may only modify a temporary copy.
            df.loc[row_key, round_string] = f"{mean_string}+/-{std_string}"
            df.loc[row_key, count_string] = num_results

In [None]:
# Show the filled summary table as this cell's output.
df