In [1]:
import os
import json
import csv
import pandas as pd
import numpy as np

In [2]:
# Location of the merged results CSV. The default is the original
# machine-specific path; set the AL_RESULTS_CSV environment variable to point
# the notebook at a copy of the file elsewhere without editing this cell.
results_csv_file = os.environ.get(
    "AL_RESULTS_CSV",
    r"/Users/arvin/Documents/ucla research/active learning/results/results_10_2_2023_merged_edited.csv",
)

In [3]:
# Load the results table. The cells below look experiments up by the 'exp'
# column and read one value per round from the 'round<k>_test' columns.
exp_df = pd.read_csv(results_csv_file)  

In [4]:
# Every experiment name found in the 'exp' column; used below to skip
# (model, method, repeat) combinations that are absent from the CSV.
included_exp_names = exp_df['exp'].tolist()

In [5]:
# Experiment grid: every model is crossed with every sampling method.
models = ["DMPLS", "MSHAH", "strong"]
methods = ["coreset", "db_bald", "db_entropy", "random"]

# Number of repeated experiments probed per (model, method) pair, and the
# number of rounds reported for each experiment.
exp_length = 5
num_rounds = 5

# CSV column names holding the per-round test value: round0_test, round1_test, ...
round_strings = [f"round{round_idx}_test" for round_idx in range(num_rounds)]

In [6]:
def _experiment_names(prefix, suffix):
    """Return the candidate experiment names for one (model, method) pair.

    The first repeat is named ``<prefix>_exp<suffix>``; repeat ``k`` (k >= 1)
    is named ``<prefix>_exp<k><suffix>``. ``exp_length`` names are produced.
    """
    return [
        f"{prefix}_exp{exp_num if exp_num else ''}{suffix}"
        for exp_num in range(exp_length)
    ]


def _collect_round_results(exp_names):
    """Gather per-round values for the given experiments.

    Returns a dict mapping each entry of ``round_strings`` to a list with one
    float per experiment in ``exp_names`` that is present in the CSV.
    Experiments missing from ``included_exp_names`` are skipped, so the result
    is an empty dict when none of them were run.

    Raises:
        ValueError: from ``Series.item()`` if an experiment name matches more
            than one row of ``exp_df``.
    """
    per_round = {}
    for exp_name in exp_names:
        if exp_name not in included_exp_names:
            continue
        exp_rows = exp_df.loc[exp_df['exp'] == exp_name]
        for round_string in round_strings:
            value = float(exp_rows[round_string].item())
            per_round.setdefault(round_string, []).append(value)
    return per_round


# Nested mapping: model -> method -> round column -> list of per-repeat values.
results_dict = {}

# process weak supervision
for model in models:
    if model == "strong":
        # The "strong" entry uses a different naming scheme and is built
        # below; computing it here would only be overwritten.
        continue
    # Every model other than DMPLS is stored under a "DMPLS_<model>" prefix.
    model_ = model if model == "DMPLS" else "DMPLS_" + model
    results_dict[model] = {
        method: _collect_round_results(_experiment_names(model_, "_" + method))
        for method in methods
    }

# process strong supervision
# Strong runs are named "DMPLS_exp[<k>]_strong[_<method>]"; the "random"
# method carries no method suffix at all.
results_dict["strong"] = {
    method: _collect_round_results(
        _experiment_names(
            "DMPLS",
            "_strong" + ("" if method == "random" else "_" + method),
        )
    )
    for method in methods
}

In [7]:
# Inspect the nested mapping built above:
# model -> method -> round column -> list of per-repeat float values.
results_dict

{'DMPLS': {'coreset': {'round0_test': [0.848169769741984,
    0.845885928661349,
    0.84403015460971,
    0.850129671711766,
    0.846778214377267],
   'round1_test': [0.869393471721048,
    0.873687922058315,
    0.866758952584513,
    0.859141027733667,
    0.86024891829944],
   'round2_test': [0.883506018071677,
    0.878559691208046,
    0.872507434032664,
    0.881622102650431,
    0.87502236979554],
   'round3_test': [0.870199398185925,
    0.868960871966797,
    0.873064859321881,
    0.879655497102432,
    0.871337297980505],
   'round4_test': [0.882332219803207,
    0.883718018298843,
    0.883542608985848,
    0.867305867387299,
    0.885445549650311]},
  'db_bald': {'round0_test': [0.693023778176397,
    0.679816549715563,
    0.681616778427582,
    0.657554686772979,
    0.665224023346524],
   'round1_test': [0.846751624040854,
    0.850126966288524,
    0.84575473720919,
    0.845767484595056,
    0.847306510701694],
   'round2_test': [0.873852284546996,
    0.87352766924

In [8]:
# One (model, method) pair per row of the summary table, models varying slowest.
tuples_for_multiIndex = [
    (model_name, method_name)
    for model_name in models
    for method_name in methods
]
index = pd.MultiIndex.from_tuples(
    tuples_for_multiIndex,
    names=["model", "method"],
)
index

MultiIndex([( 'DMPLS',    'coreset'),
            ( 'DMPLS',    'db_bald'),
            ( 'DMPLS', 'db_entropy'),
            ( 'DMPLS',     'random'),
            ( 'MSHAH',    'coreset'),
            ( 'MSHAH',    'db_bald'),
            ( 'MSHAH', 'db_entropy'),
            ( 'MSHAH',     'random'),
            ('strong',    'coreset'),
            ('strong',    'db_bald'),
            ('strong', 'db_entropy'),
            ('strong',     'random')],
           names=['model', 'method'])

In [9]:
# Empty summary frame: one row per (model, method) pair and one column per
# round; no data is supplied here, the cells are filled in by the next cell.
df = pd.DataFrame(index=index, columns=round_strings)

In [10]:
# Fill the summary table with "mean+/-std" strings, one per (model, method)
# row and round column. Rows whose results lack any round are removed.
for model in models:
    for method in methods:
        results = results_dict[model][method]

        if any(round_string not in results for round_string in round_strings):
            # Incomplete (or never-run) configuration: remove its row.
            # errors="ignore" keeps the cell re-runnable once the row is gone.
            df = df.drop((model, method), errors="ignore")
            continue

        for round_string in round_strings:
            round_results = results[round_string]
            mean_string = '%.3f' % np.mean(round_results)
            # ddof=1 gives the sample standard deviation across repeats; with
            # a single repeat it evaluates to nan.
            std_string = '%.3f' % np.std(round_results, ddof=1)
            # Write through a single .loc call on df itself. Assigning into a
            # row previously pulled out with df.loc[model, method] is chained
            # assignment and does not update df when pandas returns a copy.
            df.loc[(model, method), round_string] = f"{mean_string}+/-{std_string}"

In [11]:
# Show which (model, method) rows remain after the previous cell dropped the
# configurations with missing rounds.
df.index

MultiIndex([( 'DMPLS',    'coreset'),
            ( 'DMPLS',    'db_bald'),
            ( 'DMPLS', 'db_entropy'),
            ( 'DMPLS',     'random'),
            ( 'MSHAH',    'coreset'),
            ( 'MSHAH',    'db_bald'),
            ( 'MSHAH', 'db_entropy'),
            ( 'MSHAH',     'random'),
            ('strong',    'coreset'),
            ('strong',     'random')],
           names=['model', 'method'])

In [12]:
# Swap the raw round column names for percentage labels in the displayed table.
round_to_percent = {
    "round0_test": "20%",
    "round1_test": "40%",
    "round2_test": "60%",
    "round3_test": "80%",
    "round4_test": "100%",
}
df = df.rename(columns=round_to_percent)

In [13]:
# Display the final summary table; each cell is a "mean+/-std" string and the
# columns carry the percentage labels assigned in the previous cell.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,20%,40%,60%,80%,100%
model,method,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
DMPLS,coreset,0.847+/-0.002,0.866+/-0.006,0.878+/-0.005,0.873+/-0.004,0.880+/-0.007
DMPLS,db_bald,0.675+/-0.014,0.847+/-0.002,0.873+/-0.002,0.876+/-0.005,0.879+/-0.005
DMPLS,db_entropy,0.689+/-0.014,0.840+/-0.007,0.870+/-0.005,0.875+/-0.004,0.876+/-0.003
DMPLS,random,0.687+/-0.012,0.844+/-0.007,0.867+/-0.003,0.875+/-0.004,0.879+/-0.006
MSHAH,coreset,0.848+/-0.005,0.860+/-0.004,0.857+/-0.011,0.870+/-0.003,0.871+/-0.002
MSHAH,db_bald,0.643+/-0.022,0.839+/-0.007,0.857+/-0.004,0.869+/-0.004,0.872+/-0.005
MSHAH,db_entropy,0.660+/-0.037,0.836+/-0.009,0.859+/-0.010,0.866+/-0.005,0.871+/-0.005
MSHAH,random,0.655+/-0.012,0.834+/-0.009,0.848+/-0.012,0.862+/-0.004,0.873+/-0.005
strong,coreset,0.886+/-0.009,0.900+/-0.002,0.905+/-0.005,0.909+/-0.005,0.909+/-0.003
strong,random,0.654+/-0.023,0.876+/-0.007,0.894+/-0.003,0.904+/-0.003,0.907+/-0.001
