In [1]:
import json
import pandas as pd
import pickle

from unified_experiments import run_experiment_for_target_variable

In [2]:
# Load the codebook: a JSON object keyed by variable name. Later cells read
# each entry's "scale_type" field.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default (write_latex below also uses UTF-8).
with open("codebook.json", "r", encoding="utf-8") as codebook_file:
    codebook = json.load(codebook_file)

In [3]:
# Run the experiment once per codebook variable. Each entry pairs the
# experiment's result with that variable's scale type from the codebook.
results_dict = {
    target: (
        run_experiment_for_target_variable([target]),
        meta["scale_type"]
    )
    for target, meta in codebook.items()
}

# Same sweep, this time asking the experiment to drop missing values.
results_dict_dropped_missing_values = {
    target: (
        run_experiment_for_target_variable([target], drop_missing_val=True),
        meta["scale_type"]
    )
    for target, meta in codebook.items()
}

# Persist both result sets so later cells can reload them without
# re-running the experiments.
with open('results_dict.pkl', 'wb') as results_file:
    pickle.dump(results_dict, results_file, protocol=pickle.HIGHEST_PROTOCOL)

with open('results_dict_dropped_missing_values.pkl', 'wb') as dropped_file:
    pickle.dump(
        results_dict_dropped_missing_values,
        dropped_file,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

In [4]:
# Reload the pickled experiment results from disk.
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# produced by this notebook.
with open('results_dict.pkl', 'rb') as results_file:
    results_dict = pickle.load(results_file)

with open('results_dict_dropped_missing_values.pkl', 'rb') as dropped_file:
    results_dict_dropped_missing_values = pickle.load(dropped_file)

In [5]:
def big_dataframe(results_dict):
    """Combine per-variable experiment results into one table plus per-type means.

    Parameters
    ----------
    results_dict : dict
        Maps a variable name to a ``(df, var_type)`` pair. Only the first row
        of ``df`` is used. ``var_type == 'u'`` is labelled ``'Unipolar'``;
        any other value is labelled ``'Bipolar'``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``results_df`` has one row per variable, indexed by
        ``('Type', 'Variable')``, with the metric column names title-cased
        and underscores replaced by spaces. ``mean_df`` holds the mean of
        every numeric column of ``results_df`` grouped by ``'Type'``.

    Raises
    ------
    ValueError
        If ``results_dict`` is empty, since there is no frame to take the
        metric column names from.
    """
    if not results_dict:
        raise ValueError(
            "results_dict is empty; there are no experiment results to tabulate"
        )

    # Take the metric names from the first frame explicitly instead of relying
    # on the loop variable still being bound after the loop.
    # Assumes every result frame shares the same columns -- TODO confirm.
    first_df, _ = next(iter(results_dict.values()))
    metric_columns = first_df.columns.tolist()

    rows = []
    for var_name, (df, var_type) in results_dict.items():
        variable_type = 'Unipolar' if var_type == 'u' else 'Bipolar'
        rows.append([variable_type, var_name] + df.iloc[0].tolist())

    columns = ['Type', 'Variable'] + [
        col.replace('_', ' ').title() for col in metric_columns
    ]

    results_df = pd.DataFrame(rows, columns=columns).set_index(['Type', 'Variable'])

    numeric_columns = results_df.select_dtypes(include='number').columns
    mean_df = results_df.groupby(['Type'])[numeric_columns].mean()

    return results_df, mean_df

In [6]:
def write_latex(df, path):
    """Write ``df`` to ``path`` as a LaTeX tabular wrapped in a ``table`` float.

    Parameters
    ----------
    df : object with a ``to_latex(bold_rows=...)`` method
        Typically a pandas DataFrame; rendered with ``bold_rows=True``.
    path : str or path-like
        Destination file. It is created or overwritten and written as UTF-8.

    Notes
    -----
    The float is opened with the ``[H]`` placement specifier, which the
    including LaTeX document must support (commonly via the ``float`` package).
    """
    with open(path, "w", encoding="utf-8") as f:
        f.write("\\begin{table}[H]\n")
        f.write(df.to_latex(bold_rows=True))
        # The backslash must be escaped: "\e" is not a valid escape sequence
        # and triggers a warning (a future error) on recent Python versions.
        f.write("\\end{table}\n")

In [7]:
# Directory that receives the generated LaTeX tables.
path = "../Reports/Final Report/tables"

# Build the per-variable table and the per-type means for each result set.
results_df, mean_df = big_dataframe(results_dict)
results_df2, mean_df2 = big_dataframe(results_dict_dropped_missing_values)

# Export every table to its own .tex file.
tables_to_export = [
    (results_df, f"{path}/results_with_missing_values.tex"),
    (mean_df, f"{path}/means_with_missing_values.tex"),
    (results_df2, f"{path}/results_without_missing_values.tex"),
    (mean_df2, f"{path}/means_without_missing_values.tex"),
]
for table, destination in tables_to_export:
    write_latex(table, destination)

In [8]:
# Summary statistics across variables for the run made without
# drop_missing_val=True (the table exported as results_with_missing_values.tex).
results_df.describe()

Unnamed: 0,Mean Absolute Error,Cohen Kappa Score,Spearmanr
count,41.0,41.0,41.0
mean,2.175045,0.003649,0.183456
std,1.506106,0.022032,0.171797
min,0.059074,-0.034093,-0.224392
25%,0.95896,-0.00957,0.084148
50%,2.127999,2e-05,0.16937
75%,3.201438,0.007078,0.324105
max,4.972976,0.085699,0.476543


In [9]:
# Summary statistics across variables for the run made with
# drop_missing_val=True (the table exported as results_without_missing_values.tex).
results_df2.describe()

Unnamed: 0,Mean Absolute Error,Cohen Kappa Score,Spearmanr
count,41.0,41.0,41.0
mean,1.983948,-0.000192,0.167989
std,1.279976,0.016775,0.177379
min,0.068056,-0.033302,-0.225402
25%,0.968477,-0.012358,0.053133
50%,1.982368,0.000272,0.146555
75%,2.994123,0.005425,0.317069
max,4.889868,0.039456,0.472565
