In [21]:
from math import sqrt
from statistics import mean, stdev
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from collections import defaultdict

In [22]:
# Merged parameter-estimation results; the file is comma-separated text.
datafile = "problintopo_estmissingparams_merged.txt"
d = pd.read_csv(filepath_or_buffer=datafile)

In [23]:
# Keep only the ground-truth parameters, each method's phi/nu estimates,
# and the model-condition label.
df_full = d[[
    "true_phi_value", "true_nu_value",
    "cassg_phi", "cassg_nu",
    "startle_phi", "startle_nu",
    "problin_phi", "problin_nu",
    "nj_phi", "nj_nu",
    "modelcondition",
]]

In [24]:
# Display the selected columns (the notebook elides the middle rows).
df_full

Unnamed: 0,true_phi_value,true_nu_value,cassg_phi,cassg_nu,startle_phi,startle_nu,problin_phi,problin_nu,nj_phi,nj_nu,modelcondition
0,0.250,0.000,0.258660,0.000020,0.258664,0.000007,0.258664,0.000010,0.258665,0.000004,s0d100
1,0.250,0.000,0.257862,0.000014,0.257865,0.000005,0.257865,0.000006,0.257865,0.000005,s0d100
2,0.250,0.000,0.251273,0.004407,0.253057,0.000027,0.253000,0.000178,0.253066,0.000003,s0d100
3,0.250,0.000,0.253596,0.000011,0.253599,0.000004,0.253599,0.000003,0.253599,0.000004,s0d100
4,0.250,0.000,0.254280,0.000667,0.253407,0.003133,0.251910,0.006818,0.254520,0.000037,s0d100
...,...,...,...,...,...,...,...,...,...,...,...
245,0.077,0.208,0.065253,0.248790,0.068509,0.248709,0.065375,0.217146,0.067638,0.244938,s75d25
246,0.077,0.208,0.076213,0.245166,0.090042,0.222313,0.085067,0.198516,0.080965,0.252948,s75d25
247,0.077,0.208,0.065032,0.264448,0.074050,0.244056,0.067893,0.229035,0.070194,0.249858,s75d25
248,0.077,0.208,0.070395,0.265763,0.078659,0.253501,0.070611,0.244739,0.067231,0.279421,s75d25


In [25]:
# Ground-truth [phi, nu] for every model condition, keyed by condition label.
# The per-group mean is used to collapse each condition's rows to one pair
# (the true values look constant within a condition in the displayed rows).
true_values = dict()
# Group by the scalar column name (not a one-element list) so the group key is
# the plain label string on every pandas version, with no tuple unwrapping.
for mc, mdf in df_full.groupby('modelcondition'):
    s = mdf[['true_phi_value', 'true_nu_value']].mean(axis=0)
    # Label-based lookup: integer keys on a label-indexed Series are a
    # deprecated positional fallback.
    true_values[mc] = [s['true_phi_value'], s['true_nu_value']]

print(true_values)

{'s0d100': [0.25, 0.0], 's100d0': [0.0, 0.28800000000000003], 's25d75': [0.19999999999999996, 0.065], 's50d50': [0.143, 0.134], 's75d25': [0.077, 0.20800000000000002]}


# Calculate the root-mean-square Euclidean distance between estimated and true (phi, nu)

In [26]:
print("don't split by model condition")
results = dict()
df = df_full

# Pool each parameter's estimates across all four methods
# (cassg, startle, problin, nj). The problin columns were previously missing
# because the startle columns were listed twice.
phi = pd.concat([df['cassg_phi'], df['startle_phi'], df['problin_phi'], df['nj_phi']])
nu = pd.concat([df['cassg_nu'], df['startle_nu'], df['problin_nu'], df['nj_nu']])


don't split by model condition


In [27]:
# Fresh containers for the per-method analysis:
#   features   - names of the two estimated parameters
#   method_map - method name -> list of estimate lists
#   results    - nested dict for per-method results
features = ['phi', 'nu']
method_map, results = defaultdict(list), defaultdict(dict)


In [28]:
# Store each method's estimates as [phi_values, nu_values].
# Assigning the pair (instead of appending to the defaultdict's list) keeps
# this cell idempotent: re-running it cannot leave more than two lists per
# method, which would break the `x, y = method_map[method]` unpacking later.
for method in ('cassg', 'startle', 'problin', 'nj'):
    method_map[method] = [
        list(df[f'{method}_phi']),
        list(df[f'{method}_nu']),
    ]

In [29]:
# Show the ground-truth [phi, nu] pair stored for each model condition.
true_values

{'s0d100': [0.25, 0.0],
 's100d0': [0.0, 0.28800000000000003],
 's25d75': [0.19999999999999996, 0.065],
 's50d50': [0.143, 0.134],
 's75d25': [0.077, 0.20800000000000002]}

In [30]:
# Integer position -> model-condition label.
mc_mappings = dict(enumerate(['s0d100', 's100d0', 's25d75', 's50d50', 's75d25']))

In [31]:
def dist(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Each argument must unpack into exactly two coordinates, e.g. ``[x, y]``.
    """
    (x_a, y_a), (x_b, y_b) = p1, p2
    dx = x_a - x_b
    dy = y_a - y_b
    return sqrt(dx**2 + dy**2)

In [33]:
# Root-mean-square Euclidean error of each method's (phi, nu) estimates
# against the true values, reported per model condition.
#
# Rows are assigned to a condition by their 'modelcondition' label instead of
# by fixed 50-row slices, so the result stays correct when a condition has a
# different number of rows or the rows are not sorted by condition.
condition_labels = list(df['modelcondition'])

for method in method_map:
    print(method)
    # phi, nu
    phi_est, nu_est = method_map[method]

    for i in range(len(mc_mappings)):
        # model condition
        cond = mc_mappings[i]
        truth = true_values[cond]

        sq_err_sum = 0
        n_samples = 0
        for phi_hat, nu_hat, label in zip(phi_est, nu_est, condition_labels):
            if label != cond:
                continue
            # Named `err`, not `d`: `d` holds the raw DataFrame read from the
            # data file and must survive this cell so earlier cells can re-run.
            err = dist([phi_hat, nu_hat], truth)
            sq_err_sum += err**2
            n_samples += 1

        # A condition with no rows yields NaN rather than a ZeroDivisionError.
        rmse = sqrt(sq_err_sum / n_samples) if n_samples else float('nan')
        print(f'method:{method}, modelcond:{cond}, dist:{rmse}')


cassg
method:cassg, modelcond:s0d100, dist:0.005725073141834611
method:cassg, modelcond:s100d0, dist:0.042069397266385894
method:cassg, modelcond:s25d75, dist:0.020655939870476814
method:cassg, modelcond:s50d50, dist:0.03472474162658097
method:cassg, modelcond:s75d25, dist:0.05326858382582422
startle
method:startle, modelcond:s0d100, dist:0.004838454822407907
method:startle, modelcond:s100d0, dist:0.050691228465384805
method:startle, modelcond:s25d75, dist:0.01588911382219392
method:startle, modelcond:s50d50, dist:0.021444879676975215
method:startle, modelcond:s75d25, dist:0.039637435552470374
problin
method:problin, modelcond:s0d100, dist:0.005198507735540967
method:problin, modelcond:s100d0, dist:0.015975956078713976
method:problin, modelcond:s25d75, dist:0.015794694961357533
method:problin, modelcond:s50d50, dist:0.01809981414571279
method:problin, modelcond:s75d25, dist:0.026749650088987307
nj
method:nj, modelcond:s0d100, dist:0.004811102340096712
method:nj, modelcond:s100d0, dist:

In [34]:
# List the distinct model-condition labels in the working frame.
df['modelcondition'].unique()

array(['s0d100', 's100d0', 's25d75', 's50d50', 's75d25'], dtype=object)

In [35]:
# List the distinct model-condition labels in the full selected frame.
df_full['modelcondition'].unique()

array(['s0d100', 's100d0', 's25d75', 's50d50', 's75d25'], dtype=object)