In [1]:
from math import sqrt
from statistics import mean, stdev
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from collections import defaultdict

In [2]:
# Location of the merged table holding true and estimated phi/nu values.
datafile = "problintopo_estparams_merged.txt"
# Raw table, parsed with pandas' default comma-separated settings.
d = pd.read_csv(filepath_or_buffer=datafile)

In [3]:
# Columns kept for the analysis: the true phi/nu values, each method's
# phi/nu estimates, and the model-condition label.
keep_cols = [
    "true_phi_value", "true_nu_value",
    "cassg_phi", "cassg_nu",
    "startle_phi", "startle_nu",
    "problin_phi", "problin_nu",
    "modelcondition",
]
df_full = d.loc[:, keep_cols]

In [4]:
# Display the selected columns (pandas truncates the middle rows in the rendered output).
df_full

Unnamed: 0,true_phi_value,true_nu_value,cassg_phi,cassg_nu,startle_phi,startle_nu,problin_phi,problin_nu,modelcondition
0,0.250,0.000,0.258661,0.000017,0.258664,0.000009,0.258664,0.000010,s0d100
1,0.250,0.000,0.257862,0.000012,0.257865,0.000005,0.257865,0.000006,s0d100
2,0.250,0.000,0.251366,0.004157,0.253054,0.000037,0.253000,0.000178,s0d100
3,0.250,0.000,0.253598,0.000007,0.253599,0.000004,0.253599,0.000003,s0d100
4,0.250,0.000,0.254272,0.000685,0.253437,0.003025,0.251910,0.006818,s0d100
...,...,...,...,...,...,...,...,...,...
245,0.077,0.208,0.065464,0.247626,0.068929,0.242506,0.065375,0.217146,s75d25
246,0.077,0.208,0.076692,0.243255,0.088806,0.221936,0.085067,0.198516,s75d25
247,0.077,0.208,0.065182,0.263328,0.072959,0.243844,0.067893,0.229035,s75d25
248,0.077,0.208,0.072229,0.259871,0.077404,0.250222,0.070611,0.244739,s75d25


In [5]:
# Map each model condition to its true [phi, nu] pair, taken as the mean of
# the `true_*_value` columns over that condition's rows.
true_values = dict()
# Group by a scalar key so `mc` is the condition label itself on every pandas
# version (a length-1 list key yields 1-tuples on some versions and plain
# labels on others, which made the old `mc[0]` version-dependent).
for mc, mdf in df_full[['true_phi_value', 'true_nu_value', 'modelcondition']].groupby('modelcondition'):
    s = mdf[['true_phi_value', 'true_nu_value']].mean(axis=0)
    # Read the means by column label; integer keys on a label-indexed Series
    # are deprecated positional access.
    true_values[mc] = [float(s['true_phi_value']), float(s['true_nu_value'])]

print(true_values)

{'s0d100': [0.25, 0.0], 's100d0': [0.0, 0.28800000000000003], 's25d75': [0.19999999999999996, 0.065], 's50d50': [0.143, 0.134], 's75d25': [0.077, 0.20800000000000002]}


# Calculate root-mean-square Euclidean distance between estimated and true (phi, nu)

In [6]:
print("don't split by model condition")
results = dict()
df = df_full

# Pool the estimates of all three methods (cassg, startle, problin) into one
# Series per parameter. The third entry previously repeated the startle
# column, so problin estimates were never included.
phi = pd.concat([df['cassg_phi'], df['startle_phi'], df['problin_phi']])
nu = pd.concat([df['cassg_nu'], df['startle_nu'], df['problin_nu']])


don't split by model condition


In [7]:
# p1 = phi.to_numpy().reshape(-1, 1).T[0]
# p2 = nu.to_numpy().reshape(-1, 1).T[0]

In [8]:
# Names of the two estimated parameters.
features = ['phi', 'nu']
# `results`: missing keys start as an empty dict.
# `method_map`: missing keys start as an empty list.
results, method_map = defaultdict(dict), defaultdict(list)


In [9]:
# Store each method's estimates as [phi_values, nu_values], row-aligned with `df`.
# Assign rather than append so re-running this cell cannot leave more than two
# lists per method (which would break the later `x, y = method_map[method]`).
for method in ('cassg', 'startle', 'problin'):
    method_map[method] = [
        df[f'{method}_phi'].tolist(),
        df[f'{method}_nu'].tolist(),
    ]

In [10]:
# Show the per-condition true [phi, nu] pairs held in `true_values`.
true_values

{'s0d100': [0.25, 0.0],
 's100d0': [0.0, 0.28800000000000003],
 's25d75': [0.19999999999999996, 0.065],
 's50d50': [0.143, 0.134],
 's75d25': [0.077, 0.20800000000000002]}

In [11]:
# Integer index -> model-condition label lookup.
mc_mappings = dict(enumerate(['s0d100', 's100d0', 's25d75', 's50d50', 's75d25']))

In [12]:
def dist(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Each of `p1` and `p2` must unpack into exactly two coordinates.
    """
    (x1, y1), (x2, y2) = p1, p2
    dx = x1 - x2
    dy = y1 - y2
    return sqrt(dx**2 + dy**2)

In [13]:
# For every method and model condition, print the root-mean-square Euclidean
# distance between the estimated (phi, nu) pairs and that condition's true pair.
condition_labels = list(df['modelcondition'])  # row-aligned with the lists in method_map

for method in method_map:
    # phi, nu
    phi_est, nu_est = method_map[method]

    for cond in mc_mappings.values():
        # Pick rows by their condition label instead of fixed 50-row slices, so
        # unequal or reordered groups are never scored against another
        # condition's true values.
        rows = [k for k, label in enumerate(condition_labels) if label == cond]
        if not rows:
            # No samples for this condition: report it rather than divide by zero.
            print(f'method:{method}, modelcond:{cond}, dist:nan')
            continue

        sq_err_total = 0
        for k in rows:
            # Named `err`, not `d`, so the raw DataFrame `d` is not overwritten.
            err = dist([phi_est[k], nu_est[k]], true_values[cond])
            sq_err_total += err**2
        rmse = sqrt(sq_err_total / len(rows))
        print(f'method:{method}, modelcond:{cond}, dist:{rmse}')


method:cassg, modelcond:s0d100, dist:0.005592761780115098
method:cassg, modelcond:s100d0, dist:0.0407363515229898
method:cassg, modelcond:s25d75, dist:0.019873982857001777
method:cassg, modelcond:s50d50, dist:0.032546900042729886
method:cassg, modelcond:s75d25, dist:0.06170522503545548
method:startle, modelcond:s0d100, dist:0.004874351064665293
method:startle, modelcond:s100d0, dist:0.04311640213965291
method:startle, modelcond:s25d75, dist:0.016216765856079597
method:startle, modelcond:s50d50, dist:0.02166491174840197
method:startle, modelcond:s75d25, dist:0.038482280383993324
method:problin, modelcond:s0d100, dist:0.005198507735540967
method:problin, modelcond:s100d0, dist:0.015975956078713976
method:problin, modelcond:s25d75, dist:0.015794694961357533
method:problin, modelcond:s50d50, dist:0.01809981414571279
method:problin, modelcond:s75d25, dist:0.026749650088987307


In [14]:
# List the distinct model-condition labels present in `df`.
df['modelcondition'].unique()

array(['s0d100', 's100d0', 's25d75', 's50d50', 's75d25'], dtype=object)

In [15]:
# Same listing for `df_full`, the frame that `df` was assigned from (`df = df_full`).
df_full['modelcondition'].unique()

array(['s0d100', 's100d0', 's25d75', 's50d50', 's75d25'], dtype=object)