In [63]:
import sys
# Appending python modules so we can run simulations
sys.path.append("../python")
import argparse
import pandas as pd
import numpy as np
import os
import json
import csv
import time
from statsmodels.formula.api import ols
from models import abstraction

In [64]:
class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

In [88]:
args = {
    "nrange1": 5,
    "nrange2": 6,
    "drange1": 100,
    "drange2": 102,
    "scenedir": "../data/json/pilot8/trial/",
    "datadir": "../data/cleaned_data/cleaned_data_exp2.json",
    "savedir": "."
}

In [90]:
args = dotdict(args)

In [67]:
# Parameter spaces
N = range(args.nrange1,args.nrange2)
D = range(args.drange1,args.drange2)
E = np.arange(0.1,1.0,0.01)

In [68]:
# Import empirical data
data = pd.read_json(args.datadir)

In [69]:
# Import scene configuration files
scene_files = [scene_json for scene_json in os.listdir(args.scenedir) if scene_json.endswith('.json')]
scene_args = {}
for file in scene_files:
    with open(args.scenedir+file, 'r') as f:
        sargs = (json.loads(f.read()))
        scene_args[file.split(".")[0]] = sargs

In [70]:
len(scene_args)

180

In [71]:
# Observed RT
rt_mean = data.groupby('scene').part_zrt.apply(np.mean).to_frame()
scenes = rt_mean.index.to_list()

# Regression parameters
formula = 'part_zrt ~ sim_time_z'

In [72]:
def my_model(N,D,E,scene_args=scene_args):
    # Model dataframe
    model_df = pd.DataFrame({})

    # Get model predictins for each scene
    for scene in scenes:
        # Sample model 100 times on scene
        model_result = abstraction(scene_args[scene],N=int(N),D=D,E=E,num_samples=1)
        model_row = pd.DataFrame({
            "scene": scene,
            "collision_prob": np.mean(model_result['collision_probability']),
            "sim_time": model_result['simulation_time'],
            "type": "abstraction_model"
        })
        model_df = pd.concat([model_df,model_row])
           
    return model_df

In [87]:

# Dictionary of parameters and respective model fit resutls
model_results = []
# Grid search
for n_i in N:
    for d_i in D:
        for e_i in E:
            t1 = time.time()
            model_predictions = pd.DataFrame({})
            model_predictions = my_model(n_i,d_i,e_i)
            model_predictions['sim_time_z'] = model_predictions.sim_time.transform(lambda x: (x-x.mean())/x.std())
            df = model_predictions.groupby('scene').sim_time_z.apply(np.mean).to_frame()
            df = pd.merge(rt_mean, df, left_index = True, right_index = True)
            model_fit = ols(formula, df).fit()
            mse_res = model_fit.mse_resid
            mse_mod = model_fit.mse_model
            mse_tot = model_fit.mse_total
            model_results.append((n_i,d_i,e_i,mse_res,mse_mod,mse_tot))
            t2 = time.time()
            print(t2-t1)

0.2671539783477783
0.2438032627105713
0.2451000213623047
0.2439107894897461
0.24622201919555664
0.24313688278198242
0.2433779239654541
0.24400997161865234
0.2459878921508789
0.24561023712158203
0.2467188835144043
0.25158023834228516
0.25968384742736816
0.253140926361084
0.252565860748291
0.25089216232299805
0.2561969757080078
0.2533538341522217
0.24627089500427246
0.24519562721252441
0.2509613037109375
0.25098276138305664
0.2497882843017578
0.25610899925231934
0.2606086730957031
0.256680965423584
0.25010085105895996
0.25341105461120605
0.2518289089202881
0.25672411918640137
0.24964189529418945
0.24669218063354492
0.2474050521850586
0.2472379207611084
0.24703693389892578
0.26613306999206543
0.2527132034301758
0.25299978256225586
0.24902701377868652
0.25118017196655273
0.2493729591369629
0.25123000144958496
0.25843310356140137
0.25980496406555176
0.24987173080444336
0.2552502155303955
0.25592994689941406
0.25766706466674805
0.24690794944763184
0.24803900718688965
0.2549748420715332
0.258

In [58]:
model_results

[(5, 100, 0.1, 0.04154590470899713, 1.033372690633394, 0.047086836473937896),
 (5, 100, 0.11, 0.04154590470899713, 1.033372690633394, 0.047086836473937896),
 (5, 100, 0.12, 0.04154590470899713, 1.033372690633394, 0.047086836473937896),
 (5, 100, 0.13, 0.04154590470899713, 1.033372690633394, 0.047086836473937896),
 (5,
  100,
  0.13999999999999999,
  0.04154590470899713,
  1.033372690633394,
  0.047086836473937896),
 (5,
  100,
  0.14999999999999997,
  0.04154590470899713,
  1.033372690633394,
  0.047086836473937896),
 (5,
  100,
  0.15999999999999998,
  0.04154590470899713,
  1.033372690633394,
  0.047086836473937896),
 (5,
  100,
  0.16999999999999998,
  0.04154590470899713,
  1.033372690633394,
  0.047086836473937896),
 (5,
  100,
  0.17999999999999997,
  0.04154590470899713,
  1.033372690633394,
  0.047086836473937896),
 (5,
  100,
  0.18999999999999995,
  0.04154590470899713,
  1.033372690633394,
  0.047086836473937896),
 (5,
  100,
  0.19999999999999996,
  0.04154590470899713,
  1

In [97]:
# Save model results to CSV
with open(f"{args.savedir}/grid_fits_n_{n_i}_d_{d_i}.csv",'w') as out:
    csv_out=csv.writer(out)
    csv_out.writerow(['N','D','E','MSE Residual', 'MSE Model', 'MSE Total'])
    for row in model_results:
        csv_out.writerow(row)

In [83]:
# List of tuples
def ranges(start, stop, offset):
    '''
    Returns a list of P tuples that represent contiguous
    partitions on the number line between Start and Stop
    inclusive, with each partition having size O
    '''
    partitions = []
    rs = list(range(start, stop, offset))
    for r in rs:
        partitions.append((r, r+offset))
    return partitions

In [84]:
start = 1
stop = 1000
P = 5
O = 200

In [85]:
a = list(range(start, stop, O))
a

[1, 201, 401, 601, 801]

In [86]:
ranges(start, stop, O)

[(1, 201), (201, 401), (401, 601), (601, 801), (801, 1001)]

In [104]:
with open("../python/grid_fit.csv", "r") as csvfile:
    spamreader = csv.reader(csvfile, delimiter=' ', quotechar='|')
    for row in spamreader:
        print(', '.join(row))

N,D,E,MSE, Residual,MSE, Model,MSE, Total
5,100,0.1,0.04154590470899713,1.033372690633394,0.047086836473937896
5,100,0.11,0.04154590470899713,1.033372690633394,0.047086836473937896
5,100,0.12,0.04154590470899713,1.033372690633394,0.047086836473937896
5,100,0.13,0.04154590470899713,1.033372690633394,0.047086836473937896
5,100,0.13999999999999999,0.04154590470899713,1.033372690633394,0.047086836473937896
5,100,0.14999999999999997,0.04154590470899713,1.033372690633394,0.047086836473937896
5,100,0.15999999999999998,0.04154590470899713,1.033372690633394,0.047086836473937896
5,100,0.16999999999999998,0.04154590470899713,1.033372690633394,0.047086836473937896
5,100,0.17999999999999997,0.04154590470899713,1.033372690633394,0.047086836473937896
5,100,0.18999999999999995,0.04154590470899713,1.033372690633394,0.047086836473937896
5,100,0.19999999999999996,0.04154590470899713,1.033372690633394,0.047086836473937896
5,100,0.20999999999999996,0.04154590470899713,1.033372690633394,0.047086836473937896

In [105]:
df = pd.read_csv("../python/grid_fit.csv")

In [106]:
df

Unnamed: 0,N,D,E,MSE Residual,MSE Model,MSE Total
0,5,100,0.10,0.041546,1.033373,0.047087
1,5,100,0.11,0.041546,1.033373,0.047087
2,5,100,0.12,0.041546,1.033373,0.047087
3,5,100,0.13,0.041546,1.033373,0.047087
4,5,100,0.14,0.041546,1.033373,0.047087
...,...,...,...,...,...,...
85,5,100,0.95,0.041546,1.033373,0.047087
86,5,100,0.96,0.041546,1.033373,0.047087
87,5,100,0.97,0.041546,1.033373,0.047087
88,5,100,0.98,0.045008,0.417060,0.047087
