# Demo Notebook

In [1]:
# your other imports here ...
import sys, os
import pandas as pd

# TODO: point this at your local path/to/ninarow checkout
# (as written, it is the parent of the current working directory)
ninarowdir = os.path.dirname(os.getcwd())
modelfitdir = f"{ninarowdir}/model_fitting/"
# Sanity check, if needed: os.listdir(modelfitdir)

# Put the model-fitting directory first on the import path so the
# imports that follow resolve against it
sys.path.insert(0, modelfitdir)
from parsers import *
from model_fit import *
import model_fit


# WARNING: do NOT use `%load_ext autoreload` / `%autoreload 2` in this notebook,
# as they interfere with the multi-threading processes!

## Data Handling

### File Formatting
The data columns should be ordered: 

    - black_pieces (binary), 
    - white_pieces (binary), 
    - player_color (Black/White), 
    - move (binary), 
    - response time (not used in fitting), 
    - [group_id] (optional), 
    - participant_id

For more information, see `parsers.py`.

In [2]:
data_path = "../data"
# Columns kept for the demo CSV, in the order they are written out
demo_columns = ["black", "white", "color", "move", "response_time", "participant_id"]

# Load the pickled frame and add the derived columns in a single step
prepared = pd.read_pickle(f"{data_path}/data.pkl").assign(
    response_time=1,
    # the demo has a single participant; if your data has several,
    # you may want to assign real ids here instead
    participant_id=1,
    black=lambda frame: frame["bp"],
    white=lambda frame: frame["wp"],
)

# Keep only the first 100 rows and the demo columns, then write the CSV
df = prepared[:100][demo_columns]
df.to_csv(f"{data_path}/data.csv", index=False)

In [3]:
# TODO: create a folder called /data and point `data_dir` at it ...
data_dir = "../data"
data_csv = data_dir + "/data.csv"

# Read the demo CSV back into a DataFrame
df = pd.read_csv(data_csv)

This is what our data looks like in our CSV ...

## Cross Validation Splits

In [4]:
from utils import make_splits

In [5]:
# Build the cross-validation splits from `df`. Per the output below, each
# split is also saved as <n>.csv inside `output_dir` (here: 1.csv ... 5.csv).
splits = make_splits(df, output_dir = data_dir)

Saving split1 to ../data/1.csv
Saving split2 to ../data/2.csv
Saving split3 to ../data/3.csv
Saving split4 to ../data/4.csv
Saving split5 to ../data/5.csv


In [6]:
# Preview the first rows of the first split
splits[0].head()

Unnamed: 0,black,white,color,move,response_time,participant_id
0,1073741824,0,White,2097152,1,1
1,4832886784,6291456,White,8388608,1,1
2,17197699080,14688256,White,16,1,1
3,16400,4194304,White,8388608,1,1
4,4294967296,0,White,4194304,1,1


## Load Data into Model Fitting Code

In [7]:
from utils import *
# Convert the first split with df_to_CSVMove, materialize the result,
# and display its first element
list(df_to_CSVMove(splits[0]))[0]



1073741824	0	White	2097152	1.0	1	1

The parser takes a CSV filename and returns a list of `CSVMove` objects ...

In [8]:
# Parse the first split file from disk; the header row of the CSV is skipped
first_split_file = f"{data_path}/1.csv"
data = parse_participant_file(first_split_file, ignore_csv_header=True)

# Report the row type, then show the first ten parsed rows
row_type = type(data[0])
print(f"the rows are type {row_type}")
print("and they look like")
print(data[:10])

File is either not a JSON file, or is malformed. Attempting to parse as a CSV...
the rows are type <class 'parsers.CSVMove'>
and they look like
[1073741824	0	White	2097152	1.0	1	1, 4832886784	6291456	White	8388608	1.0	1	1, 17197699080	14688256	White	16	1.0	1	1, 16400	4194304	White	8388608	1.0	1	1, 4294967296	0	White	4194304	1.0	1	1, 4312793344	14680064	White	16384	1.0	1	1, 8192	0	White	4194304	1.0	1	1, 4849664008	14684160	White	8192	1.0	1	1, 8192	0	White	4194304	1.0	1	1, 5368709120	2097152	White	4194304	1.0	1	1]


### Loading the Splits and Running Cross Validation

In [9]:
# --- Configuration ---------------------------------------------------------
data_path = "../data"        # directory holding the split files 1.csv ... <n_splits>.csv
output_path = "../data/out"  # directory the fitting results are written to
n_splits = 5                 # number of split files expected in data_path
fold_number = 1              # 1-based split to cross-validate (the next cell passes fold_number - 1)
threads = 1                  # forwarded to ModelFitter in the next cell
random_sample = False        # forwarded to ModelFitter in the next cell
verbose = True               # forwarded to ModelFitter in the next cell

# Validate and load from the SAME directory. Previously the check looked in
# `data_path` while the load read from `data_dir`, so the two could silently
# diverge. Built-ins are used instead of `np.all`, since numpy is never
# imported explicitly in this notebook.
split_files = [f"{data_path}/{i + 1}.csv" for i in range(n_splits)]
missing_files = [path for path in split_files if not os.path.isfile(path)]
assert not missing_files, f"Missing split files: {missing_files}"
assert 1 <= fold_number <= n_splits, f"fold_number must be between 1 and {n_splits}"

print("Detected splits in this directory. Loading splits ...")
splits = [pd.read_csv(path) for path in split_files]
# One list of CSVMove rows per split
fold_data = [list(df_to_CSVMove(split, warn = False)) for split in splits]

print(f"Building output directory at {output_path}")
os.makedirs(output_path, exist_ok = True)

Detected splits in this directory. Loading splits ...
Building output directory at ../data/out


In [None]:
from multiprocessing import Pool, Value, set_start_method
from pathlib import Path

# Value('d', 0) is a shared double initialised to 0.
# NOTE(review): the meaning of the second argument (12) is not visible in this
# notebook - confirm against initialize_thread_pool in model_fit.
initialize_thread_pool(Value('d', 0), 12)
model_fitter = ModelFitter(DefaultModel(),
                           random_sample = random_sample,
                           verbose = verbose,
                           threads = threads)

# cross_validate takes a 0-based fold index, while fold_number is 1-based
params, loglik_train, loglik_test = model_fitter.cross_validate(fold_data, fold_number - 1)

# `output_path` is a plain string, so it must be wrapped in a Path before the
# `/` operator can be used. The file suffix is the 1-based fold number (the
# original code referenced an undefined loop variable `i` here).
results_dir = Path(output_path)
results_dir.mkdir(parents=True, exist_ok=True)

with (results_dir / f"params{fold_number}.csv").open('w') as f:
    f.write(','.join(str(x) for x in params))
with (results_dir / f"lltrain{fold_number}.csv").open('w') as f:
    f.write(','.join(str(x) for x in loglik_train))
# The test log-likelihoods keep their original format: space-separated,
# terminated by a newline
with (results_dir / f"lltest{fold_number}.csv").open('w') as f:
    f.write(' '.join(str(x) for x in loglik_test) + '\n')

Cross validating split 1 against the other 4 splits
[Preprocessing] Initial log-likelihood estimation


100%|██████████| 10/10 [00:42<00:00,  4.24s/it]
100%|██████████| 80/80 [00:00<00:00, 8548.90it/s]


[0] Params: [2.001, 0.02, 0.2, 0.05, 1.2, 0.801, 1.001, 0.4, 3.501, 5.0] NLL: 160.8165
Beginning optimization of a STOCHASTIC objective function

 Iteration    f-count      E[f(x)]        SD[f(x)]           MeshScale          Method              Actions
     0           1         160.816             nan               1                                  
[1] Params: [1.053, 0.68, 0.147, 0.292, 0.539, 0.952, -1.899, -1.372, -4.072, -3.799] NLL: 246.9017
[2] Params: [1.334, 0.294, 0.414, 0.005, 1.939, -4.663, -4.106, 4.424, -3.281, 2.393] NLL: 210.133
[3] Params: [1.632, 0.782, 0.037, 0.156, 1.082, -3.052, 3.237, 1.782, -0.542, -3.545] NLL: 244.0774
[4] Params: [1.913, 0.396, 0.272, 0.423, 1.346, 1.294, 0.444, -4.99, -2.114, 0.264] NLL: 225.5033
[5] Params: [2.396, 0.624, 0.304, 0.122, 1.769, -2.373, 1.753, -1.235, 2.314, -0.088] NLL: 233.4644
[6] Params: [2.677, 0.127, 0.006, 0.331, 0.755, 3.271, 4.585, 3.037, 0.02, 3.682] NLL: 242.6217
[7] Params: [2.958, 0.95, 0.381, 0.446, 1.61, 4.17, 