# Demo Notebook

In [50]:
# your other imports here ...
import sys, os
import pandas as pd

# TODO: replace with your path/to/ninarow
ninarowdir = os.path.dirname(os.getcwd())
modelfitdir = ninarowdir + "/model_fitting/"
# os.listdir(modelfitdir)

# sets the import path to the model-fitting directory
sys.path.insert(0, modelfitdir)
from parsers import *
from model_fit import *
import model_fit

# WARNING: DO NOT USE %load_ext autoreload and %autoreload 2 may interfere with 
# the Multi-threading processes!
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Data Handling

### File Formatting
The data columns should be ordered: 

    - black_pieces (binary), 
    - white_pieces (binary), 
    - player_color (Black/White), 
    - move (binary), 
    - response time (not used in fitting), 
    - [group_id] (optional), 
    - participant_id

For more information, see `parsers.py`.

In [3]:
# Disabled one-off conversion, kept for reference: it reads data.pkl, fills in
# constant response_time / participant_id columns, copies bp/wp into
# black/white, keeps the first 10 rows, and writes the result to data.csv.
# Nothing in this cell runs while the lines below stay commented out.
# data_path = "../data"
# df = pd.read_pickle(f"{data_path}/data.pkl")
# df["response_time"] = 1
# df["participant_id"] = 1 # in the demo, there is only one participant, but if you have multiple, you may want to change this
# df["black"] = df["bp"]
# df["white"] = df["wp"]
# df = df[:10][["black", "white", "color", "move", "response_time", "participant_id"]]
# df.to_csv(f"{data_path}/data.csv", index = False)

In [4]:
# TODO: create a folder (called "data/" here) that holds your data as data.csv,
# then point `data_path` at that folder ...
data_path = "../data"
data_csv = f"{data_path}/data.csv"

# The demo only keeps the first 10 rows of the CSV.
df = pd.read_csv(data_csv).iloc[:10]
df.head()

Unnamed: 0,black,white,color,move,response_time,participant_id
0,16,0,White,4194304,1,1
1,16400,4194304,White,8388608,1,1
2,16793616,12582912,White,2097152,1,1
3,8192,0,White,4194304,1,1
4,2105344,4194304,White,16,1,1


This is what our data looks like in our CSV ...

## Creating Cross Validation Splits

We can create cross-validation splits with `utils.make_splits`, which takes a DataFrame
and saves each split as a CSV file in the directory passed as `output_dir` (here, `data_path`).

In [None]:
from utils import make_splits

# Split `df` into cross-validation folds; the split files are saved under `data_path`.
splits = make_splits(df, output_dir=data_path)

# Peek at the first rows of the first split.
splits[0].head()

Saving split1 to ../data/1.csv
Saving split2 to ../data/2.csv
Saving split3 to ../data/3.csv
Saving split4 to ../data/4.csv
Saving split5 to ../data/5.csv


Unnamed: 0,black,white,color,move,response_time,participant_id
0,16400,4194304,White,8388608,1,1
1,2684354560,4194304,White,2097152,1,1


The parser takes in a CSV filename and turns it into a list of 
objects of type CSVMove ...

# Fit the Model

In [60]:
# Where the split CSVs live, and where fitted results will be written.
data_path = "../data"
output_path = "../data/out"

# Cross-validation layout: number of split files, and the (1-based) fold to fit.
n_splits, fold_number = 5, 1

# Options handed to the model fitter further down.
threads = 1
random_sample = False
verbose = True

# Create the output directory up front; it is fine if it already exists.
print(f"Building output directory at {output_path}")
os.makedirs(output_path, exist_ok=True)

Building output directory at ../data/out


## Part 1: Loading Data

In [None]:
# First, check that every expected split file ("1.csv" ... "<n_splits>.csv") is
# present. The directory is listed once, and any missing files are named in the
# assertion message so the failure is actionable.
expected_files = [f"{i + 1}.csv" for i in range(n_splits)]
present_files = set(os.listdir(data_path))
missing_files = [name for name in expected_files if name not in present_files]
assert not missing_files, f"Missing split files in {data_path}: {missing_files}"
print("Detected splits in this directory. Loading splits ...")

# then we read them in, in fold order
splits = [pd.read_csv(f"{data_path}/{name}") for name in expected_files]

# we convert every row of our CSV to a "CSVMove object" using df_to_CSVMove - we do so for all the splits
# CSVMove is a class that is defined in the parsers.py file
fold_data = [list(df_to_CSVMove(split, warn=False)) for split in splits]

Detected splits in this directory. Loading splits ...
Building output directory at ../data/out


## Part 2: Model Fitting

To ensure that the code is perfectly reproducible, we use a **single thread** and we set the manual seed to a number of our choosing. If we use multiple threads, the code will still work but the operating system may choose the order of the threads arbitrarily, leading to variation in the output.

You should see an output that looks something like: 

    Setting manual seed 10 for single-thread
    Thread 0: Base Seed 10, Seed: 10, Random Number: 601088376405717203

Note that if you run this code multiple times, the Random Number should be the same. This means our thread will be initialized with a particular random seed - as it runs, it will not be reinitialized, so the randomness will still proceed, but in a predictable way.

In [51]:
from multiprocessing import Pool, Value, set_start_method

# One seed value shared by the `random` module and the single-thread pool,
# so repeated runs start from the same state.
MANUAL_SEED = 10
random.seed(MANUAL_SEED)
initialize_thread_pool(1, manual_seed=MANUAL_SEED)

Setting manual seed 10 for single-thread
Thread 0: Base Seed 10, Seed: 10, Random Number: 601088376405717203



We are now ready to run our model. We start with the default model and pass it to our `ModelFitter` class.

In [52]:
# Start from the default model and wrap it in a fitter configured with the
# options chosen in the configuration cell.
model = DefaultModel()
model_fitter = ModelFitter(
    model,
    random_sample=random_sample,
    verbose=verbose,
    threads=threads,
)

# `fold_number` is 1-based, so the fitter is given `fold_number - 1`.
# The returned parameters and log-likelihoods are written to disk in Part 3.
params, loglik_train, loglik_test = model_fitter.cross_validate(
    fold_data, fold_number - 1
)

Cross validating split 1 against the other 4 splits
[Preprocessing] Initial log-likelihood estimation


 10%|█         | 1/10 [00:01<00:12,  1.43s/it]


KeyboardInterrupt: 

We can examine the fitted model parameters below ...

In [79]:
print("Fitted Model Parameters")
# Build a one-row table with one column per parameter name, in the model's order.
# A single-record list gives pandas the row directly, so there is no need to
# inject a temporary "index" key and drop it again (which would also discard a
# parameter that happened to be named "index").
param_df = pd.DataFrame([dict(zip(model_fitter.model.param_names, params))])
param_df

Fitted Model Parameters


Unnamed: 0,Stopping threshold,Pruning threshold,Gamma,Lapse rate,Opponent scale,Exploration constant,Center weight,FP C_act,FP C_pass,FP delta
0,1.143486,0.011378,0.237918,0.053329,1.304252,1.829346,0.435217,-0.667626,4.117251,5.289307


## Part 3: Saving Parameters

In [None]:
def _as_frame(values, column):
    """Return `values` as a pandas object that supports `.to_csv`.

    DataFrames and Series are passed through unchanged; any other iterable
    (e.g. a plain list of log-likelihoods) becomes a one-column DataFrame
    whose column is named `column`.
    """
    if isinstance(values, (pd.DataFrame, pd.Series)):
        return values
    return pd.DataFrame({column: list(values)})


# NOTE(review): the log-likelihoods look like plain sequences of floats rather
# than pandas objects (in which case calling `.to_csv` on them directly would
# raise AttributeError) - confirm against model_fit.
_as_frame(loglik_train, "loglik_train").to_csv(f"{output_path}/{fold_number}_lltrain.csv", index = False)
_as_frame(loglik_test, "loglik_test").to_csv(f"{output_path}/{fold_number}_lltest.csv", index = False)
param_df.to_csv(f"{output_path}/{fold_number}_params.csv", index = False)

[4.699305559043987,
 1.1783333333333332,
 0.3,
 0.7833333333333333,
 3.378979594446965,
 3.662621018934961,
 0.6,
 1.0262301587301585]