# Customizing Tree Search

In [None]:
# your other imports here ...
# `random` and `np` are used further down (random.seed, np.all, np.zeros);
# import them explicitly instead of relying on the star imports to leak them.
import random
import sys, os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# TODO: replace with your path/to/ninarow
# By default the repository root is taken to be the parent of the current
# working directory.
ninarowdir = os.path.dirname(os.getcwd())

# Folder that holds the model-fitting modules (parsers, model_fit, utils, ...).
modelfitdir = f"{ninarowdir}/model_fitting/"

# Put that folder at the front of the import path so the imports below
# resolve against it first.
sys.path.insert(0, modelfitdir)
from parsers import *
from model_fit import *
from utils import *
import model_fit
from tqdm import tqdm
from tree_search import *

# WARNING: the autoreload extension (%load_ext autoreload / %autoreload 2)
# may interfere with the multi-threaded fitting code. If you see
# thread-related errors, turn off the two lines below.
%load_ext autoreload
%autoreload 2

# Data Handling

### File Formatting
The data columns should be ordered: 

    - black_pieces (binary), 
    - white_pieces (binary), 
    - player_color (Black/White), 
    - move (binary), 
    - response time (not used in fitting), 
    - [group_id] (optional), 
    - participant_id

For more information, see `parsers.py`.

# Fitting the Model to Data

In [None]:
# --- Locations -------------------------------------------------------------
# Folder containing the split CSVs (1.csv, 2.csv, ...) and the folder that
# fitted parameters / log-likelihoods are written to.
data_path = "../data"
output_path = "../data/out"

# --- Cross-validation layout -----------------------------------------------
# `fold_number` is 1-based; the cells below index with `fold_number - 1`.
n_splits = 5
fold_number = 1

# --- Fitter options --------------------------------------------------------
threads = 1
random_sample = False
verbose = True

# Create the output folder up front (no error if it already exists).
print(f"Building output directory at {output_path}")
os.makedirs(output_path, exist_ok=True)

## Comparing Original Code with Custom Code

In [None]:
from multiprocessing import Pool, Value, set_start_method
from prodict import Prodict

# Baseline run: cross-validate the stock `DefaultModel` through `ModelFitter`.

# `fold_number` is 1-based and is turned into an index with `fold_number - 1`
# below; reject out-of-range values so 0 cannot silently wrap around to -1.
assert 1 <= fold_number <= n_splits, f"fold_number must be in 1..{n_splits}, got {fold_number}"

# First, check that all the splits are there. The directory is listed once and
# any missing file is named in the error instead of failing with a bare assert.
split_file_names = [f"{i + 1}.csv" for i in range(n_splits)]
files_in_data_path = set(os.listdir(data_path))
missing_split_files = [name for name in split_file_names if name not in files_in_data_path]
assert not missing_split_files, f"Missing split files in {data_path!r}: {missing_split_files}"
print("Detected splits in this directory. Loading splits ...")

# Then read each split into its own DataFrame.
splits = [pd.read_csv(f"{data_path}/{name}") for name in split_file_names]

# Convert every row of every split into a "CSVMove" object via df_to_CSVMove
# (CSVMove is defined in parsers.py), giving one list of moves per fold.
fold_data = [list(df_to_CSVMove(split, warn = False)) for split in splits]

# Fixed seeds and a single worker for this comparison run.
random.seed(10)
model_fit.initialize_thread_pool(1, manual_seed = 10)

# Options object handed to the fitter.
args = Prodict()
args.random_sample = False
args.verbose = True
args.threads = 1

model_fitter = ModelFitter(DefaultModel(), args = args)

# Hold out fold `fold_number` (converted to a 0-based index) and keep the
# fitted parameters plus train/test log-likelihoods for the cells below.
params, loglik_train, loglik_test = model_fitter.cross_validate(fold_data, fold_number - 1)


We are now ready to begin running our model. We will begin with the default model and then feed it to our `ModelFitter` class. Note that this code may take a very long time to run (a couple of hours).

If you are using multiple threads and see a thread-related error, make sure to turn OFF `%load_ext autoreload` and `%autoreload 2` in the import cell above.

In [None]:
from multiprocessing import Pool, Value, set_start_method
from prodict import Prodict

# Custom run: cross-validate with `TreeSearch` using the unqualified
# `initialize_thread_pool` / `cross_validate` currently in scope.

# `fold_number` is 1-based and is turned into an index with `fold_number - 1`
# below; reject out-of-range values so 0 cannot silently wrap around to -1.
assert 1 <= fold_number <= n_splits, f"fold_number must be in 1..{n_splits}, got {fold_number}"

# First, check that all the splits are there. The directory is listed once and
# any missing file is named in the error instead of failing with a bare assert.
split_file_names = [f"{i + 1}.csv" for i in range(n_splits)]
files_in_data_path = set(os.listdir(data_path))
missing_split_files = [name for name in split_file_names if name not in files_in_data_path]
assert not missing_split_files, f"Missing split files in {data_path!r}: {missing_split_files}"
print("Detected splits in this directory. Loading splits ...")

# Then read each split into its own DataFrame.
splits = [pd.read_csv(f"{data_path}/{name}") for name in split_file_names]

# Convert every row of every split into a "CSVMove" object via df_to_CSVMove
# (CSVMove is defined in parsers.py), giving one list of moves per fold.
fold_data = [list(df_to_CSVMove(split, warn = False)) for split in splits]

# Fixed seeds and a single worker for this comparison run.
random.seed(10)
initialize_thread_pool(1, manual_seed = 10)

# NOTE(review): only `args.threads` is read in this cell; `random_sample` and
# `verbose` are set but not passed anywhere visible here — confirm whether
# they are still needed.
args = Prodict()
args.random_sample = None
args.verbose = True
args.threads = 1

# NOTE(review): the raw `splits` DataFrames are passed here, not the
# `fold_data` lists built above — confirm which one this `cross_validate`
# expects.
cross_validate(TreeSearch(), splits, leave_out_idx=fold_number - 1, subsample = 1, threads = args.threads)

In [None]:
# Re-run the star import so the unqualified names used below are (re)bound to
# model_fit's definitions.
from model_fit import *

# Same fixed seeds / single worker as the cross-validation cells.
random.seed(10)
initialize_thread_pool(1, manual_seed=10)

# Fitter options come from the configuration cell.
_fitter_options = {
    "random_sample": random_sample,
    "verbose": verbose,
    "threads": threads,
}
model_fitter = ModelFitter(DefaultModel(), **_fitter_options)

# Fit on the fold selected by the 1-based `fold_number`.
model_fitter.fit_model(fold_data[fold_number - 1])

We can examine the fitted model parameters below ...

In [None]:
# Wrap the train / test log-likelihoods returned by cross-validation in
# single-column DataFrames so they can be saved as CSV.
loglik_train_df = pd.DataFrame(loglik_train, columns = ["loglik_train"])
loglik_test_df = pd.DataFrame(loglik_test, columns = ["loglik_test"])

print("Fitted Model Parameters")
# One row, one column per parameter name. `index=[0]` belongs to the DataFrame
# constructor (needed when all values are scalars); previously it was passed
# to dict(), which created a spurious "index" column that then had to be
# dropped and would have clobbered a parameter actually named "index".
param_df = pd.DataFrame(dict(zip(model_fitter.model.param_names, params)), index = [0])
param_df

## Saving Parameters

We will save the log likelihood for the train and test folds, as well as the parameters, as `{fold_number}_lltrain.csv`, `{fold_number}_lltest.csv`, and `{fold_number}_params.csv` in the directory specified by `{output_path}`

In [None]:
# Destination file -> table to write. Everything goes into `output_path`,
# prefixed with the fold number.
_outputs = {
    f"{output_path}/{fold_number}_lltrain.csv": loglik_train_df,
    f"{output_path}/{fold_number}_lltest.csv": loglik_test_df,
    f"{output_path}/{fold_number}_params.csv": param_df,
}

# Write each table without its row index.
for _csv_path, _frame in _outputs.items():
    _frame.to_csv(_csv_path, index=False)

# Running an Existing Model

## Loading the Model Parameters

First, let's load our model parameters in from a csv file ...

In [None]:
# Model whose heuristic / search will be driven by the saved parameters.
model = DefaultModel()

# Read the parameter table written by the save cell and take its first (only)
# row as a flat array of values.
param_path = f"{output_path}/{fold_number}_params.csv"
param_df = pd.read_csv(param_path)
params = param_df.iloc[0].to_numpy()

# Build the heuristic that the tree search below will use.
heuristic = model.create_heuristic(params)

## Predicting moves on an example board

Let's start by taking an arbitrary board state and running our model prediction on it ...

In [None]:
# Piece positions as integer bitmasks (presumably one bit per board square —
# see parsers.py for the encoding).
black = (1 << 24) | (1 << 14) | (1 << 4)   # == 16793616
white = (1 << 23) | (1 << 22)              # == 12582912
show(black, white)

# Wrap each bitmask in a fourbynine_pattern and combine them into a board.
_black_pattern = fourbynine_pattern(black)
_white_pattern = fourbynine_pattern(white)
board = fourbynine_board(_black_pattern, _white_pattern)

In [None]:
# Build a search for this board with the fitted parameters and run it to
# completion.
search = model.create_search(params, heuristic, board)
search.complete_search()

# Ask the heuristic for the best move in the finished tree and keep its
# board position.
_search_tree = search.get_tree()
_best_move = heuristic.get_best_move(_search_tree)
best_move_index = _best_move.board_position

In [None]:
# One figure with a single axes to draw the board on.
fig, ax = plt.subplots(figsize=(4, 3))
_canvas = (fig, ax)

# Draw the current position, then mark the model's predicted move on top.
show(black, white, existing_fig=_canvas)
add_circle(best_move_index, existing_fig=_canvas, color="blue")
print("Predicted Move shown in BLUE")


## Make a Heatmap of Predictions from Multiple Searches
You might notice that the model's behavior is noisy. To show how the model behaves over several runs, we'll plot a heatmap of the predictions from repeated searches.

In [None]:
# Number of independent searches to run on the same board.
n_samples = 100

# Grid used for the heatmap: 4 rows x 9 columns = 36 squares. Named once so the
# counter length and the reshape below cannot drift apart.
board_rows, board_cols = 4, 9

# counts[i] = how many searches picked board position i as the best move.
counts = np.zeros(board_rows * board_cols)
for _ in tqdm(range(n_samples), leave=True):
    # A fresh search each time; repeated runs can pick different moves.
    search = model.create_search(params, heuristic, board)
    search.complete_search()
    best_move_index = heuristic.get_best_move(search.get_tree()).board_position
    counts[best_move_index] += 1

fig = plt.figure(figsize = (4, 3))
ax = plt.subplot(1, 1, 1)
show(black, white, existing_fig=(fig, ax))

# `[::-1]` reverses the ROW order of the grid (a top-to-bottom flip) to match
# the way the board is plotted; note that `extent` also lists the y limits as
# (-160, -440). The trailing `;` keeps the AxesImage repr out of the output.
ax.imshow(counts.reshape(board_rows, board_cols)[::-1], cmap = "Blues", extent=[85, 715, -160, -440], alpha = 1);