# Development

In [2]:
import pandas as pd
import numpy as np
import xgboost as xgb
from scipy.optimize import minimize
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import Normalizer

import warnings
warnings.filterwarnings("ignore")

In [70]:
def load_data(name="charles_oneill", data_dir="~/intertemporal/data"):
    """Load the train and test CSV files for one participant.

    Reads ``{data_dir}/{name}_train.csv`` and ``{data_dir}/{name}_test.csv``
    and keeps only the columns the models use.

    Args:
        name: File-name prefix identifying the participant.
        data_dir: Directory holding the CSV files. Defaults to the location
            that was previously hard-coded, so existing calls are unchanged.

    Returns:
        A ``(train, test)`` tuple of DataFrames restricted to the columns
        ``SIR``, ``LDR``, ``Delay`` and ``Answer`` (in that order).

    Raises:
        FileNotFoundError: If either CSV file does not exist.
        KeyError: If a file lacks one of the required columns.
    """
    cols = ["SIR", "LDR", "Delay", "Answer"]
    train = pd.read_csv(f"{data_dir}/{name}_train.csv")
    test = pd.read_csv(f"{data_dir}/{name}_test.csv")
    return train[cols], test[cols]

In [71]:
class ParameterFit:
    """Single-parameter discounting model fitted by maximum likelihood.

    The delayed option's value is ``LDR / (1 + k * Delay)``; the probability
    of each choice is the softmax of that value against ``SIR``. ``fit``
    estimates ``k`` on ``train`` and predicts choices for ``test``.

    Both frames must provide the columns ``SIR``, ``LDR``, ``Delay`` and
    (for training) ``Answer``, where ``Answer == 1`` marks the option whose
    value is the discounted ``LDR``.
    """

    def __init__(self, train, test):
        self.train = train
        self.test = test

    def _log_prob_choice(self, k, sir, ldr, delay, real_choice):
        """Return the log-probability of the observed choice(s).

        Works in log space so that large rewards do not overflow ``exp`` and
        near-certain choices do not collapse to ``log(0)``. All arguments may
        be scalars or broadcastable NumPy arrays.
        """
        discounted = ldr / (1 + k * delay)
        # log(exp(sir) + exp(discounted)) evaluated without overflow.
        log_norm = np.logaddexp(sir, discounted)
        # Any answer other than 1 is treated as choosing the SIR option.
        chosen = np.where(np.asarray(real_choice) == 1, discounted, sir)
        return chosen - log_norm

    def get_prob_choice(self, k, sir, ldr, delay, real_choice):
        """Return the model probability of the observed choice.

        Args:
            k: Discount rate.
            sir: Value of the non-discounted option.
            ldr: Undiscounted value of the delayed option.
            delay: Delay applied to ``ldr``.
            real_choice: Observed choice; ``1`` selects the discounted ``ldr``
                option, anything else the ``sir`` option.

        Returns:
            The softmax probability of ``real_choice`` (scalar for scalar
            inputs, array for array inputs).
        """
        return np.exp(self._log_prob_choice(k, sir, ldr, delay, real_choice))

    def generate_log_likelihood(self, current_k, train):
        """Return the negative log-likelihood of ``train`` under ``current_k``.

        Args:
            current_k: Candidate discount rate (scalar or one-element array,
                as supplied by ``scipy.optimize.minimize``).
            train: DataFrame with columns ``SIR``, ``LDR``, ``Delay``,
                ``Answer``.

        Returns:
            The negated sum of per-row log choice probabilities, suitable for
            a minimiser.
        """
        # Plain arrays keep broadcasting against a one-element ``current_k``
        # purely in NumPy.
        sir = train["SIR"].to_numpy(dtype=float)
        ldr = train["LDR"].to_numpy(dtype=float)
        delay = train["Delay"].to_numpy(dtype=float)
        answer = train["Answer"].to_numpy()
        log_probs = self._log_prob_choice(current_k, sir, ldr, delay, answer)
        return -np.sum(log_probs)

    def simulate_choice(self, row, k):
        """Return 1 if the discounted ``LDR`` is at least ``SIR`` for ``row``, else 0."""
        value = (row["LDR"] / (1 + k * row["Delay"])) - row["SIR"]
        return 1 if value >= 0 else 0

    def fit(self):
        """Fit ``k`` on the training data and predict the test choices.

        Minimises the negative log-likelihood with BFGS starting from
        ``k = 0.001``. The fitted value is stored on ``self.k_fit``.

        Returns:
            A Series of 0/1 predictions, one per row of ``self.test``.
        """
        k_0 = 0.001
        # ``args`` must be a tuple; ``(self.train)`` without the comma is not.
        res = minimize(self.generate_log_likelihood, k_0, args=(self.train,), method='BFGS')
        k_fit = res.x[0]
        self.k_fit = k_fit
        return self.test.apply(self.simulate_choice, k=k_fit, axis=1)

In [72]:
class Experiment:
    """Compare a supplied classifier against ``ParameterFit`` on one dataset.

    ``train`` and ``test`` are DataFrames containing an ``Answer`` column
    (the target) plus feature columns; ``SIR`` and ``LDR`` are additionally
    divided by ``scale`` before being passed to ``ParameterFit``.
    """

    def __init__(self, train, test, model, scale=10):
        self.train = train
        self.test = test
        # Split each frame into features and the target vector.
        self.X_train = train.drop(columns=['Answer'])
        self.y_train = train.Answer.values
        self.X_test = test.drop(columns=['Answer'])
        self.y_test = test.Answer.values
        self.model = model
        self.scale = scale

    def normalise(self):
        """Return ``(X_train, X_test)`` each passed through its own ``Normalizer``."""
        return tuple(
            Normalizer().fit(features).transform(features)
            for features in (self.X_train, self.X_test)
        )

    def normalise_params(self):
        """Return copies of ``train`` and ``test`` with ``SIR``/``LDR`` divided by ``scale``."""
        scaled_train = self.train.copy()
        scaled_test = self.test.copy()
        for frame in (scaled_train, scaled_test):
            for column in ("SIR", "LDR"):
                frame[column] = frame[column] / self.scale
        return scaled_train, scaled_test

    def run(self):
        """Fit both approaches and return ``(model_accuracy, param_accuracy)``."""
        # Classifier on the normalised feature matrices.
        features_train, features_test = self.normalise()
        self.model.fit(features_train, self.y_train)
        classifier_preds = self.model.predict(features_test)
        model_accuracy = accuracy_score(classifier_preds, self.y_test)

        # Parameter fit on the rescaled frames.
        scaled_train, scaled_test = self.normalise_params()
        fitted_preds = ParameterFit(scaled_train, scaled_test).fit()
        param_accuracy = accuracy_score(fitted_preds, self.y_test)

        return model_accuracy, param_accuracy

In [84]:
# Compare an XGBoost classifier with the parameter fit on one participant's data.
train, test = load_data(name="jack_miller")
model = xgb.XGBClassifier(verbosity=0)
exp = Experiment(train=train, test=test, model=model, scale=50)
exp.run()

(0.88, 0.84)

## Separate a dataset

In [90]:
# Cut the raw file into three consecutive row ranges:
# the first 50 rows, the next 25 rows, and everything after row 75.
df = pd.read_csv("~/Desktop/laura_ferguson.csv")
train_end, test_end = 50, 75
laura_ferguson_train = df[:train_end]
laura_ferguson_test = df[train_end:test_end]
laura_ferguson_kirby = df[test_end:]

In [92]:
# Write each subset to the shared data directory, omitting the index column.
for subset, destination in (
    (laura_ferguson_train, "~/intertemporal/data/laura_ferguson_train.csv"),
    (laura_ferguson_test, "~/intertemporal/data/laura_ferguson_test.csv"),
    (laura_ferguson_kirby, "~/intertemporal/data/laura_ferguson_kirby.csv"),
):
    subset.to_csv(destination, index=False)