In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from scipy.optimize import minimize
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import Normalizer

import warnings
warnings.filterwarnings("ignore")

KeyboardInterrupt: 

In [None]:
def load_data(name="charles_oneill", data_dir="~/intertemporal/data"):
    """Load the train and test choice data for one data set.

    Reads ``{data_dir}/{name}_train.csv`` and ``{data_dir}/{name}_test.csv``
    and keeps only the four columns used by the models in this notebook.

    Parameters
    ----------
    name : str
        File-name prefix of the data set (e.g. ``"jack_miller"``).
    data_dir : str or path-like, optional
        Directory containing the CSV files. Defaults to the location that
        was previously hard-coded, so existing calls behave as before.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train, test)``, each restricted to the columns
        ``SIR``, ``LDR``, ``Delay`` and ``Answer`` (in that order).
        NOTE(review): SIR/LDR presumably stand for the smaller-immediate and
        larger-delayed reward amounts -- confirm against the data dictionary.

    Raises
    ------
    FileNotFoundError
        If either CSV file does not exist.
    KeyError
        If a file lacks one of the required columns.
    """
    cols = ["SIR", "LDR", "Delay", "Answer"]
    train = pd.read_csv(f"{data_dir}/{name}_train.csv")
    test = pd.read_csv(f"{data_dir}/{name}_test.csv")
    return train[cols], test[cols]

In [None]:
# Fetch the train and test frames for the "jack_miller" data set.
train, test = load_data("jack_miller")

In [None]:
# Baseline classifier: gradient-boosted trees on row-normalised features.
X_train, y_train = train.drop(columns=['Answer']), train.Answer.values
X_test, y_test = test.drop(columns=['Answer']), test.Answer.values

# Fit the preprocessing step on the training split only and reuse it for the
# test split. Normalizer rescales every row independently, so the numbers are
# the same as before, but this stays leak-free if it is ever swapped for a
# stateful transformer such as StandardScaler.
trans_train = Normalizer().fit(X_train)
trans_test = trans_train  # alias kept so references to `trans_test` keep working
X_train = trans_train.transform(X_train)
X_test = trans_test.transform(X_test)

# The scikit-learn wrapper takes its seed as `random_state`; the previous
# `random_seed=1` is not a documented XGBClassifier parameter, so the seed
# was not actually being set.
xbc = xgb.XGBClassifier(verbosity=0, random_state=1)
xbc.fit(X_train, y_train)
preds = xbc.predict(X_test)
# accuracy_score's signature is (y_true, y_pred).
accuracy_score(y_test, preds)

In [497]:
def _choice_log_prob(k, sir, ldr, delay, real_choice):
    """Element-wise log-probability of the observed choice under the logit model.

    Shared by `get_prob_choice` and `generate_log_likelihood`. Accepts scalars
    or broadcast-compatible arrays for every argument.
    """
    sir = np.asarray(sir, dtype=float)
    ldr = np.asarray(ldr, dtype=float)
    delay = np.asarray(delay, dtype=float)
    # Hyperbolically discounted delayed reward minus the immediate reward.
    value_gap = ldr / (1 + k * delay) - sir
    # P(delayed) = sigmoid(value_gap) and P(immediate) = sigmoid(-value_gap),
    # so flip the sign for every choice that is not "delayed" (== 1).
    signed_gap = np.where(np.asarray(real_choice) == 1, value_gap, -value_gap)
    # log(sigmoid(x)) = -log(1 + exp(-x)). logaddexp evaluates this without
    # ever forming exp(sir) or exp(ldr / ...), which overflow to inf (and then
    # inf / inf = nan) once the amounts exceed roughly 709.
    return -np.logaddexp(0, -signed_gap)


def get_prob_choice(k, sir, ldr, delay, real_choice):
    """Probability the logit model assigns to the observed choice.

    From Chabris & Laibson (2008). The delayed reward is discounted
    hyperbolically, ``ldr / (1 + k * delay)``, and the choice probability is

        P(delayed) = exp(ldr / (1 + k*delay)) / (exp(sir) + exp(ldr / (1 + k*delay)))

    Parameters
    ----------
    k : float or array-like
        Discount-rate parameter.
    sir, ldr, delay : float or array-like
        Immediate reward, delayed reward and delay of the choice(s).
    real_choice : int or array-like
        Observed choice: 1 selects the delayed-reward probability; any other
        value selects the immediate-reward probability ``1 - P(delayed)``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Probability of the observed choice, with the broadcast shape of the
        inputs (a scalar when every input is a scalar).
    """
    return np.exp(_choice_log_prob(k, sir, ldr, delay, real_choice))


def generate_log_likelihood(current_k, train):
    """Negative log-likelihood of the observed choices for discount rate `current_k`.

    Despite the name, the value returned is the *negated* sum of the
    log-probabilities so that it can be passed directly to a minimiser.

    Parameters
    ----------
    current_k : float or 1-element array
        Candidate discount rate (scipy's `minimize` passes a 1-element array).
    train : pandas.DataFrame
        Must provide the columns ``SIR``, ``LDR``, ``Delay`` and ``Answer``.

    Returns
    -------
    float
        ``-sum(log P(observed choice))`` over all rows of `train`.
    """
    # One vectorised evaluation over all rows instead of a per-row Python loop;
    # working in log space also avoids log(0) = -inf when a probability
    # underflows.
    log_probs = _choice_log_prob(
        current_k, train["SIR"], train["LDR"], train["Delay"], train["Answer"]
    )
    return float(-np.sum(log_probs))

def simulate_choice(row, k):
    """Predict the choice for one trial given discount rate `k`.

    `row` must support lookup of "LDR", "Delay" and "SIR". Returns 1 when the
    hyperbolically discounted LDR is at least as large as SIR, otherwise 0.
    """
    discounted_ldr = row["LDR"] / (1 + k * row["Delay"])
    if discounted_ldr - row["SIR"] >= 0:
        return 1
    return 0

In [519]:
# Reload both frames and express SIR and LDR in units of 100.
# NOTE(review): the /100 rescaling presumably keeps np.exp() in the logit
# model within floating-point range -- confirm. Delay is left unscaled
# (a /20 rescaling of Delay was left disabled in this cell).
train, test = load_data("jack_miller")
train = train.assign(SIR=train["SIR"] / 100, LDR=train["LDR"] / 100)
test = test.assign(SIR=test["SIR"] / 100, LDR=test["LDR"] / 100)

# Split each frame into the feature columns and the observed answers.
y_train = train["Answer"].values
y_test = test["Answer"].values
X_train = train.drop(columns="Answer")
X_test = test.drop(columns="Answer")

In [520]:
# Starting guess for the discount rate. Defined in this cell so it runs on a
# fresh kernel; previously `k_0` was only assigned in the following cell, so
# Restart & Run All raised NameError here.
k_0 = 0.002
# `args` is the tuple of extra positional arguments for the objective; the
# trailing comma makes it a real 1-tuple instead of a parenthesised DataFrame.
minimize(generate_log_likelihood, k_0, args=(train,), method='BFGS')

      fun: 19.838056807148774
 hess_inv: array([[4.04322136e-08]])
      jac: array([0.])
  message: 'Optimization terminated successfully.'
     nfev: 37
      nit: 7
     njev: 15
   status: 0
  success: True
        x: array([0.00040275])

In [521]:
# Fit the discount rate k by minimising the negative log-likelihood on the
# training data, then score the fitted model on the test data.
k_0 = 0.002
# NOTE(review): `bnds` is never passed to `minimize`, and BFGS cannot handle
# bounds, so k is currently unconstrained. If k >= 0 must be enforced, switch
# to method='L-BFGS-B' with bounds=[bnds] (this may change the fitted value).
bnds = (0, None)
# `args` must be a tuple of extra arguments, hence the trailing comma.
res = minimize(generate_log_likelihood, k_0, args=(train,), method='BFGS')
k_fit = res.x[0]
preds = X_test.apply(simulate_choice, k=k_fit, axis=1)
# accuracy_score's signature is (y_true, y_pred).
accuracy_score(y_test, preds)

0.84

In [522]:
# Display the fitted discount rate taken from `res.x[0]`.
k_fit

0.0004027532805937858