In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [2]:
df = pd.read_csv("data/allHorizonData_cut.csv")

# data table

In [31]:
# Compute delta_r (reward difference: left - right)
def compute_delta_r(row):
    left_rewards = [row[f"r{i}"] for i in range(1, 4) if row[f"c{i}"] == 1]
    right_rewards = [row[f"r{i}"] for i in range(1, 4) if row[f"c{i}"] == 2]
    return (sum(left_rewards)/len(left_rewards) if left_rewards else 0) - \
           (sum(right_rewards)/len(right_rewards) if right_rewards else 0)

# information difference
def compute_delta_i(row):
    left_count = sum(1 for i in range(1, 5) if row[f"c{i}"] == 1)
    right_count = sum(1 for i in range(1, 5) if row[f"c{i}"] == 2)
    return left_count - right_count

In [33]:
all_features = ['gameLength', 'uc', 'r1', 'r2', 'r3', 'r4', 'c1', 'c2', 'c3', 'c4', 'c5']
df_all = df[all_features].dropna()

df_all["delta_r"] = df_all.apply(compute_delta_r, axis=1)
df_all["delta_i"] = df_all.apply(compute_delta_i, axis=1)

X_delta = df_all[["delta_r", "delta_i"]]
y = df_all["c5"]

X_train_delta, X_test_delta, y_train, y_test = train_test_split(X_delta, y, test_size=0.2, random_state=42)

In [34]:
print(X_delta.head())
print(y.head())

   delta_r  delta_i
0    -44.0       -2
1     -8.5       -2
2     14.5        0
3      0.5        0
4    -37.5        0
0    2
1    1
2    1
3    1
4    2
Name: c5, dtype: int64


In [35]:
# Split the data into Horizon 1 and Horizon 6 groups
df_h1 = df_all[df_all["gameLength"] == 5]
df_h6 = df_all[df_all["gameLength"] == 10]

# Prepare inputs for each group
X_h1 = df_h1[["delta_r", "delta_i"]]
y_h1 = df_h1["c5"]

X_h6 = df_h6[["delta_r", "delta_i"]]
y_h6 = df_h6["c5"]


# paper model

full data table for model training 
- only for compare hyperparams, not generalization

In [36]:
# Train logistic regression for Horizon 1
model_h1 = LogisticRegression()
model_h1.fit(X_h1, y_h1)
alpha_beta_h1 = dict(zip(X_h1.columns, model_h1.coef_[0]))
intercept_h1 = model_h1.intercept_[0]

# Train logistic regression for Horizon 6
model_h6 = LogisticRegression()
model_h6.fit(X_h6, y_h6)
alpha_beta_h6 = dict(zip(X_h6.columns, model_h6.coef_[0]))
intercept_h6 = model_h6.intercept_[0]

alpha_beta_h1, intercept_h1, alpha_beta_h6, intercept_h6

({'delta_r': np.float64(-0.05479231554144084),
  'delta_i': np.float64(0.2582635967943217)},
 np.float64(-0.010566145795627696),
 {'delta_r': np.float64(-0.03198504895194719),
  'delta_i': np.float64(0.4091989183379467)},
 np.float64(0.06704102586863656))

next 
- use split horizon conditions for predictions 