In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score

In [6]:
url = "https://raw.githubusercontent.com/jmaxwallace/Capstone/main/model_base.csv"
df = pd.read_csv(url)

display(df.columns)

Index(['Interaction index', 'Player index', 'Opponent index', 'Player name',
       'Opponent name', 'Score', 'Score per turn', 'Initial cooperation', '0',
       '1',
       ...
       '190', '191', '192', '193', '194', '195', '196', '197', '198', '199'],
      dtype='object', length=208)

In [16]:
interaction = df['Interaction index']
final_scores = df['Score']
turns = df.drop(columns = ['Interaction index', 'Player index', 'Opponent index', 'Player name',
       'Opponent name', 'Score', 'Score per turn', 'Initial cooperation'])

display(turns)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
1,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,0
3,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,0,0
4,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,0,1,1,0,1,0,1,0,1,0,...,1,1,0,0,1,0,1,0,1,1
236,1,0,0,1,1,0,0,0,0,1,...,1,0,1,1,0,0,1,0,1,0
237,0,0,1,0,1,1,0,0,1,0,...,0,1,1,1,0,0,0,1,1,1
238,1,1,1,0,1,1,0,0,1,0,...,0,0,0,0,0,0,0,1,1,1


In [146]:
N = 10

X = []
y = []

for game_idx in range(0, len(turns), 2):
    strat_A_moves = turns.iloc[game_idx].values
    strat_B_moves = turns.iloc[game_idx + 1].values

    # Can change to game_idx + 1 to read for Strategy B's scores
    score = final_scores[game_idx]

    for turn in range(N, 200):
        A_recent = strat_A_moves[turn-N:turn]
        B_recent = strat_B_moves[turn-N:turn]
        X.append(np.hstack([A_recent, B_recent]))
        y.append(score)

# Convert to arrays
X = np.array(X)
y = np.array(y)

display(X.shape)

(22800, 20)

In [108]:
display(y.shape)

(22800,)

In [111]:
from sklearn.preprocessing import StandardScaler

# Normalize moves
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [113]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit linear regression model
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Predictions
y_pred = lin_reg.predict(X_test)

# Performance metrics
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Linear Regression Performance:")
print(f"  MSE: {mse:.3f}")
print(f"  MAE: {mae:.3f}")
print(f"  R^2: {r2:.3f}")

Linear Regression Performance:
  MSE: 5896.072
  MAE: 52.849
  R^2: 0.753


In [180]:
def create_dataset(turns, final_scores, N):
    """
    Generates the X (features) and y (targets) variables for a dual-output model.
    
    Parameters:
    - turns: DataFrame where each row is one half of a game (strategy A's moves in row 0, strategy B's in row 1)
    - final_scores: List or array of final scores for each game
    - N: Lookback window size (i.e., number of previous turns to consider)

    Returns:
    - X: Features (input data) of shape (num_samples, 2 * N) 
    - y: Targets (output data) of shape (num_samples, 2) (final scores for both strategies)
    """
    
    X = []
    y = []

    # Loop over each game, assuming games are grouped in pairs of rows
    for game_idx in range(0, len(turns), 2):
        # Get the moves for Strategy A and Strategy B
        strat_A_moves = turns.iloc[game_idx].values
        strat_B_moves = turns.iloc[game_idx + 1].values

        # Get the final scores for Strategy A and Strategy B
        score_A = final_scores[game_idx]
        score_B = final_scores[game_idx + 1]

        # Generate samples for each turn from N to 200 (based on lookback)
        for turn in range(N, 200):
            # Get the most recent N moves for both strategies
            A_recent = strat_A_moves[turn-N:turn]
            B_recent = strat_B_moves[turn-N:turn]

            # Combine the recent moves into a single input sample
            X.append(np.hstack([A_recent, B_recent]))
            # Append the corresponding final scores to y
            y.append([score_A, score_B])

    # Convert to numpy arrays
    X = np.array(X)
    y = np.array(y)
    # Normalize moves
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    return X, y


In [272]:
X, y = create_dataset(turns, final_scores, 10)

print(X.shape)
print(y.shape)

(22800, 20)
(22800, 2)


In [273]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the model
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Predict on test set
y_pred = lin_reg.predict(X_test)

# Evaluate performance for each output separately
mse_A = mean_squared_error(y_test[:, 0], y_pred[:, 0])
mse_B = mean_squared_error(y_test[:, 1], y_pred[:, 1])

mae_A = mean_absolute_error(y_test[:, 0], y_pred[:, 0])
mae_B = mean_absolute_error(y_test[:, 1], y_pred[:, 1])

r2_A = r2_score(y_test[:, 0], y_pred[:, 0])
r2_B = r2_score(y_test[:, 1], y_pred[:, 1])

print(f"Strategy A - MSE: {mse_A:.3f}, MAE: {mae_A:.3f}, R²: {r2_A:.3f}")
print(f"Strategy B - MSE: {mse_B:.3f}, MAE: {mae_B:.3f}, R²: {r2_B:.3f}")

Strategy A - MSE: 5896.072, MAE: 52.849, R²: 0.753
Strategy B - MSE: 5786.727, MAE: 44.546, R²: 0.844


In [288]:
X, y = create_dataset(turns, final_scores, 20)

print(X.shape)
print(y.shape)

(21600, 40)
(21600, 2)


In [290]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the model
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Predict on test set
y_pred = lin_reg.predict(X_test)

# Evaluate performance for each output separately
mse_A = mean_squared_error(y_test[:, 0], y_pred[:, 0])
mse_B = mean_squared_error(y_test[:, 1], y_pred[:, 1])

mae_A = mean_absolute_error(y_test[:, 0], y_pred[:, 0])
mae_B = mean_absolute_error(y_test[:, 1], y_pred[:, 1])

r2_A = r2_score(y_test[:, 0], y_pred[:, 0])
r2_B = r2_score(y_test[:, 1], y_pred[:, 1])

print(f"Strategy A - MSE: {mse_A:.3f}, MAE: {mae_A:.3f}, R²: {r2_A:.3f}")
print(f"Strategy B - MSE: {mse_B:.3f}, MAE: {mae_B:.3f}, R²: {r2_B:.3f}")

Strategy A - MSE: 4032.476, MAE: 41.819, R²: 0.831
Strategy B - MSE: 3836.472, MAE: 36.563, R²: 0.896


In [292]:
X, y = create_dataset(turns, final_scores, 100)

print(X.shape)
print(y.shape)

(12000, 200)
(12000, 2)


In [294]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the model
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Predict on test set
y_pred = lin_reg.predict(X_test)

# Evaluate performance for each output separately
mse_A = mean_squared_error(y_test[:, 0], y_pred[:, 0])
mse_B = mean_squared_error(y_test[:, 1], y_pred[:, 1])

mae_A = mean_absolute_error(y_test[:, 0], y_pred[:, 0])
mae_B = mean_absolute_error(y_test[:, 1], y_pred[:, 1])

r2_A = r2_score(y_test[:, 0], y_pred[:, 0])
r2_B = r2_score(y_test[:, 1], y_pred[:, 1])

print(f"Strategy A - MSE: {mse_A:.3f}, MAE: {mae_A:.3f}, R²: {r2_A:.3f}")
print(f"Strategy B - MSE: {mse_B:.3f}, MAE: {mae_B:.3f}, R²: {r2_B:.3f}")

Strategy A - MSE: 975.440, MAE: 22.113, R²: 0.959
Strategy B - MSE: 1073.848, MAE: 23.947, R²: 0.972


In [296]:
X, y = create_dataset(turns, final_scores, 150)

print(X.shape)
print(y.shape)

(6000, 300)
(6000, 2)


In [298]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the model
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Predict on test set
y_pred = lin_reg.predict(X_test)

# Evaluate performance for each output separately
mse_A = mean_squared_error(y_test[:, 0], y_pred[:, 0])
mse_B = mean_squared_error(y_test[:, 1], y_pred[:, 1])

mae_A = mean_absolute_error(y_test[:, 0], y_pred[:, 0])
mae_B = mean_absolute_error(y_test[:, 1], y_pred[:, 1])

r2_A = r2_score(y_test[:, 0], y_pred[:, 0])
r2_B = r2_score(y_test[:, 1], y_pred[:, 1])

print(f"Strategy A - MSE: {mse_A:.3f}, MAE: {mae_A:.3f}, R²: {r2_A:.3f}")
print(f"Strategy B - MSE: {mse_B:.3f}, MAE: {mae_B:.3f}, R²: {r2_B:.3f}")

Strategy A - MSE: 598.812, MAE: 18.546, R²: 0.974
Strategy B - MSE: 703.978, MAE: 20.333, R²: 0.980


In [351]:
X, y = create_dataset(turns, final_scores, 180)

print(X.shape)
print(y.shape)

(2400, 360)
(2400, 2)


In [353]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the model
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Predict on test set
y_pred = lin_reg.predict(X_test)

# Evaluate performance for each output separately
mse_A = mean_squared_error(y_test[:, 0], y_pred[:, 0])
mse_B = mean_squared_error(y_test[:, 1], y_pred[:, 1])

mae_A = mean_absolute_error(y_test[:, 0], y_pred[:, 0])
mae_B = mean_absolute_error(y_test[:, 1], y_pred[:, 1])

r2_A = r2_score(y_test[:, 0], y_pred[:, 0])
r2_B = r2_score(y_test[:, 1], y_pred[:, 1])

print(f"Strategy A - MSE: {mse_A:.3f}, MAE: {mae_A:.3f}, R²: {r2_A:.3f}")
print(f"Strategy B - MSE: {mse_B:.3f}, MAE: {mae_B:.3f}, R²: {r2_B:.3f}")

Strategy A - MSE: 594.501, MAE: 17.830, R²: 0.976
Strategy B - MSE: 624.159, MAE: 18.394, R²: 0.982
