In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# HUMAN DATA

In [10]:
# Get data
data = pd.read_csv('gameHelp_df.csv')

In [11]:
# Add new cols - binary if any avatar helps, binary if the other avatar helps
data['anyAvatarHelps'] = np.where((data['purpHelpFirst'] == 1) | (data['redHelpFirst'] == 1), 1, 0)
data['secondAvatarHelps'] = np.where((data['purpHelpFirst'] == 1) & (data['redAlsoHelp'] == 1) | (data['redHelpFirst'] == 1) & (data['purpAlsoHelp'] == 1), 1, 0)

In [12]:
data['patchSizeMax'] = data['patchSize'].apply(lambda x: np.max(eval(x)))
data['patchSizeMin'] = data['patchSize'].apply(lambda x: np.min(eval(x)))

In [13]:
# Define feature columns
feature_columns = ['costCond', 'visibilityCond', 'purpleBackpackSize', 'purpleEnergy', 'redBackpackSize', 'redEnergy', 'purpHelpFirst', 'redHelpFirst', 'anyAvatarHelps', 'nRedVeg', 'nPurpVeg', 'patchUniformity', 'nPatches', 'patchSizeMin', 'patchSizeMax']
X = data[feature_columns]
y = data['secondAvatarHelps']

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

## LOGISTIC REGRESSION

In [28]:
# Initialize and train logistic regression model
model = LogisticRegression(max_iter=1000, random_state=0)
model.fit(X_train, y_train)

In [29]:
# Make predictions
y_pred = model.predict(X_test)

In [30]:
# Evaluate model
print("LR Accuracy - Human:", accuracy_score(y_test, y_pred))
print("LR Classification Report - Human:\n", classification_report(y_test, y_pred))

LR Accuracy - Human: 0.8753315649867374
LR Classification Report - Human:
               precision    recall  f1-score   support

           0       0.94      0.90      0.92       912
           1       0.65      0.77      0.70       219

    accuracy                           0.88      1131
   macro avg       0.80      0.83      0.81      1131
weighted avg       0.89      0.88      0.88      1131



In [31]:
# Extract and rank coefficients
coefficients = pd.DataFrame({
    'Feature': feature_columns,
    'Coefficient': log_reg.coef_[0]
})
coefficients['AbsCoefficient'] = coefficients['Coefficient'].abs()
coefficients = coefficients.sort_values(by='AbsCoefficient', ascending=False)

print("Top Features by LR Coefficients (Unscaled Data) - Human:")
print(coefficients)

Top Features by Logistic Regression Coefficients (Unscaled Data):
               Feature  Coefficient  AbsCoefficient
8       anyAvatarHelps     2.125954        2.125954
7         redHelpFirst     1.383220        1.383220
6        purpHelpFirst     1.232639        1.232639
14        patchSizeMax    -0.657073        0.657073
10            nPurpVeg     0.472312        0.472312
5            redEnergy     0.416289        0.416289
11     patchUniformity    -0.394259        0.394259
0             costCond     0.384331        0.384331
13        patchSizeMin     0.323093        0.323093
3         purpleEnergy     0.202940        0.202940
12            nPatches     0.191077        0.191077
9              nRedVeg    -0.118860        0.118860
2   purpleBackpackSize     0.101278        0.101278
4      redBackpackSize    -0.101278        0.101278
1       visibilityCond     0.092911        0.092911


## RANDOM FOREST

In [23]:
# Train a Random Forest Classifier
model = RandomForestClassifier(random_state=0)
model.fit(X_train, y_train)

In [24]:
# Make predictions
y_pred = model.predict(X_test)

In [25]:
# Evaluate model
print("RF Accuracy - Human:", accuracy_score(y_test, y_pred))
print("RF Classification Report - Human:\n", classification_report(y_test, y_pred))

RF Accuracy - Human: 0.9319186560565871
RF Classification Report - Human:
               precision    recall  f1-score   support

           0       0.97      0.95      0.96       912
           1       0.80      0.86      0.83       219

    accuracy                           0.93      1131
   macro avg       0.88      0.91      0.89      1131
weighted avg       0.93      0.93      0.93      1131



In [27]:
# Extract and rank feature importances
importances = pd.DataFrame({
    'Feature': feature_columns,
    'Importance': model.feature_importances_
})
importances = importances.sort_values(by='Importance', ascending=False)

print("Top Features by RF Importance - Human:")
print(importances)

Top Features by Random Forest Importance:
               Feature  Importance
8       anyAvatarHelps    0.224404
5            redEnergy    0.145959
3         purpleEnergy    0.144861
7         redHelpFirst    0.126953
6        purpHelpFirst    0.094353
11     patchUniformity    0.049498
2   purpleBackpackSize    0.037015
4      redBackpackSize    0.036498
10            nPurpVeg    0.033590
9              nRedVeg    0.028042
14        patchSizeMax    0.022507
1       visibilityCond    0.021199
0             costCond    0.016266
13        patchSizeMin    0.013957
12            nPatches    0.004898


# TABULAR Q-LEARNING

In [76]:
# Get data
data = pd.read_csv('tabQHelp_df.csv')

In [77]:
# Add new cols - binary if any avatar helps, binary if the other avatar helps
data['anyAvatarHelps'] = np.where((data['purpHelpFirst'] == 1) | (data['redHelpFirst'] == 1), 1, 0)
data['secondAvatarHelps'] = np.where((data['purpHelpFirst'] == 1) & (data['redAlsoHelp'] == 1) | (data['redHelpFirst'] == 1) & (data['purpAlsoHelp'] == 1), 1, 0)

In [78]:
data['patchSizeMax'] = data['patchSize'].apply(lambda x: np.max(eval(x)))
data['patchSizeMin'] = data['patchSize'].apply(lambda x: np.min(eval(x)))

In [79]:
data.head()

Unnamed: 0,session,gameNum,costCond,visibilityCond,resourceCond,purpleEnergy,purpleBackpackSize,redEnergy,redBackpackSize,purpHelpFirst,...,redAlsoHelp,nRedVeg,nPurpVeg,patchUniformity,nPatches,patchSize,anyAvatarHelps,secondAvatarHelps,patchSizeMax,patchSizeMin
0,1,1,1,1,0,18,5,8,3,0,...,-1,3,5,1,2,53,1,1,5,3
1,1,2,1,1,0,0,5,0,3,1,...,1,7,7,1,5,322223,1,1,3,2
2,1,3,1,1,0,0,5,0,3,0,...,-1,5,3,1,2,53,1,1,5,3
3,1,4,1,1,0,0,5,0,3,0,...,-1,7,7,0,5,322223,1,1,3,2
4,1,5,1,1,0,20,5,6,3,1,...,1,4,4,0,2,44,1,1,4,4


In [89]:
# Define feature columns
feature_columns = ['costCond', 'visibilityCond', 'purpleBackpackSize', 'purpleEnergy', 'redBackpackSize', 'redEnergy', 'purpHelpFirst', 'redHelpFirst', 'anyAvatarHelps', 'nRedVeg', 'nPurpVeg', 'patchUniformity', 'nPatches', 'patchSizeMin', 'patchSizeMax']
X = data[feature_columns]
y = data['secondAvatarHelps']

In [90]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

## LOGISTIC REGRESSION

In [91]:
# Initialize and train logistic regression model
model = LogisticRegression(max_iter=1000, random_state=0)
model.fit(X_train, y_train)

In [92]:
# Make predictions
y_pred = model.predict(X_test)

In [93]:
# Evaluate model
print("LR Accuracy - Tabular Q:", accuracy_score(y_test, y_pred))
print("LR Classification Report - Tabular Q:\n", classification_report(y_test, y_pred))

LR Accuracy - Tabular Q: 0.9861111111111112
LR Classification Report - Tabular Q:
               precision    recall  f1-score   support

           0       1.00      0.11      0.19        56
           1       0.99      1.00      0.99      3544

    accuracy                           0.99      3600
   macro avg       0.99      0.55      0.59      3600
weighted avg       0.99      0.99      0.98      3600



In [95]:
# Extract and rank coefficients
coefficients = pd.DataFrame({
    'Feature': feature_columns,
    'Coefficient': log_reg.coef_[0]
})
coefficients['AbsCoefficient'] = coefficients['Coefficient'].abs()
coefficients = coefficients.sort_values(by='AbsCoefficient', ascending=False)

print("Top Features by LR Coefficients (Unscaled Data) - Tabular Q:")
print(coefficients)

Shape of log_reg.coef_: (1, 15)
Number of features: 16
Top Features by LR Coefficients (Unscaled Data) - Tabular Q:
               Feature  Coefficient  AbsCoefficient
8       anyAvatarHelps     2.125954        2.125954
7         redHelpFirst     1.383220        1.383220
6        purpHelpFirst     1.232639        1.232639
14        patchSizeMax    -0.657073        0.657073
10            nPurpVeg     0.472312        0.472312
5            redEnergy     0.416289        0.416289
11     patchUniformity    -0.394259        0.394259
0             costCond     0.384331        0.384331
13        patchSizeMin     0.323093        0.323093
3         purpleEnergy     0.202940        0.202940
12            nPatches     0.191077        0.191077
9              nRedVeg    -0.118860        0.118860
2   purpleBackpackSize     0.101278        0.101278
4      redBackpackSize    -0.101278        0.101278
1       visibilityCond     0.092911        0.092911


## RANDOM FOREST

In [96]:
# Train a Random Forest Classifier
model = RandomForestClassifier(random_state=0)
model.fit(X_train, y_train)

In [97]:
# Make predictions
y_pred = model.predict(X_test)

In [98]:
# Evaluate model
print("RF Accuracy - Tabular Q:", accuracy_score(y_test, y_pred))
print("RF Classification Report - Tabular Q:\n", classification_report(y_test, y_pred))

RF Accuracy - Tabular Q: 0.9875
RF Classification Report - Tabular Q:
               precision    recall  f1-score   support

           0       0.92      0.21      0.35        56
           1       0.99      1.00      0.99      3544

    accuracy                           0.99      3600
   macro avg       0.96      0.61      0.67      3600
weighted avg       0.99      0.99      0.98      3600



In [99]:
# Extract and rank feature importances
importances = pd.DataFrame({
    'Feature': feature_columns,
    'Importance': model.feature_importances_
})
importances = importances.sort_values(by='Importance', ascending=False)

print("Top Features by RF Importance - Tabular Q:")
print(importances)

Top Features by RF Importance - Tabular Q:
               Feature  Importance
3         purpleEnergy    0.425804
5            redEnergy    0.232707
8       anyAvatarHelps    0.144310
0             costCond    0.036504
1       visibilityCond    0.024155
7         redHelpFirst    0.022528
9              nRedVeg    0.020356
4      redBackpackSize    0.018950
6        purpHelpFirst    0.017131
2   purpleBackpackSize    0.016780
10            nPurpVeg    0.015579
11     patchUniformity    0.013697
14        patchSizeMax    0.006385
13        patchSizeMin    0.004041
12            nPatches    0.001073
