In [20]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, cohen_kappa_score, make_scorer
from pickle import dump

towers = pd.read_csv('data/tower_data.csv')
rounds = pd.read_csv('data/rounds_data.csv')
attempts = pd.read_csv('data/attempt_data.csv')
bloon_data = pd.read_csv('assets/bloon_rounds.csv')

#state lives lost in previous round, not current
rounds['lives_lost'] = rounds['lives_lost'].shift(-1)
rounds['lives_lost'] = rounds['lives_lost'].fillna(0)
rounds['previous_action'] = rounds['action'].shift(1)
rounds['previous_action'] = rounds['previous_action'].fillna('none')

rounds['lost_hp'] = rounds['lives_lost'] != 0 #Response

#add tower cols
for col in list(set(towers['type'])):
    rounds[col] = 0

#current monkey placements
for _, row in towers.iterrows():
    attempt = row['attempt']
    round_placed = row['round_placed']
    tower_type = row['type']
    
    mask = (rounds['attempt'] == attempt) & (rounds['round'] >= round_placed)
    rounds.loc[mask, tower_type] += 1

#merge bloon data
round_pred = pd.merge(rounds,bloon_data, left_on='round', right_on='Round')

#prepare data
response = round_pred['lost_hp']
round_pred = round_pred.drop(['attempt','action','Round','round','lives','lives_lost','lost_hp'],axis=1)
round_pred = pd.get_dummies(round_pred)
round_pred = round_pred.astype(int)

#split
X_train, X_test, y_train, y_test = train_test_split(round_pred, response, test_size=0.2)

#scale
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [2]:
kappa_scorer = make_scorer(cohen_kappa_score)

param_dist = {
    'n_estimators': [100, 200, 300, 400, 500],
    'max_depth': [None, 10, 20, 30, 40],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2', None],
    'bootstrap': [True, False]
}

rf = RandomForestClassifier()

best_model = RandomizedSearchCV(
    estimator=rf, 
    param_distributions=param_dist, 
    n_iter=50, 
    scoring=kappa_scorer, 
    cv=5, 
    n_jobs=-1,
    verbose=3
)

best_model.fit(X_train, y_train)

print("Best Parameters:", best_model.best_params_)
print("Best Cohen's Kappa:", best_model.best_score_)

Fitting 5 folds for each of 50 candidates, totalling 250 fits
Best Parameters: {'n_estimators': 300, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': None, 'max_depth': 30, 'bootstrap': True}
Best Cohen's Kappa: 0.3196046424617356


In [21]:
#Random Forest
model = RandomForestClassifier(n_estimators=300, min_samples_split=2, min_samples_leaf=4, max_features=None, max_depth=30, bootstrap=True, class_weight='balanced')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

       False       0.80      0.79      0.79      1059
        True       0.56      0.58      0.57       500

    accuracy                           0.72      1559
   macro avg       0.68      0.68      0.68      1559
weighted avg       0.72      0.72      0.72      1559



In [29]:
#tweak for better specificity
probabilities = model.predict_proba(X_test)[:, 1]
predictions = (probabilities >= .25).astype(int)

print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

       False       0.90      0.50      0.64      1059
        True       0.46      0.88      0.60       500

    accuracy                           0.62      1559
   macro avg       0.68      0.69      0.62      1559
weighted avg       0.76      0.62      0.63      1559



In [22]:
#save model
with open('assets/round_predictions.pkl', 'wb') as f:
    dump(model, f)

with open('assets/round_pred_scale.pkl', 'wb') as f:
    dump(scaler, f)