# Data Loading

In [10]:
import pandas as pd

# Read the trimmed game records from a JSON file (path relative to the working directory) into a DataFrame.
df = pd.read_json('../data/trimmed_games.json')

# Data Filtering

In [57]:
# Cast the 'game_won' column to integers so it can be used as a class label.
df['game_won'] = df['game_won'].astype(int)

# Label column followed by the passing, rushing and receiving statistics.
offense_columns = [
    'game_won',
    'passing_attempts', 'passing_completions', 'passing_yards', 'passing_rating',
    'passing_touchdowns', 'passing_interceptions', 'passing_sacks', 'passing_sacks_yards_lost',
    'rushing_attempts', 'rushing_yards', 'rushing_touchdowns',
    'receiving_targets', 'receiving_receptions', 'receiving_yards', 'receiving_touchdowns',
]

# Label column followed by the defensive statistics.
defense_columns = [
    'game_won',
    'defense_sacks', 'defense_tackles', 'defense_tackle_assists',
    'defense_interceptions', 'defense_interception_yards', 'defense_interception_touchdowns',
    'defense_safeties',
]

# Select each column group and discard every row that has a missing value.
offense = df[offense_columns].dropna()
defense = df[defense_columns].dropna()

# Pull the 'game_won' labels out as arrays, then keep only the feature columns.
y_offense = offense['game_won'].to_numpy()
offense = offense.drop(columns='game_won')

y_defense = defense['game_won'].to_numpy()
defense = defense.drop(columns='game_won')

# RandomForest

In [60]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Helper that fits a default RandomForest on a dataset and reports its accuracy.
def RF(X, y):
    """Fit a RandomForestClassifier on an 80/20 split of (X, y) and print accuracies.

    Parameters:
        X: feature matrix passed to train_test_split.
        y: labels aligned with the rows of X.

    Prints the train accuracy followed by the test accuracy; returns None.
    """
    # Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
    features_train, features_test, labels_train, labels_test = train_test_split(
        X, y, test_size=0.2, random_state=10
    )

    # Default hyperparameters, seeded so repeated runs build the same forest.
    forest = RandomForestClassifier(random_state=10).fit(features_train, labels_train)

    # Predict on both partitions before scoring either one.
    train_predictions = forest.predict(features_train)
    test_predictions = forest.predict(features_test)

    # Report accuracy on the data the model was fitted on, then on the held-out data.
    print(f"Train Accuracy: {accuracy_score(labels_train, train_predictions)}")
    print(f"Test Accuracy: {accuracy_score(labels_test, test_predictions)}")

# Evaluate the baseline forest on the offense features, then on the defense features.
for heading, feature_frame, labels in (
    ("Offense:", offense, y_offense),
    ("Defense:", defense, y_defense),
):
    print(heading)
    RF(feature_frame.to_numpy(), labels)

Offense:
Train Accuracy: 0.5622778169408248
Test Accuracy: 0.5149215800174267
Defense:
Train Accuracy: 0.5179183546276098
Test Accuracy: 0.5175597831348631


# Hyperparameter Testing

In [65]:
from sklearn.model_selection import GridSearchCV

# Helper function to perform hyperparameter tuning on RandomForest
def tune_RF(X, y, param_grid=None):
    """Grid-search RandomForest hyperparameters and print train/test accuracy.

    Parameters:
        X: feature matrix passed to train_test_split.
        y: labels aligned with the rows of X.
        param_grid: optional dict mapping RandomForestClassifier parameter names
            to lists of candidate values. When None, the default grid below is
            searched. Pass a smaller grid to shorten the search.

    Prints the best hyperparameters, then the train and test accuracy of the
    best model; returns None.
    """
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

    # Create a Random Forest Classifier
    rf_classifier = RandomForestClassifier(random_state=10)

    # Define the hyperparameters and their possible values
    if param_grid is None:
        param_grid = {
            'n_estimators': [50, 100, 200, 500],
            'max_depth': [None, 10, 20, 50, 100, 200],
            # max_leaf_nodes must be None or an integer >= 2; a value of 1 is
            # rejected by scikit-learn and would make every such candidate fail.
            'max_leaf_nodes': [None, 2, 4, 8, 16, 32, 64, 128]
        }

    # Use GridSearchCV to find the best combination of hyperparameters
    grid_search = GridSearchCV(estimator=rf_classifier, param_grid=param_grid, cv=3, scoring='accuracy', n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Get the best hyperparameters
    best_params = grid_search.best_params_
    print(f"Best Hyperparameters: {best_params}")

    # GridSearchCV refits the best configuration on the whole training set by
    # default (refit=True), using a clone of rf_classifier with the same seed,
    # so reuse that model instead of training an identical one a second time.
    best_rf_classifier = grid_search.best_estimator_

    # Make predictions on the train data
    train_pred = best_rf_classifier.predict(X_train)

    # Make predictions on the test data
    y_pred = best_rf_classifier.predict(X_test)

    # Evaluate the accuracy of the model on the train data
    accuracy_train = accuracy_score(y_train, train_pred)
    print(f"Train Accuracy: {accuracy_train}")

    # Evaluate the accuracy of the model on the test data
    accuracy_test = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy: {accuracy_test}")

# Tune and evaluate on the offense features, then on the defense features.
for heading, feature_frame, labels in (
    ("Offense:", offense, y_offense),
    ("Defense:", defense, y_defense),
):
    print(heading)
    tune_RF(feature_frame.to_numpy(), labels)

Offense:


KeyboardInterrupt: 