# Data Loading

In [10]:
import pandas as pd

# Read the game records from the JSON file that sits in the sibling data
# directory; every later cell derives its inputs from `df`.
games_json_path = '../data/trimmed_games.json'
df = pd.read_json(games_json_path)

# Data Filtering

In [43]:
# Columns kept for the offense model: the `game_won` label followed by the
# passing, rushing and receiving statistics used as features.
offense_columns = [
    'game_won',
    'passing_attempts',
    'passing_completions',
    'passing_yards',
    'passing_rating',
    'passing_touchdowns',
    'passing_interceptions',
    'passing_sacks',
    'passing_sacks_yards_lost',
    'rushing_attempts',
    'rushing_yards',
    'rushing_touchdowns',
    'receiving_targets',
    'receiving_receptions',
    'receiving_yards',
    'receiving_touchdowns'
]

# Build the cleaned, labelled frame in one chain so that no column is ever
# assigned on a frame derived from `df` (assigning `offense['game_won'] = ...`
# after slicing can raise SettingWithCopyWarning on pandas versions without
# copy-on-write):
#   1. keep only the offense columns,
#   2. drop rows that have a missing value in any of them,
#   3. cast the `game_won` label to int (0 or 1).
offense_labeled = (
    df[offense_columns]
    .dropna()
    .astype({'game_won': int})
)

# Grab y values from 'game_won' ...
y = offense_labeled['game_won'].to_numpy()

# ... and keep every other column as the feature frame used by later cells.
offense = offense_labeled.drop(columns='game_won')

print(offense.columns)

Index(['passing_attempts', 'passing_completions', 'passing_yards',
       'passing_rating', 'passing_touchdowns', 'passing_interceptions',
       'passing_sacks', 'passing_sacks_yards_lost', 'rushing_attempts',
       'rushing_yards', 'rushing_touchdowns', 'receiving_targets',
       'receiving_receptions', 'receiving_yards', 'receiving_touchdowns'],
      dtype='object')


# ML Model

In [45]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Feature names and the feature matrix, both taken from the filtered frame.
features = offense.columns
X = offense.to_numpy()

# Sanity check: X and y must have the same number of rows.
print(X.shape, y.shape, sep='\n')

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

# Build the random forest (seeded) and fit it on the training rows only.
# `fit` returns the estimator itself, so the chained form is equivalent.
rf_classifier = RandomForestClassifier(random_state=10).fit(X_train, y_train)

# Predictions on the rows the model was fitted on ...
train_pred = rf_classifier.predict(X_train)

# ... and on the held-out rows.
y_pred = rf_classifier.predict(X_test)

# Show predicted vs. actual labels for the held-out rows.
print(y_pred, y_test, sep='\n')

# Accuracy on the training rows.
accuracy = accuracy_score(y_train, train_pred)
print(f"Train Accuracy: {accuracy}")

# Accuracy on the held-out rows; `accuracy` ends the cell holding this value.
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy}")

(413159, 15)
(413159,)
[1 0 1 ... 1 1 1]
[1 0 1 ... 1 0 0]
Train Accuracy: 0.5622778169408248
Test Accuracy: 0.5149215800174267
