# NBA Game Outcome

In [None]:
# Import Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from imblearn.ensemble import BalancedRandomForestClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix

# Read the Data and Perform Basic Data Cleaning

In [1]:
# Feature columns kept from the raw games table: team identifiers and season,
# followed by the home-side and away-side box-score statistics.
# NOTE(review): PTS_home / PTS_away presumably determine HOME_TEAM_WINS
# directly -- confirm whether keeping them as features leaks the target.
columns = [
    "HOME_TEAM_ID", "VISITOR_TEAM_ID", "SEASON",
    "PTS_home", "FG_PCT_home", "FT_PCT_home", "FG3_PCT_home", "AST_home", "REB_home",
    "PTS_away", "FG_PCT_away", "FT_PCT_away", "FG3_PCT_away", "AST_away", "REB_away",
]

# Name of the label column, kept as a one-element list.
target = ["HOME_TEAM_WINS"]

In [None]:
# Load the data from AWS - REVIEW WITH TEAM
file_path ="https://YOUR-BUCKET-NAME.s3.amazonaws.com/INSERTLINK"
# NOTE(review): skiprows=1 and the [:-2] slice assume the file has one
# preamble line before the header and two trailing non-data lines. If the
# file is a plain CSV these would drop the header and two real rows -- confirm.
df = pd.read_csv(file_path, skiprows=1)[:-2]

# Keep the feature columns AND the target column. `columns` alone does not
# contain the target, so selecting only `columns` would discard it and the
# later feature/target split would fail with a KeyError.
df = df.loc[:, columns + target].copy()

# Drop the columns where all values are null
df = df.dropna(axis='columns', how='all')

# Drop the rows that contain any null value
df = df.dropna()

# Convert the target column values to Win/Loss labels.
# Only the target column is touched (a frame-wide replace could also rewrite
# feature cells), and the values are normalised to integers first so that
# 1, 1.0 and '1' are all recognised regardless of how the column was parsed.
target_col = target[0]
outcome_labels = {1: 'Win', 0: 'Loss'}
df[target_col] = pd.to_numeric(df[target_col]).astype(int).map(outcome_labels)

df.reset_index(inplace=True, drop=True)

df.head()

# Split the Data into Training and Testing

In [None]:
# Target: the home-team outcome column
y = df["HOME_TEAM_WINS"]

# Features: every remaining column, passed through get_dummies so that any
# non-numeric columns are one-hot encoded
X = pd.get_dummies(df.drop(columns="HOME_TEAM_WINS"))

In [None]:
# Count how many samples fall into each target class to gauge class balance
class_counts = y.value_counts()
class_counts

In [None]:
# Split into training and testing sets; stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split
# repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=1,
)

# Balanced Random Forest Classifier

In [None]:
# Fit a BalancedRandomForestClassifier on the training split; the classifier
# balances the classes internally by under-sampling each bootstrap sample.
# random_state is fixed (same seed as the train/test split) so that the
# scores, confusion matrix and feature importances are reproducible
# from run to run.
random_forest = BalancedRandomForestClassifier(n_estimators=100, random_state=1)
random_forest = random_forest.fit(X_train, y_train)

In [None]:
# Predict on the held-out test set and calculate the balanced accuracy score
y_pred = random_forest.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Display the confusion matrix of actual vs. predicted labels for the test set
confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Build and print the imbalanced classification report for the test predictions
from imblearn.metrics import classification_report_imbalanced

report = classification_report_imbalanced(y_test, y_pred)
print(report)

In [None]:
# Pair each feature importance with its column name and list the pairs from
# most to least important
feature_names = X.columns
importance_pairs = zip(random_forest.feature_importances_, feature_names)
sorted(importance_pairs, reverse=True)