# Import Dependencies

In [1]:
# import dependencies
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
%matplotlib inline

# Load Data

In [2]:
# Read fake_data.csv into `df` and preview the first five rows
df = pd.read_csv(filepath_or_buffer="fake_data.csv")
df.head(n=5)

Unnamed: 0,Home_Team,Away_Team,Home_Score,Away_Score,Rebounds,Assists,Blocks,Total_Score,Line,Over_Under,Bet,Met_Not_Met
0,Atlanta Hawks,Washington Wizards,110,110,43,23,6,220,232,1,1,1
1,Boston Celtics,Utah Jazz,105,115,51,25,3,220,216,0,0,1
2,Brooklyn Nets,Toronto Raptors,113,104,55,26,4,217,208,1,0,0
3,Charlotte Hornets,San Antonio Spurs,100,95,42,30,2,195,193,1,1,1
4,Chicago Bulls,Sacramento Kings,103,109,43,25,5,212,193,0,1,0


# Preprocess and Encode Data

In [3]:
# One-hot encode the two team-name columns.
# The cells below build X and y from `dummies`.
dummies = pd.get_dummies(data=df, columns=["Home_Team", "Away_Team"])
dummies.head(n=5)

Unnamed: 0,Home_Score,Away_Score,Rebounds,Assists,Blocks,Total_Score,Line,Over_Under,Bet,Met_Not_Met,...,Away_Team_Oklahoma City Thunder,Away_Team_Orlando Magic,Away_Team_Philadelphia 76ers,Away_Team_Phoenix Suns,Away_Team_Portland Trail Blazers,Away_Team_Sacramento Kings,Away_Team_San Antonio Spurs,Away_Team_Toronto Raptors,Away_Team_Utah Jazz,Away_Team_Washington Wizards
0,110,110,43,23,6,220,232,1,1,1,...,False,False,False,False,False,False,False,False,False,True
1,105,115,51,25,3,220,216,0,0,1,...,False,False,False,False,False,False,False,False,True,False
2,113,104,55,26,4,217,208,1,0,0,...,False,False,False,False,False,False,False,True,False,False
3,100,95,42,30,2,195,193,1,1,1,...,False,False,False,False,False,False,True,False,False,False
4,103,109,43,25,5,212,193,0,1,0,...,False,False,False,False,False,True,False,False,False,False


In [7]:
# Feature matrix: every encoded column except the target.
# drop() hands back a new frame, so `dummies` itself is left untouched.
# NOTE(review): Home_Score, Away_Score, Total_Score and Over_Under stay in X;
# if these are only known after the game, confirm they should be model inputs.
X = dummies.drop(columns=["Met_Not_Met"])
X.head(n=5)

Unnamed: 0,Home_Score,Away_Score,Rebounds,Assists,Blocks,Total_Score,Line,Over_Under,Bet,Home_Team_Atlanta Hawks,...,Away_Team_Oklahoma City Thunder,Away_Team_Orlando Magic,Away_Team_Philadelphia 76ers,Away_Team_Phoenix Suns,Away_Team_Portland Trail Blazers,Away_Team_Sacramento Kings,Away_Team_San Antonio Spurs,Away_Team_Toronto Raptors,Away_Team_Utah Jazz,Away_Team_Washington Wizards
0,110,110,43,23,6,220,232,1,1,True,...,False,False,False,False,False,False,False,False,False,True
1,105,115,51,25,3,220,216,0,0,False,...,False,False,False,False,False,False,False,False,True,False
2,113,104,55,26,4,217,208,1,0,False,...,False,False,False,False,False,False,False,True,False,False
3,100,95,42,30,2,195,193,1,1,False,...,False,False,False,False,False,False,True,False,False,False
4,103,109,43,25,5,212,193,0,1,False,...,False,False,False,False,False,True,False,False,False,False


In [8]:
# Target as an (n_rows, 1) column vector; it is flattened again when the model is fit
y = dummies[["Met_Not_Met"]].values

# Split, Train, Scale Data

In [10]:
# Split into train and test sets; no test_size is given, so the library default applies.
# The fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=78,
)

In [11]:
# Standardizer with its default behaviour spelled out: centre each column and scale to unit variance
scaler = StandardScaler(with_mean=True, with_std=True)

In [12]:
# Fit the scaler on the training rows only.
# fit() returns the scaler itself, so `X_scaler` and `scaler` are the same object.
X_scaler = scaler.fit(X=X_train)

In [13]:
# Apply the scaler fitted on the training rows to both splits, in train-then-test order
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

# Fit the Random Forest Model

In [14]:
# Random forest of 500 trees; the fixed seed makes the fitted model reproducible
rf_model = RandomForestClassifier(
    random_state=78,
    n_estimators=500,
)

In [15]:
# Train the forest on the scaled training features.
# y_train is an (n, 1) column vector, so .ravel() flattens it to 1-D for fit().
rf_model = rf_model.fit(X=X_train_scaled, y=y_train.ravel())

# Make Predictions Using the Random Forest Model

In [16]:
# Predict a class label for every row of the scaled test set
predictions = rf_model.predict(X=X_test_scaled)

# Model Evaluation

In [17]:
# Build the confusion matrix with the class order pinned to [0, 1].
# Without `labels`, the matrix only covers classes that actually occur in
# y_test or predictions; if one class were missing it would be 1x1 and the
# hard-coded 2x2 row/column labels below would make the DataFrame constructor fail.
cm = confusion_matrix(y_test, predictions, labels=[0, 1])
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Fraction of test rows whose predicted label equals the true label
acc_score = accuracy_score(y_test, predictions)


In [18]:
# Show the labelled confusion matrix, the overall accuracy and the per-class report
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
# One print call with a newline separator emits the heading and the report on separate lines
print("Classification Report", classification_report(y_test, predictions), sep="\n")

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,6,3
Actual 1,3,3


Accuracy Score : 0.6
Classification Report
              precision    recall  f1-score   support

           0       0.67      0.67      0.67         9
           1       0.50      0.50      0.50         6

    accuracy                           0.60        15
   macro avg       0.58      0.58      0.58        15
weighted avg       0.60      0.60      0.60        15



# Feature Importance

In [19]:
# Per-feature importances of the fitted forest, read once and reused below.
# The order matches X.columns because the scaler keeps the column order of X.
importances = rf_model.feature_importances_

# Pair each importance with its column name and list the largest first
sorted(zip(importances, X.columns), reverse=True)

[(0.10418294552416196, 'Total_Score'),
 (0.10089124240755644, 'Home_Score'),
 (0.09010945328926727, 'Line'),
 (0.08017413287690199, 'Away_Score'),
 (0.07738975586220441, 'Rebounds'),
 (0.07100502094148069, 'Assists'),
 (0.04964840461498833, 'Blocks'),
 (0.028556247785745795, 'Over_Under'),
 (0.024668877988688477, 'Bet'),
 (0.011958335450076202, 'Home_Team_Detroit Pistons'),
 (0.011474821165552776, 'Home_Team_San Antonio Spurs'),
 (0.011396736123448187, 'Away_Team_Charlotte Hornets'),
 (0.01126810317340309, 'Away_Team_Washington Wizards'),
 (0.01058298582375828, 'Away_Team_Milwaukee Bucks'),
 (0.010192472230336045, 'Home_Team_Orlando Magic'),
 (0.010047645269899114, 'Away_Team_Denver Nuggets'),
 (0.010009340098647465, 'Home_Team_Chicago Bulls'),
 (0.009485490567515738, 'Away_Team_Orlando Magic'),
 (0.00945452005004766, 'Away_Team_Detroit Pistons'),
 (0.00931846905267851, 'Away_Team_Sacramento Kings'),
 (0.008822156278234076, 'Home_Team_Atlanta Hawks'),
 (0.008758640269290777, 'Home_Team