In [18]:
#Import packages
import pandas as pd 
import random 
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import utils
from sklearn import metrics

In [19]:
#Import data
team_data = pd.read_csv('nba_stats_2020_2021.csv')
player_data = pd.read_csv('player_stats_2020_2021.csv')

In [20]:
#Fit Model on Training Data
lab_enc = preprocessing.LabelEncoder()
box_scores = team_data[['PTS', 'FGM', 'FG3M', 'FGA', 'FTA', 'OR', 'DR', 'TO']]
encoded = lab_enc.fit_transform(box_scores.values.ravel())
drop_col = ['PTS']
X = box_scores.drop(drop_col, 1)
Y = team_data['PTS']
y = Y.values
for val in range(len(y)):
    y[val] = round(y[val])
X_train, X_test, Y_train, Y_test = train_test_split(X, y, train_size = 0.75)
#Bagging Classifier
bclf = BaggingClassifier(base_estimator=SVC(gamma='scale'), n_estimators=10, random_state=0)
bclf.fit(X_train, Y_train.ravel())
#Random Forest Classifier
rfc = RandomForestClassifier(max_depth=8, random_state=0, n_estimators=300)
rfc.fit(X_train, Y_train.ravel())
#Update points scored with averaged predictions
for index in range(len(team_data['PTS'])):
    avg_total_score = (bclf.predict(X.values)[index] + rfc.predict(X.values)[index])/2
    team_data['PTS'].values[index] = random.uniform(avg_total_score + 5, avg_total_score - 5)

In [21]:
#Set home and away teams
home = 'Philadelphia 76ers'
away = 'Oklahoma City Thunder'
homeI = 'PHI'
awayI = 'OKC'

In [22]:
#Set spread
spread = 15

In [23]:
#Set Over/Under total
total = 219.5

In [24]:
#Set injuries
homeInjuries = ['Danny Green']
awayInjuries = ['Luguentz Dort', 'Shai Gilgeous-Alexander', 'Mike Muscala']

In [25]:
#Split data into home team data and away team data
homedf = team_data[team_data.Team == home]
awaydf = team_data[team_data.Team == away]

homePER = player_data[player_data.Tm == homeI]
awayPER = player_data[player_data.Tm == awayI]

In [26]:
#Calculate the league average for total points scored
avgTotal = team_data["PTS"].mean() + team_data["PTSA"].mean()

In [27]:
#Get necessary stats for home team
homeTeam = homedf.Team
homeFGM = homedf.FGM
homeFG3M = homedf.FG3M
homeFGA = homedf.FGA
homeFTA = homedf.FTA
homeTOV = homedf.TO
homeOR = homedf.OR
homeDR = homedf.DR
homePoints = homedf.PTS
homePA = homedf.PTSA
homeRoster = len(homePER)
avg_homePER = homePER["PER"].mean()
sum_homePER = homePER["PER"].sum()
num_homeInjuries = len(homeInjuries)

In [28]:
#Get necessary stats for away team
awayTeam = awaydf.Team
awayFGM = awaydf.FGM
awayFG3M = awaydf.FG3M
awayFGA = awaydf.FGA
awayFTA = awaydf.FTA
awayTOV = awaydf.TO
awayOR = awaydf.OR
awayDR = awaydf.DR
awayPoints = awaydf.PTS
awayPA = awaydf.PTSA
awayRoster = len(awayPER)
avg_awayPER = awayPER["PER"].mean()
sum_awayPER = awayPER["PER"].sum()
num_awayInjuries = len(awayInjuries)

In [29]:
#Calculates home team potential based on injured players
missing_homePER = 0
for player in homeInjuries:
    missing_homePER += homePER[homePER.Player == player].PER.item()
adjusted_homePER = (sum_homePER - missing_homePER)/(homeRoster - num_homeInjuries)
potential_homePER = (9.7 * (adjusted_homePER/avg_homePER) + 0.3 * (sum_homePER - missing_homePER)/sum_homePER)/10

In [30]:
#Calculates away team potential based on injured players
missing_awayPER = 0
for player in awayInjuries:
    missing_awayPER += awayPER[awayPER.Player == player].PER.item()
adjusted_awayPER = (sum_awayPER - missing_awayPER)/(awayRoster - num_awayInjuries)
potential_awayPER = (9.7 * (adjusted_awayPER/avg_awayPER) + 0.3 * (sum_awayPER - missing_awayPER)/sum_awayPER)/10

In [31]:
#Predicts Spread
def spreadSim():
    #Calculates difference in effective field goal percentages
    HEFG = potential_homePER.item() * ((homeFGM + 0.5 * homeFG3M)/homeFGA).item()
    AEFG = potential_awayPER.item() * ((awayFGM + 0.5 * awayFG3M)/awayFGA).item()
    efg = random.uniform(HEFG - 5, HEFG + 5) - random.uniform(AEFG - 5, AEFG + 5)
    #Calculates difference in turnover percentages
    HTOV = potential_homePER.item() * (homeFGA - homeOR + homeTOV + (0.4 * homeFTA)).item()
    ATOV = potential_awayPER.item() * (awayFGA - awayOR + awayTOV + (0.4 * awayFTA)).item()
    tov = random.uniform(HTOV - 3, HTOV + 3) - random.uniform(ATOV - 3, ATOV + 3)
    #Calculates difference in offensive rebounding percentages
    HORB = potential_homePER.item() * (homeOR.item()/(homeOR.item() + awayDR.item()))
    HDRB = potential_homePER.item() * homeDR.item()/(awayOR.item() + homeDR.item())
    orb = random.uniform(HORB - 3, HORB + 3) - random.uniform(HDRB - 3, HDRB + 3)
    #Calculates difference in free throws attempted percentages
    HFT = potential_homePER.item() * (homeFTA/homeFGA).item()
    AFT = potential_awayPER.item() * (awayFTA/awayFGA).item()
    ft = random.uniform(HFT - 3, HFT + 3) - random.uniform(AFT - 3, AFT + 3)
    #Calculates Spread
    sim = 2 * ((0.4 * efg) + (0.25 * tov) + (0.2 * orb) + (0.15 * ft))
    rsim = round(sim*2)/2
    if (rsim > spread):
        return 1
    elif (rsim < spread):
        return -1
    else:
        return 0

In [32]:
#Predicts total for Over/Under
def scoreSim():
    #Calculates total for home team games
    gph1 = potential_homePER.item() * random.uniform(homePoints.item() - 3, homePoints.item() + 3)
    gph2 = potential_homePER.item() * random.uniform(homePA.item() - 3, homePA.item() + 3)
    gamePointsHome = (gph1 + gph2)
    #Calculates total for away team games
    gpa1 = potential_awayPER.item() * random.uniform(awayPoints.item() - 3, awayPoints.item() + 3)
    gpa2 = potential_awayPER.item() * random.uniform(awayPA.item() - 3, awayPA.item() + 3)
    gamePointsAway = (gpa1+gpa2)
    #Calculates predicted total in this match-up
    gameTotal = (gamePointsHome * gamePointsAway)/avgTotal
    rgameTotal = round(2*gameTotal)/2
    return rgameTotal

In [33]:
def gamesSim(number_of_simulations):
    hometeamcovers = 0
    awayteamcovers = 0
    pushcover = 0
    averageSpread = 0
    over = 0
    under = 0
    pushTotal = 0
    averageTotal = 0
    for i in range(number_of_simulations):
        game = spreadSim()
        averageSpread += game
        if game > spread:
            hometeamcovers += 1
        elif game < spread:
            awayteamcovers += 1
        else:
            pushcover += 1
    for i in range(number_of_simulations):
        score = scoreSim()
        averageTotal += score
        if score > total:
            over += 1
        elif score < total:
            under += 1
        else:
            pushTotal += 1
    print(homeTeam.item() + ' covers', (hometeamcovers/(hometeamcovers+awayteamcovers+pushcover)) * 100,'% of the simulations')
    print(awayTeam.item() + ' covers', awayteamcovers/(hometeamcovers+awayteamcovers+pushcover) * 100,'% of the simulations')
    print('The spread pushes', pushcover/(hometeamcovers+awayteamcovers+pushcover) * 100, '% of the simulations')
    print('The average spread is', averageSpread/number_of_simulations)
    print('The total is over', (over)/(over+under+pushTotal) * 100,'% of the simulations')
    print('The total is under', (under)/(over+under+pushTotal) * 100,'% of the simulations')
    print('The total pushes', pushTotal/(over+under+pushTotal) * 100, '% of the simulations')
    print('The average total is', averageTotal/number_of_simulations)

In [34]:
gamesSim(10000)

Philadelphia 76ers covers 0.0 % of the simulations
Oklahoma City Thunder covers 100.0 % of the simulations
The spread pushes 0.0 % of the simulations
The average spread is -1.0
The total is over 87.76 % of the simulations
The total is under 9.54 % of the simulations
The total pushes 2.7 % of the simulations
The average total is 223.8268
