## Import relevant libraries

In [1]:
import pandas as pd
from sklearn.metrics import mean_squared_error
import math

## Read in the data

In [2]:
# Load the standings CSV into the `final` DataFrame that the cells below read from.
final = pd.read_csv(filepath_or_buffer="nba_final_standings.csv")

In [3]:
### `final` holds, for each NBA team:
###   * the final season standings,
###   * the wins and win% predicted by our partial least squares (PLS) model, and
###   * the projected wins and win% published by basketball-reference.com.
### Both our model's projections and basketball-reference's were captured on
### February 24th, 2022, during the All-Star break.
### Goal: see how well the PLS model predicted post All-Star break wins for each
### team, and compare against the projections basketball-reference.com makes public.

# Preview the first five rows.
final.head(n=5)

Unnamed: 0,Conference,Team,Actual_Wins,Actual_Win%,BballRef_Proj_Wins,BballRef_Proj_Win%,Proj_Wins,Proj_Win%,G_Left,Wins,Pred_Wins_PAS,Pred_Win_Diff,Win%_PAS,Pred_Win%_PAS,Win%_PAS_Diff
0,East,Atlanta Hawks,43,0.524,40.9,0.499,42.5,0.518,24,15,14.455449,0.544551,0.625,0.60231,0.02269
1,East,Boston Celtics,51,0.622,47.8,0.583,47.9,0.584,22,17,13.889717,3.110283,0.773,0.631351,0.141376
2,East,Brooklyn Nets,44,0.537,42.2,0.514,41.8,0.509,23,13,10.76014,2.23986,0.565,0.467832,0.097385
3,East,Charlotte Hornets,43,0.524,39.8,0.485,38.0,0.464,22,14,9.038929,4.961071,0.636,0.41086,0.225503
4,East,Chicago Bulls,46,0.561,49.4,0.603,53.1,0.647,23,8,15.082566,-7.082566,0.348,0.655764,-0.307938


In [4]:
## RMSE of the Partial Least Squares Regression model's predicted wins against
## the wins each team actually recorded after the All-Star break

y_actual, y_pred = final['Wins'], final['Pred_Wins_PAS']

# Mean squared error first, then its square root so the error is in units of wins.
MSE = mean_squared_error(y_true=y_actual, y_pred=y_pred)
RMSE = math.sqrt(MSE)

print("Root Mean Square Error:\n")
print(round(RMSE, 2), 'wins')

Root Mean Square Error:

3.5 wins


In [5]:
## RMSE of the Partial Least Squares Regression model's predicted win % against
## each team's actual post All-Star break win %

y_actual, y_pred = final['Win%_PAS'], final['Pred_Win%_PAS']

# Mean squared error first, then its square root to get back to the column's scale.
MSE = mean_squared_error(y_true=y_actual, y_pred=y_pred)
RMSE = math.sqrt(MSE)

# NOTE(review): the rows shown by final.head() store these columns as fractions
# (e.g. 0.625), so the figure printed below is a proportion, not a percentage.
print("Root Mean Square Error:\n")
print(round(RMSE, 3), 'win %')

Root Mean Square Error:

0.152 win %


In [6]:
## RMSE of basketball-reference.com's projected wins against actual team wins
##
## NOTE(review): this compares the Actual_Wins and BballRef_Proj_Wins columns,
## not the post All-Star break `Wins` column used for the PLS model. Presumably
## the residual still equals the post-break error, if the projection already
## includes the wins banked before the break -- TODO confirm.

y_actual, y_pred = final['Actual_Wins'], final['BballRef_Proj_Wins']

# Mean squared error first, then its square root so the error is in units of wins.
MSE = mean_squared_error(y_true=y_actual, y_pred=y_pred)
RMSE = math.sqrt(MSE)

print("Root Mean Square Error:\n")
print(round(RMSE, 3), 'wins')

Root Mean Square Error:

3.203 wins


In [7]:
# Relabel the Pred_Win_Diff column as Wins_over_Pred; the ranking cells sort on the new name.
final = final.rename({"Pred_Win_Diff": "Wins_over_Pred"}, axis="columns")

In [8]:
## Which 5 teams were underperformers?
## (the five smallest Wins_over_Pred values, most negative first)

(
    final[["Team", "Wins", "Wins_over_Pred"]]
    .sort_values(by="Wins_over_Pred", ascending=True)
    .head(n=5)
)

Unnamed: 0,Team,Wins,Wins_over_Pred
24,Portland Trail Blazers,2,-7.655692
4,Chicago Bulls,8,-7.082566
13,LA Lakers,6,-4.964549
9,GS Warriors,11,-4.255362
5,Cleveland Cavaliers,9,-4.244997


In [9]:
## Which 5 teams were overperformers?
## (the five largest Wins_over_Pred values, largest first)

(
    final[["Team", "Wins", "Wins_over_Pred"]]
    .sort_values(by="Wins_over_Pred", ascending=False)
    .head(n=5)
)

Unnamed: 0,Team,Wins,Wins_over_Pred
8,Detroit Pistons,10,5.462839
3,Charlotte Hornets,14,4.961071
21,Orlando Magic,9,4.347875
18,New Orleans Pelicans,13,3.646139
17,Minnesota Timberwolves,15,3.367283
