In [88]:
import pandas as pd
import numpy as np

Import NBA statistic from NBA API (player statistics and teams standings)

In [89]:
from nba_api.stats.endpoints import leaguedashplayerstats, leaguestandings

 Get team standings and player statistics dataset 

In [90]:
#Standing dataset
standings = leaguestandings.LeagueStandings(season='2022-23').get_data_frames()[0]
standings

Unnamed: 0,LeagueID,SeasonID,TeamID,TeamCity,TeamName,Conference,ConferenceRecord,PlayoffRank,ClinchIndicator,Division,...,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,PreAS,PostAS
0,0,22022,1610612743,Denver,Nuggets,West,32-15,1,- nw,Northwest,...,,,,,,4-3,10-4,9-5,41-18,10-8
1,0,22022,1610612749,Milwaukee,Bucks,East,32-16,1,- c,Central,...,,,,,,6-0,9-5,8-7,41-17,14-5
2,0,22022,1610612738,Boston,Celtics,East,31-17,2,- x,Atlantic,...,,,,,,4-2,14-2,8-6,42-17,12-7
3,0,22022,1610612763,Memphis,Grizzlies,West,29-20,2,- sw,Southwest,...,,,,,,4-3,8-6,10-4,35-22,14-6
4,0,22022,1610612758,Sacramento,Kings,West,31-16,3,- x,Pacific,...,,,,,,2-4,9-5,8-6,32-25,15-5
5,0,22022,1610612755,Philadelphia,76ers,East,31-16,3,- x,Atlantic,...,,,,,,4-4,8-6,9-4,38-19,13-7
6,0,22022,1610612739,Cleveland,Cavaliers,East,31-17,4,- x,Central,...,,,,,,5-1,9-7,9-6,38-23,10-7
7,0,22022,1610612756,Phoenix,Suns,West,27-20,4,,Pacific,...,,,,,,5-1,10-5,5-11,32-28,10-7
8,0,22022,1610612752,New York,Knicks,East,30-19,5,,Atlantic,...,,,,,,3-3,7-9,9-6,33-27,12-6
9,0,22022,1610612746,LA,Clippers,West,24-24,5,,Pacific,...,,,,,,3-4,10-6,8-7,33-28,8-9


In [91]:
#Player statistics
player_stats = leaguedashplayerstats.LeagueDashPlayerStats(season='2022-23', season_type_all_star='Regular Season', per_mode_detailed='PerGame').get_data_frames()[0]
player_stats

Unnamed: 0,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,...,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK
0,1630639,A.J. Lawson,A.J.,1610612742,DAL,22.0,13,5,8,0.385,...,475,397,473,478,448,341,485,234,35,473
1,1631260,AJ Green,AJ,1610612749,MIL,23.0,33,26,7,0.788,...,475,478,435,498,364,322,430,234,35,403
2,1631100,AJ Griffin,AJ,1610612737,ATL,19.0,68,31,37,0.456,...,345,290,384,405,212,159,278,234,35,262
3,203932,Aaron Gordon,Aaron,1610612743,DEN,27.0,65,44,21,0.677,...,70,32,215,54,81,2,76,60,35,87
4,1628988,Aaron Holiday,Aaron,1610612737,ATL,26.0,59,30,29,0.508,...,331,295,361,336,390,218,390,234,35,395
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523,1628380,Zach Collins,Zach,1610612759,SAS,25.0,61,18,43,0.295,...,67,126,26,111,143,487,129,73,35,134
524,203897,Zach LaVine,Zach,1610612741,CHI,28.0,73,36,37,0.493,...,289,19,155,45,21,217,44,150,35,31
525,1630192,Zeke Nnaji,Zeke,1610612743,DEN,22.0,50,33,17,0.660,...,155,327,194,270,340,422,366,180,35,372
526,1630533,Ziaire Williams,Ziaire,1610612763,MEM,21.0,36,21,15,0.583,...,392,413,283,319,333,403,384,234,35,373


Merge team standings with player stats by Team ID

In [92]:
df = pd.merge(player_stats, standings, left_on='TEAM_ID', right_on='TeamID', how='outer')

### Feature engineering
Including these two features can help to capture important information about a team's performance that is not captured by other features such as points per game or rebounds per game.

In [93]:
df['AssistRatio'] = df['AST'] / (df['AST'] + df['TOV'])
#Indication of a player's ability to assist their teamates without turning the ball over

df['WinStreak'] = df['W'] - df['L']
#Indication of how many consecutive game has won (if value +) or lost (if value -)


Features are selected to calculate the percentage of winning 

In [94]:
feature_columns = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'FG_PCT', 'FT_PCT', 'FG3_PCT', 'W_PCT', 'AssistRatio', 'WinStreak']

In [9]:
feature_weights = {
    'PTS': 0.1,
    'REB': 0.15,
    'AST': 0.15,
    'STL': 0.05,
    'BLK': 0.05,
    'TOV': -0.15,
    'FG_PCT': 0.1,
    'FT_PCT': 0.05,
    'FG3_PCT': 0.05,
    'W_PCT': 0.2,
    'AssistRatio': 0.15,
    'WinStreak': 0.1
}

### Aggregate team stats using 'TEAM_ID', 'Conference', 'TeamName', 'TeamCity', and 'PlayoffRank'

In [34]:
team_stats = df.groupby(['TEAM_ID', 'Conference', 'TeamName', 'TeamCity', 'PlayoffRank']).sum().reset_index()
team_stats

Unnamed: 0,TEAM_ID,Conference,TeamName,TeamCity,PlayoffRank,PLAYER_ID,AGE,GP,W,L,...,ClinchedConferenceTitle,ClinchedDivisionTitle,ClinchedPlayoffBirth,EliminatedConference,EliminatedDivision,PointsPG,OppPointsPG,DiffPointsPG,AssistRatio,WinStreak
0,1610612737,East,Hawks,Atlanta,8,24853796,413.0,880,408,472,...,0,0,0,0,0,2004.3,2004.3,0.0,9.913199,-64
1,1610612738,East,Celtics,Boston,2,20563850,465.0,851,574,277,...,0,0,17,0,0,2012.8,1900.6,110.5,10.803709,297
2,1610612739,East,Cavaliers,Cleveland,4,21990223,454.0,748,473,275,...,0,0,17,0,0,1909.1,1819.0,90.1,9.831896,198
3,1610612740,West,Pelicans,New Orleans,8,21787844,411.0,793,396,397,...,0,0,0,0,0,1827.2,1795.2,32.0,9.509723,-1
4,1610612741,East,Bulls,Chicago,10,20571173,450.0,807,386,421,...,0,0,0,0,0,1927.8,1907.4,20.4,9.16767,-35
5,1610612742,West,Mavericks,Dallas,11,21378108,594.0,830,407,423,...,0,0,0,0,0,2389.8,2379.3,10.5,12.00876,-16
6,1610612743,West,Nuggets,Denver,1,16085664,435.0,861,558,303,...,0,16,16,0,0,1857.6,1798.4,59.2,9.655471,255
7,1610612744,West,Warriors,Golden State,6,18944493,458.0,796,424,372,...,0,0,0,0,0,2011.1,1999.2,13.6,9.852409,52
8,1610612745,West,Rockets,Houston,15,24449916,346.0,747,177,570,...,0,0,0,0,0,1657.5,1782.0,-124.5,9.411122,-393
9,1610612746,West,Clippers,LA,5,16487674,516.0,896,439,457,...,0,0,0,0,0,2035.8,2028.6,5.4,11.543272,-18


### Calculate weighted score for each team

In [95]:
team_stats['Score'] = team_stats[feature_columns].apply(lambda x: sum(x * [feature_weights[col] for col in feature_columns]), axis=1)
team_stats['Score']

12    67.386009
1     63.448056
18    55.480555
2     49.194734
15    43.582207
14    36.087545
11    34.967297
24    33.490883
0     27.113730
4     25.021500
17    24.636192
27    21.486666
16    20.531074
29    10.451058
28    -8.661407
26    58.940092
6     58.529871
21    49.548001
19    42.078449
10    39.224225
7     38.984761
9     36.759941
3     33.846558
5     32.618814
13    32.144884
23    27.860479
25    27.528297
20    20.847313
22    -1.582634
8    -12.872232
Name: Score, dtype: float64

Sort teams by score and rank them


In [71]:
team_stats = team_stats.sort_values(['Conference', 'Score'], ascending=[True, False])
team_stats['Rank'] = team_stats.groupby('Conference').cumcount() + 1

Add a new column for the difference between the calculated rank and the PlayoffRank

In [72]:
team_stats['RankDifference'] = team_stats['Rank'] - team_stats['PlayoffRank']

Split teams into conferences

In [73]:
eastern_conference = team_stats[team_stats['Conference'] == 'East'][['TeamCity', 'TeamName', 'Rank', 'PlayoffRank', 'RankDifference']]
western_conference = team_stats[team_stats['Conference'] == 'West'][['TeamCity', 'TeamName', 'Rank', 'PlayoffRank', 'RankDifference']]

### Ranking the teams
Ranking the team by the feature weight score and showing the difference between PlayoffRank in standings dataset

In [97]:
print("Eastern Conference Rankings:\n", eastern_conference)
print("Western Conference Rankings:\n", western_conference)

Eastern Conference Rankings:
         TeamCity   TeamName  Rank  PlayoffRank  RankDifference
12     Milwaukee      Bucks     1            1               0
1         Boston    Celtics     2            2               0
18  Philadelphia      76ers     3            3               0
2      Cleveland  Cavaliers     4            4               0
15      New York     Knicks     5            5               0
14      Brooklyn       Nets     6            6               0
11         Miami       Heat     7            7               0
24       Toronto    Raptors     8            9              -1
0        Atlanta      Hawks     9            8               1
4        Chicago      Bulls    10           10               0
17       Indiana     Pacers    11           12              -1
27    Washington    Wizards    12           11               1
16       Orlando      Magic    13           13               0
29     Charlotte    Hornets    14           14               0
28       Detroit    Pisto

### Predict the top-ranked teams from each conference as NBA Finals contenders

In [75]:
eastern_finalist = team_stats[team_stats['Conference'] == 'East'].iloc[0]
western_finalist = team_stats[team_stats['Conference'] == 'West'].iloc[0]

Normalize the scores of the two finalists



In [98]:
total_score = eastern_finalist['Score'] + western_finalist['Score']
eastern_finalist['NormalizedScore'] = eastern_finalist['Score'] / total_score
western_finalist['NormalizedScore'] = western_finalist['Score'] / total_score

Calculate the winning percentage for each finalist

In [99]:
eastern_finalist['WinningPercentage'] = eastern_finalist['NormalizedScore'] / (eastern_finalist['NormalizedScore'] + western_finalist['NormalizedScore'])
western_finalist['WinningPercentage'] = western_finalist['NormalizedScore'] / (eastern_finalist['NormalizedScore'] + western_finalist['NormalizedScore'])

Display the predicted NBA Finals contenders and their winning percentages

In [103]:
print("Eastern Conference Finalist:\n", eastern_finalist[['TeamCity', 'TeamName', 'Score','WinningPercentage']])
print("Western Conference Finalist:\n", western_finalist[['TeamCity', 'TeamName', 'Score','WinningPercentage']])

Eastern Conference Finalist:
 TeamCity             Milwaukee
TeamName                 Bucks
Score                67.386009
WinningPercentage     0.533429
Name: 12, dtype: object
Western Conference Finalist:
 TeamCity               Memphis
TeamName             Grizzlies
Score                58.940092
WinningPercentage     0.466571
Name: 26, dtype: object


### Predict the champion 

In [101]:
if eastern_finalist['WinningPercentage'] > western_finalist['WinningPercentage']:
    champion = eastern_finalist
else:
    champion = western_finalist

print("Predicted NBA Champion:\n", champion[['TeamCity', 'TeamName', 'Score', 'NormalizedScore', 'WinningPercentage']])


Predicted NBA Champion:
 TeamCity             Milwaukee
TeamName                 Bucks
Score                67.386009
NormalizedScore       0.533429
WinningPercentage     0.533429
Name: 12, dtype: object


### Combining the calculated scores with the playoff ranks to predict the NBA champion. 

By taking the reciprocal of the PlayoffRank, we create a new metric where higher values are considered better. This makes it easier to work with the other metrics in our model, where higher values are also considered better.

In [80]:
team_stats['InversePlayoffRank'] = 1 / team_stats['PlayoffRank']
#The InversePlayoffRank is calculated as the reciprocal of the PlayoffRank. 
#The PlayoffRank is a ranking of the teams where a lower rank is considered better. 
#The best team has a rank of 1, the second-best team has a rank of 2, and so on.

In [81]:
team_stats['NormalizedScore'] = team_stats['Score'] / team_stats['Score'].sum()

In [82]:
eastern_finalist = team_stats[team_stats['Conference'] == 'East'].iloc[0]
western_finalist = team_stats[team_stats['Conference'] == 'West'].iloc[0]

In [83]:
eastern_finalist['NormalizedInversePlayoffRank'] = eastern_finalist['InversePlayoffRank'] / (eastern_finalist['InversePlayoffRank'] + western_finalist['InversePlayoffRank'])
western_finalist['NormalizedInversePlayoffRank'] = western_finalist['InversePlayoffRank'] / (eastern_finalist['InversePlayoffRank'] + western_finalist['InversePlayoffRank'])

The playoff_rank_weight determines the importance of the playoff rank in the prediction.

In [84]:
playoff_rank_weight = 0.3

In [85]:
eastern_finalist['AdjustedWinningPercentage'] = (1 - playoff_rank_weight) * eastern_finalist['NormalizedScore'] + playoff_rank_weight * eastern_finalist['NormalizedInversePlayoffRank']
western_finalist['AdjustedWinningPercentage'] = (1 - playoff_rank_weight) * western_finalist['NormalizedScore'] + playoff_rank_weight * western_finalist['NormalizedInversePlayoffRank']

In [102]:
print("Eastern Conference Finalist:\n", eastern_finalist[['TeamCity', 'TeamName', 'Score', 'NormalizedScore', 'AdjustedWinningPercentage']])
print("Western Conference Finalist:\n", western_finalist[['TeamCity', 'TeamName', 'Score', 'NormalizedScore', 'AdjustedWinningPercentage']])

Eastern Conference Finalist:
 TeamCity                     Milwaukee
TeamName                         Bucks
Score                        67.386009
NormalizedScore               0.533429
AdjustedWinningPercentage     0.247711
Name: 12, dtype: object
Western Conference Finalist:
 TeamCity                       Memphis
TeamName                     Grizzlies
Score                        58.940092
NormalizedScore               0.466571
AdjustedWinningPercentage     0.141731
Name: 26, dtype: object


In [86]:
if eastern_finalist['AdjustedWinningPercentage'] > western_finalist['AdjustedWinningPercentage']:
    champion = eastern_finalist
else:
    champion = western_finalist

print("Predicted NBA Champion:\n", champion[['TeamCity', 'TeamName', 'Score', 'NormalizedScore', 'AdjustedWinningPercentage']])

Predicted NBA Champion:
 TeamCity                     Milwaukee
TeamName                         Bucks
Score                        67.386009
NormalizedScore               0.068158
AdjustedWinningPercentage     0.247711
Name: 12, dtype: object
