In [1]:
import pandas as pd
import numpy as np

Import NBA statistic from NBA API (player statistics and teams standings)

In [2]:
from nba_api.stats.endpoints import leaguedashplayerstats, leaguestandings

 Get team standings and player statistics dataset 

In [3]:
#Standing dataset
standings = leaguestandings.LeagueStandings(season='2022-23').get_data_frames()[0]
standings

Unnamed: 0,LeagueID,SeasonID,TeamID,TeamCity,TeamName,Conference,ConferenceRecord,PlayoffRank,ClinchIndicator,Division,...,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,PreAS,PostAS
0,0,22022,1610612743,Denver,Nuggets,West,34-18,1,- w,Northwest,...,,,,,,4-3,10-4,9-5,41-18,12-11
1,0,22022,1610612749,Milwaukee,Bucks,East,35-17,1,- e,Central,...,,,,,,6-0,9-5,8-7,41-17,17-7
2,0,22022,1610612738,Boston,Celtics,East,34-18,2,- a,Atlantic,...,,,,,,4-2,14-2,8-6,42-17,15-8
3,0,22022,1610612763,Memphis,Grizzlies,West,30-22,2,- sw,Southwest,...,,,,,,4-3,8-6,10-4,35-22,16-9
4,0,22022,1610612758,Sacramento,Kings,West,32-20,3,- p,Pacific,...,,,,,,2-4,9-5,8-6,32-25,16-9
5,0,22022,1610612755,Philadelphia,76ers,East,34-18,3,- x,Atlantic,...,,,,,,4-4,8-6,9-4,38-19,16-9
6,0,22022,1610612739,Cleveland,Cavaliers,East,34-18,4,- x,Central,...,,,,,,5-1,9-7,9-6,38-23,13-8
7,0,22022,1610612756,Phoenix,Suns,West,30-22,4,- x,Pacific,...,,,,,,5-1,10-5,5-11,32-28,13-9
8,0,22022,1610612752,New York,Knicks,East,32-20,5,- x,Atlantic,...,,,,,,3-3,7-9,9-6,33-27,14-8
9,0,22022,1610612746,LA,Clippers,West,27-25,5,- x,Pacific,...,,,,,,3-4,10-6,8-7,33-28,11-10


In [4]:
#Player statistics
player_stats = leaguedashplayerstats.LeagueDashPlayerStats(season='2022-23', season_type_all_star='Regular Season', per_mode_detailed='PerGame').get_data_frames()[0]
player_stats

Unnamed: 0,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,...,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK
0,1630639,A.J. Lawson,A.J.,1610612742,DAL,22.0,15,5,10,0.333,...,491,358,475,457,429,462,482,253,39,466
1,1631260,AJ Green,AJ,1610612749,MIL,23.0,35,27,8,0.771,...,491,498,454,518,392,309,455,253,39,436
2,1631100,AJ Griffin,AJ,1610612737,ATL,19.0,72,34,38,0.472,...,372,268,394,422,217,169,287,253,39,273
3,203932,Aaron Gordon,Aaron,1610612743,DEN,27.0,68,45,23,0.662,...,65,32,205,58,82,4,77,56,39,89
4,1628988,Aaron Holiday,Aaron,1610612737,ATL,26.0,63,32,31,0.508,...,345,315,377,356,415,212,413,253,39,416
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534,1628380,Zach Collins,Zach,1610612759,SAS,25.0,63,20,43,0.317,...,63,123,19,113,142,486,130,70,39,131
535,203897,Zach LaVine,Zach,1610612741,CHI,28.0,77,38,39,0.494,...,305,27,167,47,21,216,44,155,39,33
536,1630192,Zeke Nnaji,Zeke,1610612743,DEN,22.0,53,34,19,0.642,...,162,339,181,282,351,391,380,192,39,389
537,1630533,Ziaire Williams,Ziaire,1610612763,MEM,21.0,37,21,16,0.568,...,385,432,298,312,332,420,387,253,39,367


Merge team standings with player stats by Team ID

In [5]:
df = pd.merge(player_stats, standings, left_on='TEAM_ID', right_on='TeamID', how='outer')

### Feature engineering
Including these two features can help to capture important information about a team's performance that is not captured by other features such as points per game or rebounds per game.

In [6]:
df['AssistRatio'] = df['AST'] / (df['AST'] + df['TOV'])
#Indication of a player's ability to assist their teamates without turning the ball over

df['WinStreak'] = df['W'] - df['L']
#Indication of how many consecutive game has won (if value +) or lost (if value -)


Features are selected to calculate the percentage of winning 

In [7]:
feature_columns = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'FG_PCT', 'FT_PCT', 'FG3_PCT', 'W_PCT', 'AssistRatio', 'WinStreak']

In [8]:
feature_weights = {
    'PTS': 0.1,
    'REB': 0.15,
    'AST': 0.15,
    'STL': 0.05,
    'BLK': 0.05,
    'TOV': -0.15,
    'FG_PCT': 0.1,
    'FT_PCT': 0.05,
    'FG3_PCT': 0.05,
    'W_PCT': 0.2,
    'AssistRatio': 0.15,
    'WinStreak': 0.1
}

### Aggregate team stats using 'TEAM_ID', 'Conference', 'TeamName', 'TeamCity', and 'PlayoffRank'

In [9]:
team_stats = df.groupby(['TEAM_ID', 'Conference', 'TeamName', 'TeamCity', 'PlayoffRank']).sum().reset_index()
team_stats

Unnamed: 0,TEAM_ID,Conference,TeamName,TeamCity,PlayoffRank,PLAYER_ID,AGE,GP,W,L,...,ClinchedConferenceTitle,ClinchedDivisionTitle,ClinchedPlayoffBirth,EliminatedConference,EliminatedDivision,PointsPG,OppPointsPG,DiffPointsPG,AssistRatio,WinStreak
0,1610612737,East,Hawks,Atlanta,7,26485291,435.0,939,445,494,...,0,0,0,0,0,2131.2,2125.8,5.4,10.401361,-49
1,1610612738,East,Celtics,Boston,2,22194401,486.0,896,608,288,...,0,18,18,0,0,2122.2,2005.2,117.0,11.734996,320
2,1610612739,East,Cavaliers,Cleveland,4,21990223,457.0,788,501,287,...,0,0,17,0,0,1909.1,1817.3,91.8,10.80043,214
3,1610612740,West,Pelicans,New Orleans,9,21787844,413.0,840,423,417,...,0,0,0,0,0,1830.4,1800.0,30.4,9.541825,6
4,1610612741,East,Bulls,Chicago,10,20571173,450.0,866,421,445,...,0,0,0,0,0,1922.7,1900.6,22.1,9.303988,-24
5,1610612742,West,Mavericks,Dallas,11,21378108,595.0,881,417,464,...,0,0,0,21,21,2398.2,2396.1,2.1,12.052029,-47
6,1610612743,West,Nuggets,Denver,1,16085664,438.0,914,578,336,...,16,16,16,0,0,1852.8,1800.0,52.8,9.778387,242
7,1610612744,West,Warriors,Golden State,6,18944493,458.0,842,461,381,...,0,0,17,0,0,2021.3,1990.7,30.6,10.228411,80
8,1610612745,West,Rockets,Houston,14,24449916,347.0,787,206,581,...,0,0,0,0,0,1660.5,1779.0,-118.5,9.406055,-375
9,1610612746,West,Clippers,LA,5,16487674,516.0,939,473,466,...,0,0,18,0,0,2044.8,2035.8,9.0,11.564659,7


### Calculate weighted score for each team

In [10]:
team_stats['Score'] = team_stats[feature_columns].apply(lambda x: sum(x * [feature_weights[col] for col in feature_columns]), axis=1)
team_stats['Score']

0     29.458754
1     67.301399
2     52.801514
3     34.614574
4     26.735548
5     30.173454
6     57.756108
7     42.232262
8    -10.856992
9     39.179999
10    45.149108
11    39.767110
12    70.533764
13    34.363224
14    41.690187
15    42.528090
16    20.325458
17    24.828073
18    62.130402
19    44.022960
20    22.268845
21    46.630338
22     2.491910
23    28.705264
24    34.226991
25    26.847997
26    60.700790
27    21.422242
28   -11.095207
29    10.131525
Name: Score, dtype: float64

Sort teams by score and rank them


In [11]:
team_stats = team_stats.sort_values(['Conference', 'Score'], ascending=[True, False])
team_stats['Rank'] = team_stats.groupby('Conference').cumcount() + 1

Add a new column for the difference between the calculated rank and the PlayoffRank

In [12]:
team_stats['RankDifference'] = team_stats['Rank'] - team_stats['PlayoffRank']

Split teams into conferences

In [13]:
eastern_conference = team_stats[team_stats['Conference'] == 'East'][['TeamCity', 'TeamName', 'Rank', 'PlayoffRank', 'RankDifference']]
western_conference = team_stats[team_stats['Conference'] == 'West'][['TeamCity', 'TeamName', 'Rank', 'PlayoffRank', 'RankDifference']]

### Ranking the teams
Ranking the team by the feature weight score and showing the difference between PlayoffRank in standings dataset

In [14]:
print("Eastern Conference Rankings:\n", eastern_conference)
print("Western Conference Rankings:\n", western_conference)

Eastern Conference Rankings:
         TeamCity   TeamName  Rank  PlayoffRank  RankDifference
12     Milwaukee      Bucks     1            1               0
1         Boston    Celtics     2            2               0
18  Philadelphia      76ers     3            3               0
2      Cleveland  Cavaliers     4            4               0
15      New York     Knicks     5            5               0
14      Brooklyn       Nets     6            6               0
11         Miami       Heat     7            8              -1
24       Toronto    Raptors     8            9              -1
0        Atlanta      Hawks     9            7               2
4        Chicago      Bulls    10           10               0
17       Indiana     Pacers    11           11               0
27    Washington    Wizards    12           12               0
16       Orlando      Magic    13           13               0
29     Charlotte    Hornets    14           14               0
28       Detroit    Pisto

### Predict the top-ranked teams from each conference as NBA Finals contenders

In [15]:
eastern_finalist = team_stats[team_stats['Conference'] == 'East'].iloc[0]
western_finalist = team_stats[team_stats['Conference'] == 'West'].iloc[0]

Normalize the scores of the two finalists



In [16]:
total_score = eastern_finalist['Score'] + western_finalist['Score']
eastern_finalist['NormalizedScore'] = eastern_finalist['Score'] / total_score
western_finalist['NormalizedScore'] = western_finalist['Score'] / total_score

Calculate the winning percentage for each finalist

In [17]:
eastern_finalist['WinningPercentage'] = eastern_finalist['NormalizedScore'] / (eastern_finalist['NormalizedScore'] + western_finalist['NormalizedScore'])
western_finalist['WinningPercentage'] = western_finalist['NormalizedScore'] / (eastern_finalist['NormalizedScore'] + western_finalist['NormalizedScore'])

Display the predicted NBA Finals contenders and their winning percentages

In [18]:
print("Eastern Conference Finalist:\n", eastern_finalist[['TeamCity', 'TeamName', 'Score','WinningPercentage']])
print("Western Conference Finalist:\n", western_finalist[['TeamCity', 'TeamName', 'Score','WinningPercentage']])

Eastern Conference Finalist:
 TeamCity             Milwaukee
TeamName                 Bucks
Score                70.533764
WinningPercentage     0.537463
Name: 12, dtype: object
Western Conference Finalist:
 TeamCity               Memphis
TeamName             Grizzlies
Score                 60.70079
WinningPercentage     0.462537
Name: 26, dtype: object


### Predict the champion 

In [19]:
if eastern_finalist['WinningPercentage'] > western_finalist['WinningPercentage']:
    champion = eastern_finalist
else:
    champion = western_finalist

print("Predicted NBA Champion:\n", champion[['TeamCity', 'TeamName', 'Score', 'NormalizedScore', 'WinningPercentage']])


Predicted NBA Champion:
 TeamCity             Milwaukee
TeamName                 Bucks
Score                70.533764
NormalizedScore       0.537463
WinningPercentage     0.537463
Name: 12, dtype: object


### Combining the calculated scores with the playoff ranks to predict the NBA champion. 

By taking the reciprocal of the PlayoffRank, we create a new metric where higher values are considered better. This makes it easier to work with the other metrics in our model, where higher values are also considered better.

In [20]:
team_stats['InversePlayoffRank'] = 1 / team_stats['PlayoffRank']
#The InversePlayoffRank is calculated as the reciprocal of the PlayoffRank. 
#The PlayoffRank is a ranking of the teams where a lower rank is considered better. 
#The best team has a rank of 1, the second-best team has a rank of 2, and so on.

In [21]:
team_stats['NormalizedScore'] = team_stats['Score'] / team_stats['Score'].sum()

In [22]:
eastern_finalist = team_stats[team_stats['Conference'] == 'East'].iloc[0]
western_finalist = team_stats[team_stats['Conference'] == 'West'].iloc[0]

In [23]:
eastern_finalist['NormalizedInversePlayoffRank'] = eastern_finalist['InversePlayoffRank'] / (eastern_finalist['InversePlayoffRank'] + western_finalist['InversePlayoffRank'])
western_finalist['NormalizedInversePlayoffRank'] = western_finalist['InversePlayoffRank'] / (eastern_finalist['InversePlayoffRank'] + western_finalist['InversePlayoffRank'])

The playoff_rank_weight determines the importance of the playoff rank in the prediction.

In [24]:
playoff_rank_weight = 0.3

In [25]:
eastern_finalist['AdjustedWinningPercentage'] = (1 - playoff_rank_weight) * eastern_finalist['NormalizedScore'] + playoff_rank_weight * eastern_finalist['NormalizedInversePlayoffRank']
western_finalist['AdjustedWinningPercentage'] = (1 - playoff_rank_weight) * western_finalist['NormalizedScore'] + playoff_rank_weight * western_finalist['NormalizedInversePlayoffRank']

In [26]:
print("Eastern Conference Finalist:\n", eastern_finalist[['TeamCity', 'TeamName', 'Score', 'NormalizedScore', 'AdjustedWinningPercentage']])
print("Western Conference Finalist:\n", western_finalist[['TeamCity', 'TeamName', 'Score', 'NormalizedScore', 'AdjustedWinningPercentage']])

Eastern Conference Finalist:
 TeamCity                     Milwaukee
TeamName                         Bucks
Score                        70.533764
NormalizedScore               0.068013
AdjustedWinningPercentage     0.247609
Name: 12, dtype: object
Western Conference Finalist:
 TeamCity                       Memphis
TeamName                     Grizzlies
Score                         60.70079
NormalizedScore               0.058531
AdjustedWinningPercentage     0.140972
Name: 26, dtype: object


In [27]:
if eastern_finalist['AdjustedWinningPercentage'] > western_finalist['AdjustedWinningPercentage']:
    champion = eastern_finalist
else:
    champion = western_finalist

print("Predicted NBA Champion:\n", champion[['TeamCity', 'TeamName', 'Score', 'NormalizedScore', 'AdjustedWinningPercentage']])

Predicted NBA Champion:
 TeamCity                     Milwaukee
TeamName                         Bucks
Score                        70.533764
NormalizedScore               0.068013
AdjustedWinningPercentage     0.247609
Name: 12, dtype: object
