In [1]:
import pandas as pd
import numpy as np

Import NBA statistic from NBA API (player statistics and teams standings)

In [2]:
from nba_api.stats.endpoints import leaguedashplayerstats, leaguestandings

This script uses the RandomForestClassifier from the scikit-learn library to train the model.

In [3]:
from sklearn.model_selection import train_test_split 
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer

# Collect data from NBA API

Get team standings dataset 

In [4]:
standings = leaguestandings.LeagueStandings(season='2022-23').get_data_frames()[0]

In [5]:
standings

Unnamed: 0,LeagueID,SeasonID,TeamID,TeamCity,TeamName,Conference,ConferenceRecord,PlayoffRank,ClinchIndicator,Division,...,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,PreAS,PostAS
0,0,22022,1610612743,Denver,Nuggets,West,32-14,1,- nw,Northwest,...,,,,,,4-3,10-4,9-5,41-18,10-7
1,0,22022,1610612749,Milwaukee,Bucks,East,32-16,1,- c,Central,...,,,,,,6-0,9-5,8-7,41-17,14-5
2,0,22022,1610612738,Boston,Celtics,East,31-17,2,- x,Atlantic,...,,,,,,4-2,14-2,8-6,42-17,11-7
3,0,22022,1610612763,Memphis,Grizzlies,West,28-20,2,- sw,Southwest,...,,,,,,4-3,8-6,10-4,35-22,13-6
4,0,22022,1610612758,Sacramento,Kings,West,30-16,3,- x,Pacific,...,,,,,,2-4,9-5,8-6,32-25,14-5
5,0,22022,1610612755,Philadelphia,76ers,East,30-16,3,- x,Atlantic,...,,,,,,4-4,8-6,9-4,38-19,12-7
6,0,22022,1610612739,Cleveland,Cavaliers,East,31-16,4,- x,Central,...,,,,,,5-1,9-7,9-6,38-23,10-6
7,0,22022,1610612756,Phoenix,Suns,West,26-20,4,,Pacific,...,,,,,,5-1,10-5,5-11,32-28,9-7
8,0,22022,1610612752,New York,Knicks,East,29-19,5,,Atlantic,...,,,,,,3-3,7-9,9-6,33-27,11-6
9,0,22022,1610612746,LA,Clippers,West,24-23,5,,Pacific,...,,,,,,3-4,10-6,8-7,33-28,8-8


Get player stats data

In [6]:
player_stats = leaguedashplayerstats.LeagueDashPlayerStats(season='2022-23', season_type_all_star='Regular Season', per_mode_detailed='PerGame').get_data_frames()[0]
player_stats

Unnamed: 0,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,...,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK
0,1630639,A.J. Lawson,A.J.,1610612742,DAL,22.0,13,5,8,0.385,...,474,393,469,474,445,338,480,232,35,468
1,1631260,AJ Green,AJ,1610612749,MIL,23.0,33,26,7,0.788,...,474,475,432,494,361,316,428,232,35,401
2,1631100,AJ Griffin,AJ,1610612737,ATL,19.0,67,31,36,0.463,...,343,286,375,405,213,162,274,232,35,262
3,203932,Aaron Gordon,Aaron,1610612743,DEN,27.0,64,44,20,0.688,...,69,34,203,58,82,3,79,59,35,87
4,1628988,Aaron Holiday,Aaron,1610612737,ATL,26.0,58,30,28,0.517,...,327,293,352,331,387,225,386,232,35,391
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
518,1628380,Zach Collins,Zach,1610612759,SAS,25.0,61,18,43,0.295,...,66,125,25,112,142,485,128,71,35,133
519,203897,Zach LaVine,Zach,1610612741,CHI,28.0,72,35,37,0.486,...,286,22,153,45,21,245,44,149,35,31
520,1630192,Zeke Nnaji,Zeke,1610612743,DEN,22.0,49,33,16,0.673,...,148,337,200,260,341,420,363,180,35,371
521,1630533,Ziaire Williams,Ziaire,1610612763,MEM,21.0,36,21,15,0.583,...,387,407,283,319,329,402,381,232,35,367


# Prepare the dataset for training and testing 

Merge team standings with player stats by Team ID 

In [7]:
df = pd.merge(player_stats, standings, left_on='TEAM_ID', right_on='TeamID', how='outer')

In [8]:
df

Unnamed: 0,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,...,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,PreAS,PostAS
0,1630639,A.J. Lawson,A.J.,1610612742,DAL,22.0,13,5,8,0.385,...,,,,,,3-3,7-7,11-6,31-29,6-11
1,1629735,Chris Silva,Chris,1610612742,DAL,26.0,1,1,0,1.000,...,,,,,,3-3,7-7,11-6,31-29,6-11
2,1626174,Christian Wood,Christian,1610612742,DAL,27.0,64,32,32,0.500,...,,,,,,3-3,7-7,11-6,31-29,6-11
3,202722,Davis Bertans,Davis,1610612742,DAL,30.0,42,20,22,0.476,...,,,,,,3-3,7-7,11-6,31-29,6-11
4,203939,Dwight Powell,Dwight,1610612742,DAL,31.0,72,36,36,0.500,...,,,,,,3-3,7-7,11-6,31-29,6-11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
518,1629011,Mitchell Robinson,Mitchell,1610612752,NYK,24.0,55,32,23,0.582,...,,,,,,3-3,7-9,9-6,33-27,11-6
519,1630167,Obi Toppin,Obi,1610612752,NYK,25.0,62,34,28,0.548,...,,,,,,3-3,7-9,9-6,33-27,11-6
520,1629656,Quentin Grimes,Quentin,1610612752,NYK,22.0,66,38,28,0.576,...,,,,,,3-3,7-9,9-6,33-27,11-6
521,1629628,RJ Barrett,RJ,1610612752,NYK,22.0,70,39,31,0.557,...,,,,,,3-3,7-9,9-6,33-27,11-6


Feature engineering. Including these two features in the machine learning model can help to capture important information about a team's performance that is not captured by other features such as points per game or rebounds per game.

In [9]:
df['AssistRatio'] = df['AST'] / (df['AST'] + df['TOV'])
#Indication of a player's ability to assist their teamates without turning the ball over

df['WinStreak'] = df['W'] - df['L']
#Indication of how many consecutive game has won (if value +) or lost (if value -)


Feature selection 

In [10]:
feature_columns = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'FG_PCT', 'FT_PCT', 'FG3_PCT', 'W_PCT', 'AssistRatio', 'WinStreak']
X = df[feature_columns]
y = df['PlayoffRank']

In [11]:
X

Unnamed: 0,PTS,REB,AST,STL,BLK,TOV,FG_PCT,FT_PCT,FG3_PCT,W_PCT,AssistRatio,WinStreak
0,2.6,0.8,0.1,0.1,0.0,0.1,0.500,0.333,0.438,0.385,0.500000,-3
1,2.0,0.0,0.0,0.0,0.0,1.0,1.000,0.000,0.000,1.000,0.000000,1
2,16.8,7.4,1.8,0.5,1.0,1.8,0.518,0.778,0.368,0.500,0.500000,0
3,4.2,1.0,0.5,0.2,0.1,0.2,0.428,0.833,0.393,0.476,0.714286,-2
4,6.9,4.2,0.9,0.7,0.3,0.9,0.727,0.669,0.000,0.500,0.500000,0
...,...,...,...,...,...,...,...,...,...,...,...,...
518,7.2,9.0,0.8,0.9,1.7,0.6,0.690,0.491,0.000,0.582,0.571429,9
519,6.3,2.8,0.8,0.3,0.2,0.5,0.418,0.784,0.327,0.548,0.615385,6
520,10.4,3.3,2.0,0.6,0.4,1.0,0.457,0.791,0.376,0.576,0.666667,10
521,19.6,5.0,2.7,0.4,0.2,2.3,0.435,0.743,0.318,0.557,0.540000,8


In [12]:
# Impute missing values with mean value
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

In [13]:
X

Unnamed: 0,PTS,REB,AST,STL,BLK,TOV,FG_PCT,FT_PCT,FG3_PCT,W_PCT,AssistRatio,WinStreak
0,2.6,0.8,0.1,0.1,0.0,0.1,0.500,0.333,0.438,0.385,0.500000,-3.0
1,2.0,0.0,0.0,0.0,0.0,1.0,1.000,0.000,0.000,1.000,0.000000,1.0
2,16.8,7.4,1.8,0.5,1.0,1.8,0.518,0.778,0.368,0.500,0.500000,0.0
3,4.2,1.0,0.5,0.2,0.1,0.2,0.428,0.833,0.393,0.476,0.714286,-2.0
4,6.9,4.2,0.9,0.7,0.3,0.9,0.727,0.669,0.000,0.500,0.500000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
518,7.2,9.0,0.8,0.9,1.7,0.6,0.690,0.491,0.000,0.582,0.571429,9.0
519,6.3,2.8,0.8,0.3,0.2,0.5,0.418,0.784,0.327,0.548,0.615385,6.0
520,10.4,3.3,2.0,0.6,0.4,1.0,0.457,0.791,0.376,0.576,0.666667,10.0
521,19.6,5.0,2.7,0.4,0.2,2.3,0.435,0.743,0.318,0.557,0.540000,8.0


In [14]:
y

0      11
1      11
2      11
3      11
4      11
       ..
518     5
519     5
520     5
521     5
522     5
Name: PlayoffRank, Length: 523, dtype: int64

 Split dataset into training and testing sets

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Train a machine learning model

Instantiate and train the RandomForestClassifier

In [16]:
clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
clf.fit(X_train, y_train)

# Test the model to find the most accurate prediction

Predict the test set results

In [17]:
y_pred = clf.predict(X_test)

In [18]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


Accuracy: 0.2366412213740458


In [19]:
# Predict the champion
df['predicted_rank'] = clf.predict(X)
predicted_champion = df.loc[df['predicted_rank'].idxmin()]['TeamName']
print(f"Predicted NBA Champion: {predicted_champion}")

Predicted NBA Champion: Bucks


In [23]:
# Assign weights to each feature based on their importance
feature_weights = {
    'PTS': 0.1,
    'REB': 0.15,
    'AST': 0.15,
    'STL': 0.05,
    'BLK': 0.05,
    'TOV': -0.15,  # Negative weight, as fewer turnovers are better
    'FG_PCT': 0.1,
    'FT_PCT': 0.05,
    'FG3_PCT': 0.05,
    'W_PCT': 0.2,
    'AssistRatio': 0.15,
    'WinStreak': 0.1
}

In [24]:
# Calculate the weighted score for each team
df['WeightedScore'] = df[feature_columns].apply(lambda row: sum(row[col] * feature_weights[col] for col in feature_columns), axis=1)

# Rank the teams based on the weighted score separately for each conference
df['Rank'] = df.groupby('CONFERENCE')['WeightedScore'].rank(ascending=False, method='min')
df_ranked = df[['TEAM_ID', 'WeightedScore', 'Rank', 'CONFERENCE']].drop_duplicates().sort_values(['CONFERENCE', 'Rank'])

# Merge the ranked teams with the standings dataset
df_with_playoff_rank = pd.merge(df_ranked, standings, left_on='TEAM_ID', right_on='TeamID', how='outer')

# Compare our ranking with the playoff rankings
df_with_playoff_rank['PlayoffRankDiff'] = df_with_playoff_rank['Rank'] - df_with_playoff_rank['PlayoffRank']

# Predict the championship team
predicted_champion = df_with_playoff_rank.loc[df_with_playoff_rank['PlayoffRankDiff'].idxmin()]


KeyError: 'CONFERENCE'

In [None]:
# Step 1: Calculate the weighted score for each team based on feature weights
for feature, weight in feature_weights.items():
    df[feature + '_Weighted'] = df[feature] * weight

# Step 2: Calculate the average weighted score for each team
weighted_columns = [feature + '_Weighted' for feature in feature_columns]
df['WeightedScore'] = df[weighted_columns].sum(axis=1)

# Step 3: Rank the teams based on their average weighted score
df['Rank'] = df['WeightedScore'].rank(ascending=False)

# Step 4: Divide the teams into two conferences and display the rankings
eastern_teams = df[df['Conference'] == 'East'].sort_values('Rank')
western_teams = df[df['Conference'] == 'West'].sort_values('Rank')

print("Eastern Conference Teams:")
print(eastern_teams[['TeamName', 'Rank', 'WeightedScore']])

print("\nWestern Conference Teams:")
print(western_teams[['TeamName', 'Rank', 'WeightedScore']])

# Step 5: Predict the teams that will go to the final
eastern_finalist = eastern_teams.iloc[0]
western_finalist = western_teams.iloc[0]

print("\nEastern Conference Finalist:")
print(eastern_finalist[['TeamName', 'Rank', 'WeightedScore']])

print("\nWestern Conference Finalist:")
print(western_finalist[['TeamName', 'Rank', 'WeightedScore']])

# Step 6: Determine the champion based on the highest average weighted score
champion = eastern_finalist if eastern_finalist['WeightedScore'] > western_finalist['WeightedScore'] else western_finalist

print("\nNBA Champion:")
print(champion[['TeamName', 'Rank', 'WeightedScore']])


In [21]:
# Calculate the weighted score for each team
normalized_team_averages['WeightedScore'] = normalized_team_averages[feature_weights.keys()].dot(pd.Series(feature_weights))
normalized_team_averages['WeightedScore']

NameError: name 'normalized_team_averages' is not defined

In [22]:
# Merge the 'Conference' column from the 'standings' dataframe with the 'normalized_team_averages' dataframe
normalized_team_averages = pd.merge(normalized_team_averages, standings[['TeamID', 'TeamName', 'Conference']], left_index=True, right_on='TeamID')
normalized_team_averages

NameError: name 'normalized_team_averages' is not defined

In [91]:
# Sort the teams by conference and weighted score (in descending order)
ranked_teams_by_weighted_score = normalized_team_averages.sort_values(['Conference', 'WeightedScore'], ascending=[True, False])
ranked_teams_by_weighted_score

Unnamed: 0_level_0,PTS,REB,AST,STL,BLK,TOV,FG_PCT,FT_PCT,FG3_PCT,W_PCT,AssistRatio,WinStreak,WeightedScore,WeightedRank,TeamScore,ConferenceWeightedRank,Conference
TeamName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Nuggets,0.672679,0.667735,0.708473,0.514401,0.484649,0.504505,1.0,0.399959,1.0,0.886761,0.50321,0.975246,0.768332,1.0,1.665684,29.0,
Suns,1.0,0.787927,0.979027,0.777074,0.981725,0.808559,0.399154,0.789537,0.52769,0.630922,0.729216,0.644614,0.737504,2.0,1.65625,30.0,
Grizzlies,0.633528,0.793112,0.460126,0.844673,0.922027,0.395866,0.412146,0.344491,0.322001,0.837871,0.749218,0.904879,0.725278,3.0,1.691565,28.0,
Knicks,0.775239,1.0,0.533557,0.507834,0.497076,0.423423,0.355236,0.530857,0.266736,0.739494,0.865482,0.744702,0.721884,4.0,1.802033,26.0,
Celtics,0.497099,0.793112,0.418674,0.334779,0.727096,0.279279,0.47864,0.364501,0.392142,0.909404,0.754917,0.97967,0.721461,5.0,1.784693,27.0,
Pelicans,0.908038,0.771902,0.733641,0.990495,0.463938,0.853604,0.500356,0.651792,0.42738,0.571171,0.791049,0.576501,0.655852,6.0,1.815708,24.0,
Raptors,0.453398,0.575798,0.253797,1.0,0.612188,0.108582,0.596432,0.193074,0.258121,0.610096,1.0,0.570078,0.645331,7.0,1.810283,25.0,
Thunder,0.62436,0.707799,0.481963,0.924827,0.62963,0.448198,0.281403,0.583818,0.630517,0.538548,0.920697,0.559473,0.642012,8.0,1.826832,23.0,
Warriors,0.791138,0.703793,0.979027,0.777074,0.422515,1.0,0.551911,0.90692,0.557249,0.579941,0.556485,0.637519,0.633128,9.0,1.912064,20.0,
Clippers,0.857814,0.797721,1.0,0.715054,0.574399,0.88989,0.169664,0.358003,0.371162,0.474393,0.906898,0.563572,0.627124,10.0,1.860903,22.0,


In [92]:
# Split the teams by conference
eastern_conference_teams = ranked_teams_by_weighted_score[ranked_teams_by_weighted_score['Conference'] == 'East']
western_conference_teams = ranked_teams_by_weighted_score[ranked_teams_by_weighted_score['Conference'] == 'West']

In [96]:
print("Eastern Conference Teams:")
print(normalized_team_averages[normalized_team_averages['Conference'] == 'East'][['WeightedScore']])


Eastern Conference Teams:
Empty DataFrame
Columns: [WeightedScore]
Index: []


In [None]:
print("\nWestern Conference Teams:")
print(western_conference_teams[['TeamName', 'WeightedScore']])

In [94]:
print(eastern_conference_teams.columns)

Index(['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'FG_PCT', 'FT_PCT', 'FG3_PCT',
       'W_PCT', 'AssistRatio', 'WinStreak', 'WeightedScore', 'WeightedRank',
       'TeamScore', 'ConferenceWeightedRank', 'Conference'],
      dtype='object')


In [78]:
standings = leaguestandings.LeagueStandings(season='2022-23').get_data_frames()[0]

In [76]:
standings = standings.set_index('TeamName')

In [79]:
# Extract the team names and playoff ranks
playoff_ranks = standings[['TeamName', 'PlayoffRank']]
playoff_ranks

Unnamed: 0,TeamName,PlayoffRank
0,Nuggets,1
1,Bucks,1
2,Celtics,2
3,Grizzlies,2
4,Kings,3
5,76ers,3
6,Cavaliers,4
7,Suns,4
8,Knicks,5
9,Clippers,5


In [80]:
# Calculate the weighted score for each team
normalized_team_averages['WeightedScore'] = normalized_team_averages[feature_weights.keys()].dot(pd.Series(feature_weights))
normalized_team_averages['WeightedScore']

TeamName
76ers            0.626709
Bucks            0.606548
Bulls            0.435354
Cavaliers        0.542555
Celtics          0.721461
Clippers         0.627124
Grizzlies        0.725278
Hawks            0.524912
Heat             0.483171
Hornets          0.478999
Jazz             0.383186
Kings            0.293739
Knicks           0.721884
Lakers           0.615156
Magic            0.551450
Mavericks        0.399017
Nets             0.447600
Nuggets          0.768332
Pacers           0.526739
Pelicans         0.655852
Pistons          0.311653
Raptors          0.645331
Rockets          0.295546
Spurs            0.278747
Suns             0.737504
Thunder          0.642012
Timberwolves     0.592538
Trail Blazers    0.378831
Warriors         0.633128
Wizards          0.312252
Name: WeightedScore, dtype: float64

In [81]:
# Rank the teams based on their weighted score
normalized_team_averages['WeightedRank'] = normalized_team_averages['WeightedScore'].rank(ascending=False)
normalized_team_averages['WeightedRank']

TeamName
76ers            11.0
Bucks            13.0
Bulls            22.0
Cavaliers        16.0
Celtics           5.0
Clippers         10.0
Grizzlies         3.0
Hawks            18.0
Heat             19.0
Hornets          20.0
Jazz             24.0
Kings            29.0
Knicks            4.0
Lakers           12.0
Magic            15.0
Mavericks        23.0
Nets             21.0
Nuggets           1.0
Pacers           17.0
Pelicans          6.0
Pistons          27.0
Raptors           7.0
Rockets          28.0
Spurs            30.0
Suns              2.0
Thunder           8.0
Timberwolves     14.0
Trail Blazers    25.0
Warriors          9.0
Wizards          26.0
Name: WeightedRank, dtype: float64

In [82]:
# Drop the 'Conference' column from 'normalized_team_averages' dataframe if it exists
if 'Conference' in normalized_team_averages.columns:
    normalized_team_averages = normalized_team_averages.drop(columns=['Conference'])

# Add the 'Conference' column to the 'normalized_team_averages' dataframe
normalized_team_averages = normalized_team_averages.join(standings['Conference'])
normalized_team_averages


Unnamed: 0_level_0,PTS,REB,AST,STL,BLK,TOV,FG_PCT,FT_PCT,FG3_PCT,W_PCT,AssistRatio,WinStreak,WeightedScore,WeightedRank,TeamScore,ConferenceWeightedRank,Conference
TeamName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
76ers,0.677355,0.543536,0.475671,0.694988,0.671053,0.436937,0.241304,0.700602,0.233362,0.905821,0.394704,0.921323,0.626709,11.0,1.323097,12.0,
Bucks,0.56132,0.744302,0.451902,0.321045,0.463938,0.489489,0.185506,0.903925,0.485484,1.0,0.114257,1.0,0.606548,13.0,1.45198,11.0,
Bulls,0.235975,0.351936,0.110738,0.551098,0.473684,0.109698,0.199483,0.251205,0.347157,0.549319,0.611178,0.561643,0.435354,22.0,1.913462,5.0,
Cavaliers,0.282919,0.140774,0.223253,0.535647,0.434698,0.173291,0.528361,9.4e-05,0.53361,0.947922,0.530298,0.884846,0.542555,16.0,1.554162,10.0,
Celtics,0.497099,0.793112,0.418674,0.334779,0.727096,0.279279,0.47864,0.364501,0.392142,0.909404,0.754917,0.97967,0.721461,5.0,0.903627,14.0,
Clippers,0.857814,0.797721,1.0,0.715054,0.574399,0.88989,0.169664,0.358003,0.371162,0.474393,0.906898,0.563572,0.627124,10.0,1.30755,9.0,
Grizzlies,0.633528,0.793112,0.460126,0.844673,0.922027,0.395866,0.412146,0.344491,0.322001,0.837871,0.749218,0.904879,0.725278,3.0,0.810373,14.0,
Hawks,0.646731,0.721468,0.323924,0.628355,0.688109,0.279279,0.401962,0.872642,0.12075,0.518945,0.227246,0.51757,0.524912,18.0,1.747992,7.0,
Heat,0.586585,0.7818,0.394986,0.891027,0.259259,0.491256,0.06949,0.914353,0.0,0.614249,0.15782,0.649789,0.483171,19.0,1.806699,6.0,
Hornets,0.760276,0.696581,0.708054,0.910599,0.762183,0.603604,0.206552,0.679701,0.189925,0.289116,0.330125,0.276993,0.478999,20.0,1.920908,4.0,


In [84]:
# Calculate a simple average score for each team based on the normalized features
numeric_columns = normalized_team_averages.select_dtypes(include=np.number).columns
normalized_team_averages['TeamScore'] = normalized_team_averages[numeric_columns].mean(axis=1)
normalized_team_averages['TeamScore']

TeamName
76ers            2.075635
Bucks            2.138867
Bulls            2.210809
Cavaliers        2.178244
Celtics          1.784693
Clippers         1.860903
Grizzlies        1.691565
Hawks            2.173201
Heat             2.167881
Hornets          2.149092
Jazz             2.339655
Kings            2.445176
Knicks           1.802033
Lakers           1.865100
Magic            2.191000
Mavericks        2.314271
Nets             2.095085
Nuggets          1.665684
Pacers           2.225623
Pelicans         1.815708
Pistons          2.337345
Raptors          1.810283
Rockets          2.473219
Spurs            2.499102
Suns             1.656250
Thunder          1.826832
Timberwolves     1.985799
Trail Blazers    2.391632
Warriors         1.912064
Wizards          2.189529
Name: TeamScore, dtype: float64

In [85]:
# Rank the teams within each conference based on their weighted score
normalized_team_averages['ConferenceWeightedRank'] = normalized_team_averages.groupby('Conference')['WeightedScore'].rank(ascending=False)
normalized_team_averages['ConferenceWeightedRank'] 

TeamName
76ers            18.0
Bucks            16.0
Bulls             9.0
Cavaliers        11.0
Celtics          27.0
Clippers         22.0
Grizzlies        28.0
Hawks            13.0
Heat             14.0
Hornets          15.0
Jazz              5.0
Kings             3.0
Knicks           26.0
Lakers           21.0
Magic            10.0
Mavericks         7.0
Nets             17.0
Nuggets          29.0
Pacers            8.0
Pelicans         24.0
Pistons           6.0
Raptors          25.0
Rockets           2.0
Spurs             1.0
Suns             30.0
Thunder          23.0
Timberwolves     19.0
Trail Blazers     4.0
Warriors         20.0
Wizards          12.0
Name: ConferenceWeightedRank, dtype: float64

In [86]:
# Sort the teams by conference and weighted rank
ranked_teams_weighted_conference = normalized_team_averages.sort_values(['Conference', 'ConferenceWeightedRank'], ascending=True)
ranked_teams_weighted_conference

Unnamed: 0_level_0,PTS,REB,AST,STL,BLK,TOV,FG_PCT,FT_PCT,FG3_PCT,W_PCT,AssistRatio,WinStreak,WeightedScore,WeightedRank,TeamScore,ConferenceWeightedRank,Conference
TeamName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Spurs,0.199834,0.351593,0.389872,0.338291,0.298246,0.52498,0.256175,0.618853,0.374694,0.191526,0.390576,0.222778,2.368493,30.0,2.499102,1.0,
Rockets,0.401158,0.589744,0.265101,0.472811,0.364522,0.675676,0.251361,0.826948,0.324092,0.019463,0.667381,0.0,2.345121,28.0,2.473219,2.0,
Kings,0.043287,0.001068,0.0,0.126959,0.0,0.0,0.365929,0.110276,0.380526,0.714736,0.0,0.788219,2.318621,29.0,2.445176,3.0,
Trail Blazers,0.505901,0.276521,0.229175,0.535647,0.610136,0.438262,0.131791,0.22725,0.461396,0.445872,0.535383,0.437437,2.273232,25.0,2.391632,4.0,
Jazz,0.257899,0.331197,0.088367,0.0,0.703271,0.269269,0.388616,0.123238,0.403586,0.423361,0.633131,0.548436,2.224568,24.0,2.339655,5.0,
Pistons,0.508526,0.552182,0.232603,0.612903,0.472658,0.525842,0.226062,1.0,0.11857,0.0,0.521761,0.108821,2.218186,27.0,2.337345,6.0,
Mavericks,0.30164,0.0,0.000479,0.187623,0.266685,0.056199,0.703911,0.24777,0.464201,0.610476,0.449482,0.58988,2.201609,23.0,2.314271,7.0,
Pacers,0.573382,0.532931,0.614094,0.751965,1.0,0.533651,0.27077,0.736236,0.312234,0.417919,0.520178,0.486852,2.125689,17.0,2.225623,8.0,
Bulls,0.235975,0.351936,0.110738,0.551098,0.473684,0.109698,0.199483,0.251205,0.347157,0.549319,0.611178,0.561643,2.106371,22.0,2.210809,9.0,
Magic,0.668834,0.867521,0.432886,0.928111,0.607537,0.687688,0.353878,0.843238,0.529703,0.46585,0.489429,0.452573,2.094556,15.0,2.191,10.0,


In [74]:
# Merge the 'ranked_teams_weighted_conference' dataframe with the 'playoff_ranks' dataframe
comparison_df = pd.merge(ranked_teams_weighted_conference, playoff_ranks, left_on='TeamName', right_on='TeamName', how='outer')


NameError: name 'ranked_teams_weighted_conference' is not defined

In [117]:
# Reset the index to have 'TeamName' as a column
comparison_df.reset_index(inplace=True)

In [120]:
# Calculate the difference between the WeightedRank and PlayoffRank
comparison_df['RankDifference'] = comparison_df['ConferenceWeightedRank'] - comparison_df['PlayoffRank']
comparison_df['RankDifference']

Series([], Name: RankDifference, dtype: float64)

In [119]:
# Calculate the percentage difference
comparison_df['PercentageDifference'] = (comparison_df['RankDifference'] / comparison_df['PlayoffRank']) * 100

# Display the comparison dataframe
comparison_df

Unnamed: 0,index,ConferenceWeightedRank,TeamName,PlayoffRank,RankDifference,PercentageDifference


In [122]:
# Merge the weighted ranks and playoff ranks dataframes using indexes
comparison_df = ranked_teams_weighted_conference[['ConferenceWeightedRank']].merge(playoff_ranks, left_index=True, right_index=True)
comparison_df


Unnamed: 0,ConferenceWeightedRank,TeamName,PlayoffRank


In [123]:
# Reset the index to have 'TeamName' as a column
comparison_df.reset_index(inplace=True)

In [124]:
# Calculate the difference between the WeightedRank and PlayoffRank
comparison_df['RankDifference'] = comparison_df['ConferenceWeightedRank'] - comparison_df['PlayoffRank']

In [125]:
comparison_df['RankDifference']

Series([], Name: RankDifference, dtype: float64)

In [24]:
# Merge standings and player_stats dataframes
merged_df = pd.merge(player_stats, standings, left_on='TEAM_ID', right_on='TeamID', how='outer')

In [25]:
# Calculate the weighted sum of feature columns for each team
merged_df['weighted_sum'] = 0
for feature, weight in feature_weights.items():
    merged_df['weighted_sum'] += merged_df[feature] * weight


KeyError: 'AssistRatio'