In [29]:
import pandas as pd
from nba_api.stats.static import teams
from nba_api.stats.endpoints import teamgamelog
import time

## Points scored per game per team
We use the `nba_api` stats endpoints to find how many points each team scored in every regular-season and playoff game.

In [6]:
# Build a single DataFrame of per-game points for every team (regular season + playoffs)
def get_all_teams_game_stats(season):
    """Collect the points scored per game by every team in ``season``.

    For each team returned by ``teams.get_teams()``, the regular-season and
    playoff game logs are requested, stacked, and reduced to the ``Game_ID``
    and ``PTS`` columns, with the team's abbreviation added as
    ``TEAM_ABBREVIATION``.

    Parameters
    ----------
    season : str
        Season identifier forwarded unchanged to ``TeamGameLog`` (e.g. '2021-22').

    Returns
    -------
    pandas.DataFrame
        All teams' rows concatenated with a fresh 0..n-1 index and the columns
        ``Game_ID``, ``PTS`` and ``TEAM_ABBREVIATION``.
    """
    season_types = ('Regular Season', 'Playoffs')
    per_team_frames = []

    # Counting from 2 places the pause after the 4th, 9th, 14th, ... team.
    for request_count, team in enumerate(teams.get_teams(), start=2):
        abbreviation = team['abbreviation']

        # One game log per season type, regular season first
        game_logs = [
            teamgamelog.TeamGameLog(
                team_id=team['id'],
                season=season,
                season_type_all_star=season_type,
            )
            for season_type in season_types
        ]

        # Stack both logs, then keep only the columns of interest plus the team tag
        stacked_log = pd.concat([log.get_data_frames()[0] for log in game_logs])
        per_team_frames.append(
            stacked_log[['Game_ID', 'PTS']].assign(TEAM_ABBREVIATION=abbreviation)
        )

        if request_count % 5 == 0:  # sleep so that requests don't time out
            time.sleep(2)

    # Merge every team's rows into one frame with a fresh index
    return pd.concat(per_team_frames, ignore_index=True)

# Season to analyse (e.g., '2021-22')
season = '2021-22'

# Fetch the game stats for all teams and order the rows by game identifier
all_teams_game_stats_df = get_all_teams_game_stats(season).sort_values(by='Game_ID')

# Show the resulting table as the cell output
all_teams_game_stats_df


Unnamed: 0,Game_ID,PTS,TEAM_ABBREVIATION
1168,0022100001,127,MIL
1350,0022100001,104,BKN
718,0022100002,121,GSW
986,0022100002,114,LAL
2633,0022100003,123,CHA
...,...,...,...
171,0042100404,97,BOS
720,0042100405,104,GSW
170,0042100405,94,BOS
169,0042100406,90,BOS


In [3]:
# Destination of the export, relative to the current working directory
file = './all_teams_game_stats.csv'

# Persist the table as CSV, leaving the DataFrame index out of the file
all_teams_game_stats_df.to_csv(path_or_buf=file, index=False)

In [8]:
# Example: keep only the rows whose team abbreviation is 'BOS'
df = all_teams_game_stats_df
df_boston = df.loc[df['TEAM_ABBREVIATION'].eq('BOS')]
df_boston.head()

Unnamed: 0,Game_ID,PTS,TEAM_ABBREVIATION
168,22100005,134,BOS
167,22100020,83,BOS
166,22100037,107,BOS
165,22100041,140,BOS
164,22100056,107,BOS


## Betting odds
We will use an NBA betting odds dataset (https://www.sportsbookreviewsonline.com/scoresoddsarchives/nba-odds-2021-22/) to get the expected result of each game.
This lets us determine whether a team over- or under-performed relative to that expectation.

In [35]:
# Load the odds archive. The loop below treats every two consecutive rows as
# one game (one row per team) -- assumption inherited from the file layout.
df = pd.read_csv('NBA betting odds 2021.csv')
# Remove the space so this name has the same single-token form as the others
df['Team'] = df['Team'].replace('Golden State', 'GoldenState')

teams_names = df['Team'].unique()


def _game_lines(row_a, row_b):
    """Return both rows' lines as floats, preferring 'Close' over 'Open'.

    'Close' is used when it parses as a number on both rows; otherwise 'Open'
    is tried for both rows. If neither column parses, ``(nan, nan)`` is
    returned so the game stays in the output as a visible gap instead of
    aborting the whole cell.
    """
    for column in ('Close', 'Open'):
        try:
            return float(row_a[column]), float(row_b[column])
        except (TypeError, ValueError):
            continue
    return float('nan'), float('nan')


# One list per team, created up front so the columns keep first-appearance order
games_by_team = {team: [] for team in teams_names}

# Single pass over the row pairs; a trailing unpaired row is ignored
for i in range(0, len(df) - 1, 2):
    line1 = df.iloc[i]
    line2 = df.iloc[i + 1]
    close1, close2 = _game_lines(line1, line2)

    # From each team's point of view: its own line when that is the smaller of
    # the two, otherwise the negated opponent line.
    # NOTE(review): presumably the smaller number is the point spread shown on
    # the favourite's row and the larger one is the game total -- confirm
    # against the dataset description.
    games_by_team.setdefault(line1['Team'], []).append(close1 if close1 < close2 else -close2)
    games_by_team.setdefault(line2['Team'], []).append(close2 if close2 < close1 else -close1)

# Every team gets its OWN games (previously each entry held Boston's games).
# Teams can have different numbers of games, so the values are stored as Series:
# pd.DataFrame(predictions) then pads the shorter columns with NaN instead of
# rejecting unequal-length lists.
predictions = {team: pd.Series(games, dtype=float) for team, games in games_by_team.items()}

In [36]:
# One column per team, built from the per-team entries collected in `predictions`
df_predictions = pd.DataFrame(data=predictions)

In [37]:
# Inspect the column labels (the team names as spelled in the odds file) before renaming them
df_predictions.columns

Index(['Brooklyn', 'Milwaukee', 'GoldenState', 'LALakers', 'Indiana',
       'Charlotte', 'Chicago', 'Detroit', 'Washington', 'Toronto', 'Boston',
       'NewYork', 'Cleveland', 'Memphis', 'Philadelphia', 'NewOrleans',
       'Houston', 'Minnesota', 'Orlando', 'SanAntonio', 'OklahomaCity', 'Utah',
       'Sacramento', 'Portland', 'Denver', 'Phoenix', 'Dallas', 'Atlanta',
       'Miami', 'LAClippers'],
      dtype='object')

In [38]:
# Team name as spelled in the odds dataset -> team abbreviation used in the game stats table.
# Entries are listed in the same order as the columns of df_predictions.
column_mapping = dict(
    Brooklyn='BKN', Milwaukee='MIL', GoldenState='GSW', LALakers='LAL', Indiana='IND',
    Charlotte='CHA', Chicago='CHI', Detroit='DET', Washington='WAS', Toronto='TOR',
    Boston='BOS', NewYork='NYK', Cleveland='CLE', Memphis='MEM', Philadelphia='PHI',
    NewOrleans='NOP', Houston='HOU', Minnesota='MIN', Orlando='ORL', SanAntonio='SAS',
    OklahomaCity='OKC', Utah='UTA', Sacramento='SAC', Portland='POR', Denver='DEN',
    Phoenix='PHX', Dallas='DAL', Atlanta='ATL', Miami='MIA', LAClippers='LAC',
)

In [39]:
# Swap the odds-file team names for abbreviations; labels missing from the mapping are left unchanged
df_predictions = df_predictions.rename(mapper=column_mapping, axis='columns')