In [25]:
# Import the libraries
import numpy as np
import pandas as pd
import random
import time
from unidecode import unidecode

In [47]:
# Create list of team codes
teams = [
    'atl', 'bos', 'brk', 'cho', 'chi', 'cle', 'dal', 'den', 'det', 'gsw',
    'hou', 'ind', 'lac', 'lal', 'mem', 'mia', 'mil', 'min', 'nop', 'nyk',
    'okc', 'orl', 'phi', 'pho', 'por', 'sac', 'sas', 'tor', 'uta', 'was'
]

In [49]:
# Create the list of years (seasons)
# seasons = ['2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023']
seasons = ['2024']
len(seasons)

1

In [51]:
# Create the list of stats
stats = [
    'FG', 'FGA', 'FG%',
    '3P', '3PA', '3P%',
    'FT', 'FTA', 'FT%',
    'ORB', 'TRB', 'AST',
    'STL', 'BLK', 'TOV', 'PF'
]

In [52]:
# Create the team stats dictionary
tm_stats_dict = {stat: 'Tm_' + str(stat) for stat in stats}

# Create the opponent stats dictionary
opp_stats_dict = {stat + '.1': 'Opp_' + str(stat) for stat in stats}

In [53]:
# Create an empty dataframe to append
nba_df = pd.DataFrame()

# Iterate through the seasons
for season in seasons:

    # Iterate through the teams
    for team in teams:

        # Set the URL
        url = 'https://www.basketball-reference.com/teams/' + team + '/' + season + '/gamelog/'
        print(url)

        # Get game stats from 'tgl_basic' table (this is the scraping statement)
        team_df = pd.read_html(url, header=1, attrs={'id':'tgl_basic'})[0]

        # Drop rows where 'Rk' is null or where 'Rk' is equal to 'Rk'
        team_df = team_df[(team_df['Rk'].str != '') & (team_df['Rk'].str.isnumeric())]

        # Drop the blank column
        team_df = team_df.drop(columns=['Rk', 'Unnamed: 24'])

        # Rename a column
        team_df = team_df.rename(columns={'Unnamed: 3':'Home', 'Tm':'Tm_Pts', 'Opp.1':'Opp_Pts'})
        team_df = team_df.rename(columns=tm_stats_dict)
        team_df = team_df.rename(columns=opp_stats_dict)

        # Replace values in columns 'Home' and 'Opp' of team_df
        team_df['Home'] = team_df['Home'].apply(lambda x: 0 if x == '@' else 1)

        # Add two columns to the front of team_df
        team_df.insert(loc=0, column='Season', value=season)
        team_df.insert(loc=1, column='Team', value=team.upper())

        # Append the current year and team gamelogs to the aggregate dataframe
        nba_df = pd.concat([nba_df, team_df], ignore_index=True)

        # Pause program to abide by basketball-reference.com rules
        time.sleep(random.randint(4, 6))

# Display the aggregate dataframe
# print(nba_df)

https://www.basketball-reference.com/teams/atl/2023/gamelog/
https://www.basketball-reference.com/teams/bos/2023/gamelog/
https://www.basketball-reference.com/teams/brk/2023/gamelog/
https://www.basketball-reference.com/teams/cho/2023/gamelog/
https://www.basketball-reference.com/teams/chi/2023/gamelog/
https://www.basketball-reference.com/teams/cle/2023/gamelog/
https://www.basketball-reference.com/teams/dal/2023/gamelog/
https://www.basketball-reference.com/teams/den/2023/gamelog/
https://www.basketball-reference.com/teams/det/2023/gamelog/
https://www.basketball-reference.com/teams/gsw/2023/gamelog/
https://www.basketball-reference.com/teams/hou/2023/gamelog/
https://www.basketball-reference.com/teams/ind/2023/gamelog/
https://www.basketball-reference.com/teams/lac/2023/gamelog/
https://www.basketball-reference.com/teams/lal/2023/gamelog/
https://www.basketball-reference.com/teams/mem/2023/gamelog/
https://www.basketball-reference.com/teams/mia/2023/gamelog/
https://www.basketball-r

In [54]:
print(nba_df)

     Season Team   G        Date  Home  Opp W/L Tm_Pts Opp_Pts Tm_FG  ...  \
0      2023  ATL   1  2022-10-19     1  HOU   W    117     107    45  ...   
1      2023  ATL   2  2022-10-21     1  ORL   W    108      98    40  ...   
2      2023  ATL   3  2022-10-23     1  CHO   L    109     126    39  ...   
3      2023  ATL   4  2022-10-26     0  DET   W    118     113    45  ...   
4      2023  ATL   5  2022-10-28     0  DET   W    136     112    55  ...   
...     ...  ...  ..         ...   ...  ...  ..    ...     ...   ...  ...   
2455   2023  WAS  78  2023-04-02     0  NYK   L    109     118    39  ...   
2456   2023  WAS  79  2023-04-04     1  MIL   L    128     140    50  ...   
2457   2023  WAS  80  2023-04-05     0  ATL   L    116     134    45  ...   
2458   2023  WAS  81  2023-04-07     1  MIA   W    114     108    39  ...   
2459   2023  WAS  82  2023-04-09     1  HOU   L    109     114    41  ...   

     Opp_FT Opp_FTA Opp_FT% Opp_ORB Opp_TRB Opp_AST Opp_STL Opp_BLK Opp_TOV

In [63]:
# Export to a CSV File
nba_df.to_csv('nba_gamelogs_2024.csv', index=False)

In [65]:
"""
References
[1] data: https://www.youtube.com/watch?v=pZBcBrOxCVQ
"""

'\nReferences\n[1] data: https://www.youtube.com/watch?v=pZBcBrOxCVQ\n'