In [None]:
import numpy as np
import pandas as pd
import random
import time
from unidecode import unidecode

In [None]:
# Lower-case team codes; each one is substituted into the game-log URL
# by the scraping loop. Order here is the order in which teams are scraped.
teams = [
    'atl', 'bos', 'brk', 'cho', 'chi', 'cle',
    'dal', 'den', 'det', 'gsw', 'hou', 'ind',
    'lac', 'lal', 'mem', 'mia', 'mil', 'min',
    'nop', 'nyk', 'okc', 'orl', 'phi', 'pho',
    'por', 'sac', 'sas', 'tor', 'uta', 'was',
]
# Displayed as the cell output: number of team codes.
len(teams)

In [None]:
# Season labels '2014' through '2023', kept as strings because they are
# interpolated directly into the game-log URL.
seasons = [str(year) for year in range(2014, 2024)]
# Displayed as the cell output: number of seasons.
len(seasons)

In [None]:
# Stats dictionaries
# Box-score column labels shared by the team half and the opponent half of
# each game-log row. The opponent copy of a label is expected to arrive with
# a '.1' suffix (the same convention as the 'Opp.1' column renamed in the
# scraping loop).
# 'FG%' must be upper-case like every other label: with the previous 'fg%'
# spelling the FG% columns never matched and were left un-renamed.
stats = [
    'FG', 'FGA', 'FG%',
    '3P', '3PA', '3P%',
    'FT', 'FTA', 'FT%',
    'ORB', 'TRB', 'AST',
    'STL', 'BLK', 'TOV',
    'PF'
]

# Rename maps for DataFrame.rename: raw label -> 'Tm_<label>' for the team's
# own columns, and '<label>.1' -> 'Opp_<label>' for the opponent's columns.
tm_stats_dictionary = {stat: f'Tm_{stat}' for stat in stats}
opp_stats_dictionary = {f'{stat}.1': f'Opp_{stat}' for stat in stats}

In [None]:
# Scrape the basic game-log table for every (season, team) pair and combine
# the cleaned per-team tables into a single frame, `nba_Data_Frame`.
#
# Per-team frames are collected in a list and concatenated once after the
# loop; re-concatenating the growing frame on every iteration would copy all
# previously scraped rows each time. If the cell is interrupted, the frames
# gathered so far are still available in `team_frames`.
team_frames = []
# URLs that could not be fetched or parsed; reported after the loop.
failed_urls = []

# Iterate through seasons and teams
for season in seasons:
    for team in teams:
        url = f'https://www.basketball-reference.com/teams/{team}/{season}/gamelog/'
        print(f"Scraping URL: {url}")

        try:
            # Read the HTML table
            team_df = pd.read_html(url, header=1, attrs={'id': 'tgl_basic'})[0]
        except (ValueError, OSError) as exc:
            # ValueError: read_html found no table with that id on the page.
            # OSError: base class of urllib's URLError/HTTPError, i.e. network
            # failures and HTTP error statuses such as 404 or 429.
            # NOTE(review): some team/season pairs may not exist under these
            # team codes; they are skipped and listed below rather than
            # aborting the whole run.
            print(f"Skipping {url}: {exc!r}")
            failed_urls.append(url)
        else:
            # Filter out unwanted rows: keep only rows whose 'Rk' value is a
            # number. Anything else (repeated header rows, blanks, NaN) is
            # coerced to NaN and dropped.
            is_game_row = pd.to_numeric(team_df['Rk'], errors='coerce').notna()
            team_df = team_df[is_game_row]
            team_df = team_df.drop(columns=['Rk', 'Unnamed: 24'], errors='ignore')  # Drop columns safely

            # Rename columns for clarity
            team_df = team_df.rename(columns={'Unnamed: 3': 'Home', 'Tm': 'tm_pts', 'Opp.1': 'Opp_pts'})
            team_df = team_df.rename(columns=tm_stats_dictionary)
            team_df = team_df.rename(columns=opp_stats_dictionary)

            # Convert 'Home' column values to binary: 0 when the cell is '@',
            # 1 for every other value (including a missing cell).
            team_df['Home'] = (team_df['Home'] != '@').astype(int)

            # Add season and team information
            team_df.insert(loc=0, column='Season', value=season)
            team_df.insert(loc=1, column='Team', value=team.upper())

            team_frames.append(team_df)

        # Pause to respect scraping limits. This sits outside the try block
        # so the delay is also applied after a failed request.
        time.sleep(random.randint(4, 6))

# Combine everything in one pass; pd.concat raises on an empty list, so fall
# back to an empty frame when nothing was scraped.
nba_Data_Frame = (
    pd.concat(team_frames, ignore_index=True) if team_frames else pd.DataFrame()
)

if failed_urls:
    print(f"{len(failed_urls)} page(s) could not be scraped:")
    for failed_url in failed_urls:
        print(f"  {failed_url}")

print(nba_Data_Frame)

In [None]:
# Print the combined game-log DataFrame built by the scraping cell so the
# result can be inspected in a cell of its own.
print(nba_Data_Frame)

In [None]:
# Write the combined game logs to a CSV file in the working directory.
# index=False leaves the DataFrame's row index out of the file.
output_csv = 'nba-gamelogs-2014-2023.csv'
nba_Data_Frame.to_csv(output_csv, index=False)