# Imports


In [17]:
from basketball_reference_scraper.players import get_stats, get_game_logs, get_player_headshot
from basketball_reference_scraper.teams import get_roster, get_team_stats, get_opp_stats, get_roster_stats, get_team_misc
from basketball_reference_scraper.pbp import get_pbp
from basketball_reference_scraper.shot_charts import get_shot_chart
from basketball_reference_scraper.box_scores import get_box_scores
from basketball_reference_scraper.seasons import get_schedule, get_standings
from basketball_reference_scraper.utils import get_player_suffix, RetriableRequest, remove_accents
from basketball_reference_scraper.lookup import lookup
from basketball_reference_scraper.constants import CATEGORY_COLUMNS, CATEGORIES, TEAM_CATEGORIES
import pandas as pd
import time
import os

YEAR_TEAM_ROSTER = {}


# TESTING

# DATA RETRIEVAL

In [26]:
yearRange= [2021,2023] # YEARS DONE: 2000-2003, 1004 ATL

# Player Data

In [27]:
# FETCH TEAM ROSTERS + TEAM DATA
team_dfs = {
    "per_game": pd.DataFrame(),
    "totals" : pd.DataFrame(),
    "per_minute" : pd.DataFrame(),
    "per_poss" : pd.DataFrame(),
    "advanced" : pd.DataFrame(),
    "adj_shooting" : pd.DataFrame(),
    "shooting" : pd.DataFrame()
}


for year in range(yearRange[0],yearRange[1]+1):
    teams=  None
    print(year)
    stat_df = get_team_stats(season_end_year=year)
    teams = list(stat_df['per_game']['TEAM'])
    for category in TEAM_CATEGORIES:
        team_dfs[category] = pd.concat([team_dfs[category], stat_df[category]])
    TEAM_ROSTER = {}
    for team in teams:
        ROSTER = get_roster(team,year)
        ROSTER['PLAYER'] = ROSTER['PLAYER'].apply(lambda x: remove_accents(x.replace('(TW)', ''),team = team, season_end_year= year).strip())
        TEAM_ROSTER[team] = ROSTER[['PLAYER','BIRTH_DATE']]
    YEAR_TEAM_ROSTER[year] = TEAM_ROSTER

2021
Request # 13 @ 9:15:58
 Request limit reached. Waiting a minute...
Request # 13 @ 9:17:11
 Request limit reached. Waiting a minute...
Request # 13 @ 9:18:24
 Request limit reached. Waiting a minute...
Request # 13 @ 9:19:36
 Request limit reached. Waiting a minute...
Request # 13 @ 9:20:49
 Request limit reached. Waiting a minute...
Request # 13 @ 9:22:19
 Request limit reached. Waiting a minute...


KeyboardInterrupt: 

In [None]:
YEAR_TEAM_ROSTER

{2000: {'ATL':              PLAYER  BIRTH_DATE
  0        Drew Barry  1973-02-17
  1       Cal Bowdler  1977-03-31
  2       Bimbo Coles  1968-04-22
  3    Chris Crawford  1975-05-13
  4    LaPhonso Ellis  1970-05-05
  5       Dion Glover  1978-10-22
  6    Alan Henderson  1972-12-02
  7       Jim Jackson  1970-10-14
  8   Anthony Johnson  1974-10-02
  9    Roshown McLeod  1975-11-17
  10  Dikembe Mutombo  1966-06-25
  11     Isaiah Rider  1971-03-12
  12      Jason Terry  1977-09-15
  13  Lorenzen Wright  1975-11-04,
  'BOS':               PLAYER  BIRTH_DATE
  0     Kenny Anderson  1970-10-09
  1        Dana Barros  1967-04-13
  2        Tony Battie  1976-02-11
  3    Calbert Cheaney  1971-07-17
  4     Pervis Ellison  1967-04-03
  5      Danny Fortson  1976-03-27
  6     Adrian Griffin  1974-07-04
  7     Walter McCarty  1974-02-01
  8       Doug Overton  1969-08-03
  9        Paul Pierce  1977-10-13
  10  Vitaly Potapenko  1975-03-21
  11      Jamel Thomas  1973-03-25
  12      Wayn

In [25]:
# WRITE TEAM DATA

directory = "team_data"
if not os.path.exists(directory):
    os.makedirs(directory)
for key in TEAM_CATEGORIES:
    print(key)

    team_dfs[key].to_csv(os.path.join(directory, f'team_{key}.csv'), index=False)

totals
per_game
per_poss
shooting
advanced


In [5]:
# WRITE ROSTER DATA

import os

# ...
directory = "roster_data"
if not os.path.exists(directory):
        os.makedirs(directory)

for year in YEAR_TEAM_ROSTER:
    newDirectory = os.path.join(directory, str(year))
    if not os.path.exists(newDirectory):
        os.makedirs(newDirectory)
    for team in YEAR_TEAM_ROSTER[year]:
        file_path = os.path.join(newDirectory, f'{team}.txt')
        """with open(file_path, 'w', encoding='utf-8') as file:
            for item in YEAR_TEAM_ROSTER[year][team]:
                file.write(f'{item['PLAYER']},{item['BIRTH_DATE']}\n')"""
        YEAR_TEAM_ROSTER[year][team].to_csv(file_path, index=False)


In [9]:
# FETCH ROSTER DATA FROM TXT (ALL PLAYERS)
file_path = 'C:\\Users\\chris\\AppData\\Local\\Programs\\Python\\Python36\\Lib\\site-packages\\basketball_reference_scraper\\br_names.txt'

with open(file_path, 'r') as file:
    content = file.read()
    ROSTER = content.split('\n')


In [21]:
# RETRIEVE YEAR_TEAM_ROSTER DATA FROM FILES (2000 AND ON)
base_directory = 'roster_data'

# Iterate over each year directory
for year in os.listdir(base_directory):

    year_int = int(year)  # Convert year to integer

    YEAR_TEAM_ROSTER[year_int] = {}  # Use the integer year as the key

    year_path = os.path.join(base_directory, year)
    # Iterate over each team.txt file in the year directory
    for team_path in os.listdir(year_path):
            
        # Construct the full file path
        file_path = os.path.join(year_path, team_path)
        team = team_path[:-4]
        # Perform operations on the file
        # For example, read the file contents
        """with open(file_path, 'r', encoding='utf-8') as file:
            contents = file.read().splitlines()
            contents = [line.split(',') for line in contents]
        YEAR_TEAM_ROSTER[year_int][team] = [(line[0].replace('(TW)', '').strip(),line[1]) for line in contents]"""
        YEAR_TEAM_ROSTER[year_int][team] = pd.read_csv(file_path)


ROSTER_BIRTHDATE = pd.DataFrame()

for year in YEAR_TEAM_ROSTER:
    for team in YEAR_TEAM_ROSTER[year]:
        ROSTER_BIRTHDATE  = pd.concat([YEAR_TEAM_ROSTER[year][team][['PLAYER','BIRTH_DATE']],ROSTER_BIRTHDATE])
ROSTER_BIRTHDATE.drop_duplicates()
ROSTER_BIRTHDATE['PLAYER'] = ROSTER_BIRTHDATE['PLAYER'].apply(lambda x: x.replace('(TW)', '').strip())
            

In [23]:
set(ROSTER_BIRTHDATE['PLAYER']) - set(ROSTER)

{'Brent Barry',
 'Chris Dudley',
 'Thaddeus Young',
 'Mitch Richmond',
 'Naz Reid',
 'Chimezie Metu',
 'Ruben Boumtje-Boumtje',
 'Noel Felix',
 "Kyle O'Quinn",
 'Todd Fuller',
 "E'Twaun Moore",
 'Tony Smith',
 'Meyers Leonard',
 'Kedrick Brown',
 'Maurice Harkless',
 'Rodney White',
 'Malik Sealy',
 'Michael Gbinije',
 'Dee Brown',
 'Iman Shumpert',
 'Jeff Hornacek',
 'Yaroslav Korolev',
 'Rasho Nesterović',
 'Thomas Gardner',
 'Tim Hardaway',
 'Álex Abrines',
 'Tibor Pleiß',
 'Deron Williams',
 'Terrance Ferguson',
 'Andray Blatche',
 'Manu Ginóbili',
 'Leandro Barbosa',
 'Adam Mokoka',
 'Neemias Queta',
 'Lamar Patterson',
 'Danny Fortson',
 'Nik Stauskas',
 'Tyler Davis',
 'Acie Law',
 'Kyle Guy',
 'Greivis Vásquez',
 'Andrae Patterson',
 'R.J. Hampton',
 'Daniel Theis',
 'Max Christie',
 'Esteban Batista',
 'Semi Ojeleye',
 'Chris McCullough',
 'Justin Williams',
 'Maurice Carter',
 'DeJon Jarreau',
 'Melvin Frazier',
 'Shane Larkin',
 'Tony Snell',
 'Zaza Pachulia',
 'Terrence Wil

{'Brent Barry',
 'Chris Dudley',
 'Thaddeus Young',
 'Mitch Richmond',
 'Naz Reid',
 'Chimezie Metu',
 'Ruben Boumtje-Boumtje',
 'Noel Felix',
 "Kyle O'Quinn",
 'Todd Fuller',
 "E'Twaun Moore",
 'Tony Smith',
 'Meyers Leonard',
 'Kedrick Brown',
 'Maurice Harkless',
 'Rodney White',
 'Malik Sealy',
 'Michael Gbinije',
 'Dee Brown',
 'Iman Shumpert',
 'Jeff Hornacek',
 'Yaroslav Korolev',
 'Rasho Nesterović',
 'Thomas Gardner',
 'Tim Hardaway',
 'Álex Abrines',
 'Tibor Pleiß',
 'Deron Williams',
 'Terrance Ferguson',
 'Andray Blatche',
 'Manu Ginóbili',
 'Leandro Barbosa',
 'Adam Mokoka',
 'Neemias Queta',
 'Lamar Patterson',
 'Danny Fortson',
 'Nik Stauskas',
 'Tyler Davis',
 'Acie Law',
 'Kyle Guy',
 'Greivis Vásquez',
 'Andrae Patterson',
 'R.J. Hampton',
 'Daniel Theis',
 'Max Christie',
 'Esteban Batista',
 'Semi Ojeleye',
 'Chris McCullough',
 'Justin Williams',
 'Maurice Carter',
 'DeJon Jarreau',
 'Melvin Frazier',
 'Shane Larkin',
 'Tony Snell',
 'Zaza Pachulia',
 'Terrence Wil

In [3]:
VisitedPlayer = set()
# RETRIEVE YEAR_TEAM_ROSTER DATA FROM FILES
base_directory = 'player_data'

# Iterate over each year directory

# 2000 special case

# Convert year to integer
year_path = os.path.join(base_directory, "2000")
# Iterate over each team.txt file in the year directory
# Construct the full file path
file_path = os.path.join(year_path,os.listdir(year_path)[0])
players = pd.read_csv(file_path)
for player in players['PLAYER']:
    VisitedPlayer.add(player)


for year in os.listdir(base_directory)[1:]:

    year_int = int(year)  # Convert year to integer
    year_path = os.path.join(base_directory, year)
    # Iterate over each team.txt file in the year directory
    for team_path in os.listdir(year_path):
            
        # Construct the full file path
        file_path = os.path.join(year_path, team_path)
        files = os.listdir(file_path)
        if files:
            file_path = os.path.join(file_path,os.listdir(file_path)[0])
            players = pd.read_csv(file_path)
            for player in players['PLAYER']:
                VisitedPlayer.add(player)



            

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'player_data\\2000'

In [5]:
def concat_df_dic(dic1,dic2):
    newDic = {}
    for category in dic1:
        newDic[category] = pd.concat([dic1[category],dic2[category]])
    return newDic

In [8]:
# PLAYER DATA
player_dfs = {
    "per_game": pd.DataFrame(),
    "totals": pd.DataFrame(),
    "per_minute": pd.DataFrame(),
    "per_poss": pd.DataFrame(),
    "advanced": pd.DataFrame(),
    "adj_shooting": pd.DataFrame(),
    "shooting": pd.DataFrame(),
    "game_logs": pd.DataFrame(),
    "playoff_game_logs": pd.DataFrame(),
    "advanced_game_logs": pd.DataFrame(),
}

# Directory for storing player data
base_directory = "player_data"

# Number of names per bucket
names_per_bucket = 10

# Create base directory if it doesn't exist
if not os.path.exists(base_directory):
    os.makedirs(base_directory)

# Iterate through the roster

def store_data_in_buckets(df, base_directory, names_per_bucket):
    num_players = len(df)
    combined_dfs = None
    for i, row in df.iterrows():
        player = row['PLAYER']
        birthdate = row['BIRTH_DATE']
        bucket_num = (i // names_per_bucket) + 1

        # Create bucket directory if it doesn't exist
        bucket_directory = os.path.join(base_directory, f"bucket_num{bucket_num}")
        if not os.path.exists(bucket_directory):
            os.makedirs(bucket_directory)


        suffix = get_player_suffix(lookup(player, True), birthdate)[:-5]
        print(suffix)
        # Get player data as a DataFrame
        current_player_dfs = get_stats(player)
        game_logs = get_game_logs(suffix = suffix)
        current_player_dfs['game_logs'] = game_logs['game_logs']
        current_player_dfs['playoff_game_logs'] = game_logs['playoff_game_logs']
        current_player_dfs['advanced_game_logs'] = game_logs['advanced_game_logs']

        # Concatenate player_df with previous data if needed
        if i % names_per_bucket == 0:
            combined_dfs = current_player_dfs
        else:
            combined_dfs = concat_df_dic(combined_dfs , current_player_dfs)

        # If it's the last player in the bucket or the last player overall, store the combined_df
        if (i + 1) % names_per_bucket == 0 or i == num_players - 1:
            for category in combined_dfs:
                file_path = os.path.join(bucket_directory, f"{category}.csv")
                combined_dfs[category].to_csv(file_path, index=False)

base_directory = "player_data"
names_per_bucket = 10

store_data_in_buckets(ROSTER_BIRTHDATE, base_directory, names_per_bucket)


    
            

You searched for "Daniel Gafford"
1 result found.
Daniel Gafford
Results for Daniel Gafford:

/players/g/gaffoda019
You searched for "Daniel Gafford"
1 result found.
Daniel Gafford
Results for Daniel Gafford:

retrieving  per_game1
retrieving  totals
retrieving  per_minute
retrieving  per_poss
retrieving  advanced
retrieving  adj_shooting
retrieving  shooting
getting big batch
Request # 4 @ 10:6:53

{'2020-21', '2019-20', '2021-22', '2022-23'}
Request # 5 @ 10:6:55

  result = method(y)




Request # 6 @ 10:6:56

Request # 7 @ 10:6:58

Request # 8 @ 10:6:59

Request # 9 @ 10:7:0

Request # 10 @ 10:7:2

Request # 11 @ 10:7:4

Request # 12 @ 10:7:6

You searched for "Deni Avdija"
1 result found.
Deni Avdija
Results for Deni Avdija:

/players/a/avdijde018
You searched for "Deni Avdija"
1 result found.
Deni Avdija
Results for Deni Avdija:


 Request limit reached. Waiting a minute...
retrieving  per_game0
retrieving  totals
retrieving  per_minute
retrieving  per_poss
retrieving  advanced
retrieving  adj_shooting
retrieving  shooting
getting big batch
{'2020-21', '2021-22', '2022-23'}
Request # 3 @ 10:8:13

Request # 4 @ 10:8:14

Request # 5 @ 10:8:15

Request # 6 @ 10:8:16

Request # 7 @ 10:8:17

Request # 8 @ 10:8:19

You searched for "Corey Kispert"
0 results found.


TypeError: 'NoneType' object is not subscriptable

In [12]:
lookup("Corey Kispert", True)

You searched for "Corey Kispert"
0 results found.


''

In [None]:
player_dfs = {
    "per_game": pd.DataFrame(),
    "totals": pd.DataFrame(),
    "per_minute": pd.DataFrame(),
    "per_poss": pd.DataFrame(),
    "advanced": pd.DataFrame(),
    "adj_shooting": pd.DataFrame(),
    "shooting": pd.DataFrame(),
    "game_logs": pd.DataFrame()
}


    







found_last_team = False
directory = "player_data"
for year in range(yearRange[0], yearRange[1]+1):
    print(year)
    TEAMS = YEAR_TEAM_ROSTER[year]
    
    newDirectory = os.path.join(directory, str(year))
    if not os.path.exists(newDirectory):
        os.makedirs(newDirectory)
    for team in TEAMS:
        print(team)
        teamDirectory = os.path.join(newDirectory, team)
        if not os.path.exists(teamDirectory):
            os.makedirs(teamDirectory)
        # --------------------- just for 2001 tor and on
        if team == 'SAS':
           found_last_team = True
        #found_last_team = True
        if found_last_team:
        # --------------------
            ROSTER = YEAR_TEAM_ROSTER[year][team]
            current_team_dfs = {
                "per_game": pd.DataFrame(),
                "totals": pd.DataFrame(),
                "per_minute": pd.DataFrame(),
                "per_poss": pd.DataFrame(),
                "advanced": pd.DataFrame(),
                "adj_shooting": pd.DataFrame(),
                "shooting": pd.DataFrame(),
                "game_logs": pd.DataFrame()
            }
            for index, row in ROSTER.iterrows():
                player = row['PLAYER']
                if player not in VisitedPlayer:
                    birthdate = row['BIRTH_DATE']
                    print(player)

                    suffix = get_player_suffix(
                        lookup(player, True), birthdate)[:-5]
                    """for category in CATEGORIES:
                        print(category)
                        df = get_stats(category,suffix = suffix)
                        df['PLAYER'] = player
                        player_dfs[category] = pd.concat([player_dfs[category],df])"""
                    # retrieve all stats
                    current_player_dfs = get_stats(suffix=suffix)
                    current_player_dfs["game_logs"] = get_game_logs(suffix=suffix, year=year)
                    for key in current_player_dfs:
                        current_player_dfs[key]['PLAYER'] = player

                    # store all stats
                    current_team_dfs = concat_df_dic(current_team_dfs,current_player_dfs)
            file_path = os.path.join(teamDirectory)
            for category in current_team_dfs:
                current_file_path = os.path.join(file_path, f'{category}.csv')
                current_team_dfs[category].to_csv(current_file_path, index=False)
                print(current_file_path)
            player_dfs = concat_df_dic(player_dfs,current_team_dfs)   


In [24]:
player_dfs = {
    "per_game": pd.DataFrame(),
    "totals": pd.DataFrame(),
    "per_minute": pd.DataFrame(),
    "per_poss": pd.DataFrame(),
    "advanced": pd.DataFrame(),
    "adj_shooting": pd.DataFrame(),
    "shooting": pd.DataFrame(),
    "game_logs": pd.DataFrame()
}

found_last_team = False
directory = "player_data"
for year in range(yearRange[0], yearRange[1]+1):
    print(year)
    TEAMS = YEAR_TEAM_ROSTER[year]
    
    newDirectory = os.path.join(directory, str(year))
    if not os.path.exists(newDirectory):
        os.makedirs(newDirectory)
    for team in TEAMS:
        print(team)
        teamDirectory = os.path.join(newDirectory, team)
        if not os.path.exists(teamDirectory):
            os.makedirs(teamDirectory)
        # --------------------- just for 2001 tor and on
        if team == 'SAS':
           found_last_team = True
        #found_last_team = True
        if found_last_team:
        # --------------------
            ROSTER = YEAR_TEAM_ROSTER[year][team]
            current_team_dfs = {
                "per_game": pd.DataFrame(),
                "totals": pd.DataFrame(),
                "per_minute": pd.DataFrame(),
                "per_poss": pd.DataFrame(),
                "advanced": pd.DataFrame(),
                "adj_shooting": pd.DataFrame(),
                "shooting": pd.DataFrame(),
                "game_logs": pd.DataFrame()
            }
            for index, row in ROSTER.iterrows():
                player = row['PLAYER']
                if player not in VisitedPlayer:
                    birthdate = row['BIRTH_DATE']
                    print(player)

                    suffix = get_player_suffix(
                        lookup(player, True), birthdate)[:-5]
                    """for category in CATEGORIES:
                        print(category)
                        df = get_stats(category,suffix = suffix)
                        df['PLAYER'] = player
                        player_dfs[category] = pd.concat([player_dfs[category],df])"""
                    # retrieve all stats
                    current_player_dfs = get_stats(suffix=suffix)
                    current_player_dfs["game_logs"] = get_game_logs(suffix=suffix, year=year)
                    for key in current_player_dfs:
                        current_player_dfs[key]['PLAYER'] = player

                    # store all stats
                    current_team_dfs = concat_df_dic(current_team_dfs,current_player_dfs)
            file_path = os.path.join(teamDirectory)
            for category in current_team_dfs:
                current_file_path = os.path.join(file_path, f'{category}.csv')
                current_team_dfs[category].to_csv(current_file_path, index=False)
                print(current_file_path)
            player_dfs = concat_df_dic(player_dfs,current_team_dfs)   


2004
ATL
BOS
CHI
CLE
DAL
DEN
DET
GSW
HOU
IND
LAC
LAL
MEM
MIA
MIL
MIN
NJN
NOH
NYK
ORL
PHI
PHO
POR
SAC
SAS
Alex Garcia
You searched for "Alex Garcia"
4 results found.
Request # 3 @ 4:38:56
REQUEST!
Request # 4 @ 4:38:57
REQUEST!
Request # 5 @ 4:38:58
REQUEST!
Shane Heal
You searched for "Shane Heal"
2 results found.
Request # 6 @ 4:38:59
REQUEST!
Request # 7 @ 4:39:0
REQUEST!
Request # 8 @ 4:39:1
REQUEST!
player_data\2004\SAS\per_game.csv
player_data\2004\SAS\totals.csv
player_data\2004\SAS\per_minute.csv
player_data\2004\SAS\per_poss.csv
player_data\2004\SAS\advanced.csv
player_data\2004\SAS\adj_shooting.csv
player_data\2004\SAS\shooting.csv
player_data\2004\SAS\game_logs.csv
SEA
Richie Frahm
You searched for "Richie Frahm"
2 results found.
Request # 9 @ 4:39:2
REQUEST!
Request # 10 @ 4:39:2
REQUEST!
Request # 11 @ 4:39:3
REQUEST!
Luke Ridnour
You searched for "Luke Ridnour"
1 result found.
Luke Ridnour
Results for Luke Ridnour:

Request # 12 @ 4:39:5
REQUEST!
Request # 13 @ 4:39:5
REQU

In [14]:
get_game_logs(suffix =  get_player_suffix(lookup(player, True), birthdate)[:-5],year = 2003)


You searched for "Antoine Walker"
2 results found.
Request # 3 @ 8:48:27
REQUEST!
Request # 4 @ 8:48:28
REQUEST!
Request # 5 @ 8:48:29
REQUEST!


Unnamed: 0,DATE,AGE,TEAM,HOME/AWAY,OPPONENT,RESULT,GS,MP,FG,FGA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GAME_SCORE,+/-
0,2002-10-30,26-079,BOS,HOME,CHI,L (-3),1,43:05,5,23,...,10,11,4,3,0,2,2,14,6.2,-3
1,2002-10-31,26-080,BOS,AWAY,WAS,L (-45),1,34:12,5,16,...,7,9,0,1,0,7,2,12,-0.5,-28
2,2002-11-02,26-082,BOS,AWAY,NYK,W (+10),1,43:22,6,15,...,5,5,10,2,1,4,2,18,15.9,+20
3,2002-11-06,26-086,BOS,AWAY,CHI,W (+22),1,32:24,7,19,...,7,7,5,0,0,2,4,22,12.7,+20
4,2002-11-07,26-087,BOS,HOME,LAL,W (+3),1,47:22,6,17,...,8,9,8,3,1,6,2,13,9.1,+6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,2003-04-09,26-240,BOS,AWAY,WAS,W (+4),1,44:13,8,21,...,8,8,3,1,0,2,3,24,13.6,+7
74,2003-04-10,26-241,BOS,HOME,PHI,L (-21),1,34:34,4,17,...,5,8,3,1,0,4,2,12,3.6,-19
75,2003-04-12,26-243,BOS,AWAY,ORL,L (-3),1,42:39,8,17,...,8,9,7,2,2,3,2,21,19.9,+6
76,2003-04-13,26-244,BOS,AWAY,MIA,W (+8),1,41:05,8,17,...,11,12,4,1,0,1,3,24,20.5,+8


In [13]:
get_game_logs(suffix = suffix, year = 2003)

Request # 2 @ 8:48:0
REQUEST!


Unnamed: 0,DATE,AGE,TEAM,HOME/AWAY,OPPONENT,RESULT,GS,MP,FG,FGA,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GAME_SCORE,+/-
0,2002-10-30,26-079,BOS,HOME,CHI,L (-3),1,43:05,5,23,...,10,11,4,3,0,2,2,14,6.2,-3
1,2002-10-31,26-080,BOS,AWAY,WAS,L (-45),1,34:12,5,16,...,7,9,0,1,0,7,2,12,-0.5,-28
2,2002-11-02,26-082,BOS,AWAY,NYK,W (+10),1,43:22,6,15,...,5,5,10,2,1,4,2,18,15.9,+20
3,2002-11-06,26-086,BOS,AWAY,CHI,W (+22),1,32:24,7,19,...,7,7,5,0,0,2,4,22,12.7,+20
4,2002-11-07,26-087,BOS,HOME,LAL,W (+3),1,47:22,6,17,...,8,9,8,3,1,6,2,13,9.1,+6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,2003-04-09,26-240,BOS,AWAY,WAS,W (+4),1,44:13,8,21,...,8,8,3,1,0,2,3,24,13.6,+7
74,2003-04-10,26-241,BOS,HOME,PHI,L (-21),1,34:34,4,17,...,5,8,3,1,0,4,2,12,3.6,-19
75,2003-04-12,26-243,BOS,AWAY,ORL,L (-3),1,42:39,8,17,...,8,9,7,2,2,3,2,21,19.9,+6
76,2003-04-13,26-244,BOS,AWAY,MIA,W (+8),1,41:05,8,17,...,11,12,4,1,0,1,3,24,20.5,+8


In [None]:
directory = "player_data"
if not os.path.exists(directory):
        os.makedirs(directory)

for year in YEAR_TEAM_ROSTER:
    newDirectory = os.path.join(directory, str(year))
    if not os.path.exists(newDirectory):
        os.makedirs(newDirectory)
    for team in YEAR_TEAM_ROSTER[year]:
        file_path = os.path.join(newDirectory, f'{team}.txt')
        """with open(file_path, 'w', encoding='utf-8') as file:
            for item in YEAR_TEAM_ROSTER[year][team]:
                file.write(f'{item['PLAYER']},{item['BIRTH_DATE']}\n')"""
        YEAR_TEAM_ROSTER[year][team].to_csv(file_path, index=False)

In [62]:
# WRITE TEAM DATA

directory = "player_data"
if not os.path.exists(directory):
    os.makedirs(directory)
for key in CATEGORIES + ['game_logs']:
    print(key)

    player_dfs[key].to_csv(os.path.join(directory, f'{key}_Player_Data_2001_up_to_min.csv'), index=False)

per_game
totals
per_minute
per_poss
advanced
adj_shooting
shooting
game_logs


In [65]:
for team in TEAMS:
    print(team)

ATL
BOS
CHI
CHO
CLE
DAL
DEN
DET
GSW
HOU
IND
LAC
LAL
MIA
MIL
MIN
NJN
NYK
ORL
PHI
PHO
POR
SAC
SAS
SEA
TOR
UTA
VAN
WAS


In [17]:
get_player_suffix(lookup(player, True),birthdate)

You searched for "Chucky Brown"
3 results found.
Request # 1 @ 7:52:55
REQUEST!


'/players/b/brownch01.html'

Rick Hughes 1973-08-22
