In [1]:
import pandas as pd
import uploader
import os

In [2]:
def dflist_to_csv(df_list, folder):
    """take a list of data frames and create a csv file for each data frame"""
    folder = os.path.normpath(folder) 
    
    if not os.path.exists(folder):
        os.makedirs(folder)
    
    for df in df_list:
        file_name = df['Title'][0].replace(" ", "-")
        file_path = os.path.join(folder, f"{file_name}.csv")
        
        if not os.path.exists(file_path):
            df.to_csv(file_path, index=False)

### **URLS** 

In [3]:
schedule_url = 'https://www.espn.com/nba/team/schedule/_/name/atl'
standing_url = 'https://www.espn.com/nba/standings/_/group/league'
player_stats_url = 'https://www.espn.com/nba/stats/player'
team_stats_url = 'https://www.espn.com/nba/stats/team/_/season/2024/seasontype/2'
team_stats_opponent_url = 'https://www.espn.com/nba/stats/team/_/view/opponent'
team_stats_differential_url = 'https://www.espn.com/nba/stats/team/_/view/differential/season/2024/seasontype/2'

### **CSV Folder Names**

In [4]:
schedule_csv = 'csv/schedule' 
standing_csv = 'csv/standing'
player_stats_csv = 'csv/player_stats'
team_stats_csv = 'csv/team_stats'
team_stats_opponent_csv = 'csv/team_stats_opponent'
team_stats_differential_csv = 'csv/team_stats_differential'

### **Upload the data**

##### **Upload schedule**

In [15]:
schedule_df = uploader.upload_schedule(schedule_url)

Atlanta Hawks 2023-24 Preseason NBA Schedule uploaded successfully from https://www.espn.com/nba/team/schedule/_/name/atl/season/2024/seasontype/1
Atlanta Hawks 2023-24 Regular Season NBA Schedule uploaded successfully from https://www.espn.com/nba/team/schedule/_/name/atl/season/2024/seasontype/2
Atlanta Hawks 2022-23 Preseason NBA Schedule uploaded successfully from https://www.espn.com/nba/team/schedule/_/name/atl/season/2023/seasontype/1
Atlanta Hawks 2022-23 Regular Season NBA Schedule uploaded successfully from https://www.espn.com/nba/team/schedule/_/name/atl/season/2023/seasontype/2
Atlanta Hawks 2022-23 Postseason NBA Schedule uploaded successfully from https://www.espn.com/nba/team/schedule/_/name/atl/season/2023/seasontype/3
Atlanta Hawks 2022-23 Play-In NBA Schedule uploaded successfully from https://www.espn.com/nba/team/schedule/_/name/atl/season/2023/seasontype/5
Atlanta Hawks 2021-22 Preseason NBA Schedule uploaded successfully from https://www.espn.com/nba/team/schedul

In [26]:
dflist_to_csv(schedule_df, schedule_csv)

##### **Upload standings**

In [8]:
standings_df = uploader.upload_standings(standing_url)

NBA Standings Regular Season 2023-24 uploaded successfully from https://www.espn.com/nba/standings/_/season/2024/group/league
NBA Standings Pre Season 2023-24 uploaded successfully from https://www.espn.com/nba/standings/_/seasontype/pre/season/2024/group/league
NBA Standings Regular Season 2022-23 uploaded successfully from https://www.espn.com/nba/standings/_/season/2023/group/league
NBA Standings Pre Season 2022-23 uploaded successfully from https://www.espn.com/nba/standings/_/seasontype/pre/season/2023/group/league
NBA Standings Regular Season 2021-22 uploaded successfully from https://www.espn.com/nba/standings/_/season/2022/group/league
NBA Standings Pre Season 2021-22 uploaded successfully from https://www.espn.com/nba/standings/_/seasontype/pre/season/2022/group/league
NBA Standings Regular Season 2020-21 uploaded successfully from https://www.espn.com/nba/standings/_/season/2021/group/league
NBA Standings Pre Season 2020-21 uploaded successfully from https://www.espn.com/nba/

In [10]:
dflist_to_csv(standings_df, standing_csv)

##### **Upload teams stats**

In [5]:
team_stats_df = uploader.upload_teams(team_stats_url)

NBA Team Regular Season Stats 2023-24 uploaded successfully from https://www.espn.com/nba/stats/team/_/season/2024/seasontype/2
NBA Team Postseason Stats 2022-23 uploaded successfully from https://www.espn.com/nba/stats/team/_/season/2023/seasontype/3
NBA Team Regular Season Stats 2022-23 uploaded successfully from https://www.espn.com/nba/stats/team/_/season/2023/seasontype/2
NBA Team Postseason Stats 2021-22 uploaded successfully from https://www.espn.com/nba/stats/team/_/season/2022/seasontype/3
NBA Team Regular Season Stats 2021-22 uploaded successfully from https://www.espn.com/nba/stats/team/_/season/2022/seasontype/2
NBA Team Postseason Stats 2020-21 uploaded successfully from https://www.espn.com/nba/stats/team/_/season/2021/seasontype/3
NBA Team Regular Season Stats 2020-21 uploaded successfully from https://www.espn.com/nba/stats/team/_/season/2021/seasontype/2
NBA Team Postseason Stats 2019-20 uploaded successfully from https://www.espn.com/nba/stats/team/_/season/2020/seaso

In [6]:
dflist_to_csv(team_stats_df, team_stats_csv)

In [8]:
team_stats_opponent_df = uploader.upload_teams(team_stats_opponent_url)

NBA Team Regular Season Stats 2023-24 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/opponent/season/2024/seasontype/2
NBA Team Postseason Stats 2022-23 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/opponent/season/2023/seasontype/3
NBA Team Regular Season Stats 2022-23 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/opponent/season/2023/seasontype/2
NBA Team Postseason Stats 2021-22 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/opponent/season/2022/seasontype/3
NBA Team Regular Season Stats 2021-22 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/opponent/season/2022/seasontype/2
NBA Team Postseason Stats 2020-21 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/opponent/season/2021/seasontype/3
NBA Team Regular Season Stats 2020-21 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/opponent/season/2021/seasontype/2
NBA Team Postseaso

In [9]:
dflist_to_csv(team_stats_opponent_df,team_stats_opponent_csv)

In [11]:
team_stats_differential_df = uploader.upload_teams(team_stats_differential_url)

NBA Team Regular Season Stats 2023-24 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/differential/season/2024/seasontype/2
NBA Team Postseason Stats 2022-23 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/differential/season/2023/seasontype/3
NBA Team Regular Season Stats 2022-23 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/differential/season/2023/seasontype/2
NBA Team Postseason Stats 2021-22 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/differential/season/2022/seasontype/3
NBA Team Regular Season Stats 2021-22 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/differential/season/2022/seasontype/2
NBA Team Postseason Stats 2020-21 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/differential/season/2021/seasontype/3
NBA Team Regular Season Stats 2020-21 uploaded successfully from https://www.espn.com/nba/stats/team/_/view/differential/season/2021/sea

In [12]:
dflist_to_csv(team_stats_differential_df,team_stats_differential_csv)

##### **Upload players stats**

In [5]:
player_stats_df = uploader.upload_player(player_stats_url)

NBA Team Regular Season Stats 2023-24 uploaded successfully from https://www.espn.com/nba/stats/player/_/season/2024/seasontype/2
NBA Team Postseason Stats 2022-23 uploaded successfully from https://www.espn.com/nba/stats/player/_/season/2023/seasontype/3
NBA Team Regular Season Stats 2022-23 uploaded successfully from https://www.espn.com/nba/stats/player/_/season/2023/seasontype/2
NBA Team Postseason Stats 2021-22 uploaded successfully from https://www.espn.com/nba/stats/player/_/season/2022/seasontype/3
NBA Team Regular Season Stats 2021-22 uploaded successfully from https://www.espn.com/nba/stats/player/_/season/2022/seasontype/2
NBA Team Postseason Stats 2020-21 uploaded successfully from https://www.espn.com/nba/stats/player/_/season/2021/seasontype/3
NBA Team Regular Season Stats 2020-21 uploaded successfully from https://www.espn.com/nba/stats/player/_/season/2021/seasontype/2
NBA Team Postseason Stats 2019-20 uploaded successfully from https://www.espn.com/nba/stats/player/_/s

KeyboardInterrupt: 

In [None]:
dflist_to_csv(player_stats_df,player_stats_csv)

### TESTS 

In [7]:
import pandas as pd
import bs4
from urllib import request
from selenium import webdriver
import time
from selenium.webdriver.common.by import By


In [8]:
def parse_players(url: str, title: str) -> pd.DataFrame:
    # Create a new Chrome browser instance
    driver = webdriver.Chrome()

    # Open the URL in the browser
    driver.get(url)

    # Close the cookie pop-up 
    try:
        close_button = driver.find_element(By.XPATH, '//button[text()="Continue without Accepting"]')
        close_button.click()
    except:
        pass

    # Click the "Show More" button to load all the content
    try:
        time.sleep(0.35)
        show_more_link = driver.find_element(By.XPATH, '//a[text()="Show More"]')
        while True:
            show_more_link = driver.find_element(By.XPATH, '//a[text()="Show More"]')
            show_more_link.click()
            time.sleep(0.35)  # Allow time for content to load
    except:
        pass

    # Get the page source after JavaScript execution
    html = driver.page_source
    driver.quit()

    # Create BeautifulSoup object to parse the HTML
    soup = bs4.BeautifulSoup(html, "html.parser")

    # Get the players name
    names_table_tag = soup.find_all('a')
    players = [tag.get_text() for tag in names_table_tag[27:-26]]

    # Get the info table
    info_table_tag = soup.find('div', {"class": "Table__Scroller"})
    info_table_soup = bs4.BeautifulSoup(str(info_table_tag), "html.parser")

    # Get the tables lines
    info_lines_list = info_table_soup.find_all('tr')
    # Get the column names
    tr_tag = info_table_soup.find('tr', {"class": "Table__sub-header Table__TR Table__even"})
    columns_name = [column.get_text() for column in tr_tag]
    columns_name = ['Player Name'] + columns_name

    data = []
    for i in range(len(info_lines_list)-1):
        # Get the player  name
        tr_soup_info = bs4.BeautifulSoup(str(info_lines_list[i+1]), "html.parser")

        # Get tplayer information
        td_tags = tr_soup_info.find_all('td')
        
        attributes = [players[i]]  # Initialize with tplayer name and its rank
        for td in td_tags:
            attribute = td.get_text()
            attributes.append(attribute)
        data.append(attributes)

    # Get the data into a dataframe
    df = pd.DataFrame(data, columns=columns_name)

    # Add the Title of the dataframe
    df['Title'] = title

    return df



In [13]:
df1 = parse_players('https://www.espn.com/nba/stats/player/_/season/2020/seasontype/3', 'le titre')
df1

Unnamed: 0,Player Name,POS,GP,MIN,PTS,FGM,FGA,FG%,3PM,3PA,...,FTA,FT%,REB,AST,STL,BLK,TO,DD2,TD3,Title
0,Donovan Mitchell,SG,7,37.7,36.3,11.9,22.4,52.9,4.7,9.1,...,8.3,94.8,5.0,4.9,1.0,0.3,4.1,0,0,le titre
1,Luka Doncic,PG,6,35.8,31.0,10.7,21.3,50.0,2.7,7.3,...,10.7,65.6,9.8,8.7,1.2,0.5,5.2,2,2,le titre
2,Joel Embiid,C,4,36.3,30.0,8.5,18.5,45.9,1.0,4.0,...,14.8,81.4,12.3,1.3,1.5,1.3,3.8,4,0,le titre
3,James Harden,SG,12,37.3,29.6,9.1,19.0,47.8,3.3,9.8,...,9.7,84.5,5.6,7.7,1.5,0.8,3.8,3,0,le titre
4,Kawhi Leonard,SF,13,39.3,28.2,10.1,20.6,48.9,1.8,5.6,...,7.2,86.2,9.3,5.5,2.3,0.8,2.5,6,0,le titre
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,P.J. Tucker,PF,12,34.5,7.9,2.9,7.3,39.8,2.1,5.6,...,0.0,0.0,7.2,1.5,1.1,0.3,1.0,2,0,le titre
96,Aaron Holiday,G,4,18.0,7.8,3.0,5.3,57.1,1.0,2.3,...,1.3,60.0,1.3,2.5,1.0,0.0,1.0,0,0,le titre
97,Gary Clark,F,5,28.8,7.4,2.2,6.6,33.3,2.2,6.4,...,1.0,80.0,5.6,1.4,1.0,0.4,0.4,0,0,le titre
98,Justin Holiday,SF,4,32.8,7.3,2.5,5.3,47.6,2.0,4.0,...,0.8,33.3,3.8,0.8,1.5,1.3,0.8,0,0,le titre


In [40]:
df1.iloc[133]

Player Name    Caleb Martin
POS                      SF
GP                       51
MIN                    27.5
PTS                    10.6
FGM                     4.0
FGA                     8.8
FG%                    45.2
3PM                     1.4
3PA                     3.5
3P%                    38.5
FTM                     1.2
FTA                     1.6
FT%                    77.8
REB                     4.7
AST                     2.1
STL                     0.8
BLK                     0.4
TO                      1.3
DD2                       2
TD3                       0
Title              le titre
Name: 133, dtype: object

In [12]:
def upload_player(teams_url : str) -> list[pd.DataFrame]:

    # Open the URL and read the HTML content
    response = request.urlopen(teams_url)
    html = response.read()

    # Create BeautifulSoup object to parse the HTML
    soup = bs4.BeautifulSoup(html, "lxml")

    # Get the <select> tags
    tag = soup.find('div',{'class' : 'flex flex-wrap'})
    year_select_tag = tag # Select tag for year
    
    # Get all the option tags for teams
    year_soup = bs4.BeautifulSoup(str(year_select_tag), "lxml")
    year_option_tag = year_soup.find_all('option')
    
    # Get the seasons year and the season type (Regular or Preseason)
 
    years_type = []
    for y in year_option_tag:
        if(y.has_attr('value') == False):
            continue
        year_label = y.get_text()[:7]
        type_label = y.get_text()[8:]
        year = y['value'][:4]
        type = y['value'][5:]
        years_type.append((year_label,type_label,year,type))

    # print(years_type)
    
    players = []
    for (year_label,type_label,year,type) in years_type:
        # Get the Regular Season data
        url = f'https://www.espn.com/nba/stats/player/_/season/{year}/seasontype/{type}'
        title = f'NBA Team {type_label} Stats {year_label}'

        df = table_parser.parse_players(url, title)
        players.append(df)

        print(f'{title} uploaded successfully from {url}')

    return players

In [10]:
team_stats_df = upload_teams('https://www.espn.com/nba/stats/team/_/season/2016/seasontype/2')

NameError: name 'upload_teams' is not defined