In [1]:
import os

import pandas as pd
import numpy as np


from datetime import datetime, timedelta
from pytz import timezone

from src.webscraping import (
    activate_web_driver,
    scrape_to_dataframe,
    convert_columns,
    combine_home_visitor,  
    get_todays_matchups,
)

from src.data_processing import (
    process_games,
    add_TARGET,
)

from src.feature_engineering import (
    process_features,
)


# pathlib keeps path handling portable between Windows and Linux
from pathlib import Path

# Folder (relative to the working directory) used as the root for data files
DATAPATH = Path("data")

**Activate Webdriver**

In [2]:
# The site builds its tables dynamically in the browser, so the scrape is
# driven through a Selenium webdriver instead of plain HTTP requests.
# The resulting `driver` is shared by the scraping cells below.
driver = activate_web_driver("chromium")

**Scrape Boxscore data (traditional, advanced, four-factors, misc, scoring)**

In [3]:


# Seasons are identified by their starting year, so range(2002, 2023)
# covers the 2002-03 season through the 2022-23 season.
season_range = range(2002, 2023)
stat_types = ['traditional', 'advanced', 'four-factors', 'misc', 'scoring']

# Create the output folder up front: without it, to_csv would only fail
# after an entire stat type had already been scraped.
DATAPATH.mkdir(parents=True, exist_ok=True)

for stat_type in stat_types:
    print(f"Starting stat type {stat_type}")

    # Collect one frame per season and concatenate once at the end.
    # Growing a DataFrame with pd.concat inside the loop re-copies every
    # previously scraped row on each iteration.
    season_frames = []

    for season in season_range:
        print(f"Starting season {season}")
        # Season label in "YYYY-YY" form, e.g. 2002 -> "2002-03". The modulo
        # keeps the suffix at two digits for any start year (1999 -> "1999-00").
        SEASON = f"{season}-{(season + 1) % 100:02}"
        df = scrape_to_dataframe(driver, SEASON, stat_type=stat_type)
        season_frames.append(df)

    # pd.concat raises ValueError on an empty list, so fall back to an
    # empty frame when season_range yields nothing.
    if season_frames:
        df_seasons = pd.concat(season_frames, axis=0)
    else:
        df_seasons = pd.DataFrame()

    # One CSV per stat type, all seasons stacked; the row index is not written.
    df_seasons.to_csv(DATAPATH / f"{stat_type}_seasons.csv", index=False)



Starting stat type misc
Starting season 2002
https://www.nba.com/stats/teams/boxscores-misc?Season=2002-03
all pages loaded
Starting season 2003
https://www.nba.com/stats/teams/boxscores-misc?Season=2003-04
all pages loaded
Starting season 2004
https://www.nba.com/stats/teams/boxscores-misc?Season=2004-05
all pages loaded
Starting season 2005
https://www.nba.com/stats/teams/boxscores-misc?Season=2005-06
all pages loaded
Starting season 2006
https://www.nba.com/stats/teams/boxscores-misc?Season=2006-07
all pages loaded
Starting season 2007
https://www.nba.com/stats/teams/boxscores-misc?Season=2007-08
all pages loaded
Starting season 2008
https://www.nba.com/stats/teams/boxscores-misc?Season=2008-09
all pages loaded
Starting season 2009
https://www.nba.com/stats/teams/boxscores-misc?Season=2009-10
all pages loaded
Starting season 2010
https://www.nba.com/stats/teams/boxscores-misc?Season=2010-11
all pages loaded
Starting season 2011
https://www.nba.com/stats/teams/boxscores-misc?Season=2

**Close Webdriver**

In [4]:
# quit() ends the whole WebDriver session (all windows plus the background
# driver process); close() would only close the current window and leave the
# driver process running.
# NOTE(review): assumes `driver` is a Selenium WebDriver, as the activation cell's comment states.
driver.quit()