# Extract Stage

In [1]:
import time
from pathlib import Path
from typing import TypeAlias

import pandas as pd
from bs4 import BeautifulSoup as Soup
from pandas import DataFrame
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support.select import Select

### Overview: URLs, Filters, and Helper Functions

#### URL Stat Tables
* Each URL has stat tables for specific stat types as shown here:
| *stats_urls* index | URL                                                     | Stat Type                 | No. of Tables | Range of Seasons   |
| ------------------ | ------------------------------------------------------- | ------------------------- | ------------- | ------------------ |
| 0                  | https://www.nba.com/stats/teams/traditional/            | Teams General Traditional | 26            | 1996-97 to 2021-22 |
| 1                  | https://www.nba.com/stats/teams/advanced/?sort=W&dir=-1 | Teams General Advanced    | 26            | 1996-97 to 2021-22 |
| 2                  | https://www.nba.com/stats/teams/misc/?sort=W&dir=-1     | Teams General Misc        | 26            | 1996-97 to 2021-22 |
| 3                  | https://www.nba.com/stats/teams/clutch-traditional/     | Teams Clutch Traditional  | 26            | 1996-97 to 2021-22 |

#### URL Selectable Filters 
* Each URL has dropdown menu filters for **Season**, **Season Type**, **Per Mode**, and **Season Segment**.
* **Teams General Advanced** does not have a **Per Mode** filter and thus has a mix of per game stats and full season totals. The index for **Season Segment** would be 2 in the list below.
* First find the set of filter options for the stats page using: `filter_options = browser.find_element(By.CLASS_NAME, "nba-stats-primary-split-block")`
* Here is how any one of the filter options `<select>` tag can be collected via `<label>` tag:
    * `label = filter_options.find_elements(By.TAG_NAME, "label")[0]`: **Season**
        * 1996-97 to 2021-22 (default)
    * `label = filter_options.find_elements(By.TAG_NAME, "label")[1]`: **Season Type**
        * Regular Season (default)
        * Playoffs
    * `label = filter_options.find_elements(By.TAG_NAME, "label")[2]`: **Per Mode**
        * Per Game (default)
        * Totals
    * `label = filter_options.find_elements(By.TAG_NAME, "label")[3]`: **Season Segment**
        * All Games (default)
* To actually find the `<select>` tag from the result above we can use `select = label.find_element(By.TAG_NAME, "select")`
* Then the `select` is converted into a `Select` instance via `select = Select(select)`, allowing for calls to `select.select_by_value()` and `select.options` to either select an option or gather a list of options for use elsewhere. The gathered options are `WebElement` instances and have a `click()` method that is used in another function when appropriate to choose that option and update the page.

#### Data to Collect
* All categories except for ranking. 
    * Ranking is the first column, before the team name. This ranking is tied to their default sorting by winning %. We most likely won't need this data.

#### Helper Functions
* 3 Helper functions facilitate the retrieving of stat tables:
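    1. `season_stat_table(stat_table_soup, season, stat_type)`
        * **Stat table** for **one season** for **one stat type**.
        * `stat_table_soup` contains the HTML for the current table to be retrieved.
        * `season` and `stat_type` are only used to label the error message if the table cannot be parsed.
        * Returns the stat table as a pandas DataFrame.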
        
        <br>
        
    2. `all_seasons_stat_tables(seasons, stat_type, season_type)`
        * **Stat tables** for **all seasons** for **one stat type**.
        * `seasons` contains the `<option>` elements (Selenium `WebElement` instances) for each season found in the **Season** selectable filter. Each one is clicked to load that season's table.
        * `stat_type` and `season_type` are used to label the message printed when a season has no table.
        * `season_stat_table()` is called once for each season.
        * Returns a dictionary. The keys are the season identifiers (for example: `"2021-22"`), and the values are the DataFrames returned by `season_stat_table()`.
            * `{<season_identifier>: <DataFrame>, ...}`
            
        <br> 
         
    3. `retrieve_stats(stats_urls, delay_time)`
        * **Stat tables** for **all seasons** and **all stat types** and both **season types** (regular season and playoffs).
        * `stats_urls` contains all the team statistics URLs for the NBA stats site.
        * `delay_time` is the number of seconds to wait after each page load and after the season type is selected.
        * `all_seasons_stat_tables()` is called once for each stat type (URL) and season type.
        * Returns a tuple of two dictionaries: `(regular_season_stats, playoff_stats)`. In each dictionary the keys are the stat type (for example: `"Teams General Traditional"`), and the values are the dictionaries returned by `all_seasons_stat_tables()`.
            * `({<stat_type>: <dict>, ...}, {<stat_type>: <dict>, ...})`

#### Example Usage
* To see the advanced stats table for the `'2021-22'` season for example, we can use the following code:
    ```python
    team_stats, team_stats_playoffs = retrieve_stats(stats_urls, delay_time)
    team_stats["Teams General Advanced"]["2021-22"]
    ```

#### Custom Types

In [2]:
# StatTableCollection -> keys: season identifiers (i.e. "2021-22"), values:
# stat tables as DataFrames.
StatTableCollection: TypeAlias = dict[str, DataFrame]
# TeamStats -> keys: stat type (i.e. "Teams General Traditional", etc.), values:
# StatTableCollection.
TeamStats: TypeAlias = dict[str, StatTableCollection]

#### Helper Function #1: One Season, One Stat Type

In [3]:
def _parse_stat_value(position: int, text: str) -> str | int | float:
    """Convert one table cell to a Python value based on its column position."""
    # Position 0 (after the ranking column is dropped) is the team name.
    if position == 0:
        return text
    # Positions 1-3 hold the team record: GP, W, and L as integers.
    if position in (1, 2, 3):
        return int(text)
    # Values written with thousands separators (i.e. "8,242") are parsed as
    # integers once the commas are removed.
    if "," in text:
        return int(text.replace(",", ""))
    # Everything else is treated as a float.
    return float(text)


def season_stat_table(stat_table_soup: Soup, season: str, stat_type: str) -> DataFrame:
    """Retrieve the table of data for a season via the stat_table_soup html.

    Args:
        stat_table_soup: Parsed html of the stats page showing the table.
        season: Season identifier, used only to label error messages.
        stat_type: Stat type name, used only to label error messages.

    Returns:
        A DataFrame with one row per table row and one column per visible
        header (upper-cased), excluding the leading ranking column.

    Raises:
        IndexError: If the content block or the table is not found in the
            html. This is deliberately left to propagate because
            all_seasons_stat_tables() catches it to report a season without
            data.
        AttributeError: If the table has no <thead> or <tbody>. The message
            includes the stat type and season.
        ValueError: If a numeric cell cannot be converted by int()/float().
    """
    # Try to collect the table data. The thead/tbody lookups are inside the
    # try because find() returns None when the tag is missing, which is what
    # produces the AttributeError being labelled below.
    try:
        stat_table_section = stat_table_soup.select(".nba-stats-content-block")[0]
        stat_table = stat_table_section.select("table")[0]
        header_cells = stat_table.find("thead").find_all("th")
        rows = stat_table.find("tbody").find_all("tr")
    except AttributeError as e:
        msg = f"{e}. Stat Type: {stat_type}. Season: {season}."
        raise AttributeError(msg) from e

    # The conditional is for removing hidden header values that have no meaning
    # to us. The first list element is removed since it refers to a ranking that
    # we will not need.
    headers = [
        header.text.upper() for header in header_cells if "hidden" not in header.attrs
    ][1:]

    # dataframe_data will contain dict elements for each row of data.
    dataframe_data = []

    for row in rows:
        # All column values for the row, minus the first element (the ranking).
        cols = [td.text.strip() for td in row.find_all("td")][1:]

        # row_dict maps each column's header name to its converted value.
        row_dict = {}
        for index, value in enumerate(cols):
            row_dict[headers[index]] = _parse_stat_value(index, value)
        dataframe_data.append(row_dict)

    return pd.DataFrame(dataframe_data)

#### Helper Function #2: All Seasons, One Stat Type

In [4]:
def all_seasons_stat_tables(
    seasons: list[WebElement], stat_type: str, season_type: str
) -> dict[str, DataFrame]:
    """Retrieve dataframes for all seasons of one stat type.

    Each season option is clicked in turn, the page is given time to load, and
    the resulting table is parsed with season_stat_table().

    NOTE: this function reads the module-level `browser` and `delay_time`
    variables; both must be defined before it is called.

    Args:
        seasons: The <option> elements of the Season filter.
        stat_type: Stat type name, used to label messages.
        season_type: Season type name (i.e. "Playoffs"), used to label the
            message printed when a season has no table.

    Returns:
        A dict whose keys are the season option texts and whose values are the
        DataFrames for those seasons. Seasons for which season_stat_table()
        raises IndexError are reported via print() and left out.
    """
    dataframes = {}
    for season in seasons:
        # Select Season --------------------------------------------------------
        season.click()

        # Delay to allow the page to load.
        time.sleep(delay_time)

        # Retrieve html --------------------------------------------------------
        stat_table_soup = Soup(browser.page_source, "html.parser")

        # Read the option text once: every access to `.text` is a separate
        # request to the browser, and the same value is needed as the dict key,
        # the error label, and in the message below.
        season_id = season.text

        # Retrieve DataFrame ---------------------------------------------------
        # Assign the DataFrame to the dataframes dict as the value, and the
        # season string as the key.
        try:
            dataframes[season_id] = season_stat_table(
                stat_table_soup, season_id, stat_type
            )
        except IndexError:
            print(
                f"No {season_type} - {stat_type} data found for the {season_id} season."
            )
    return dataframes

#### Helper Function #3: All Seasons, All Stat Types

In [5]:
def retrieve_stats(
    stats_urls: list[str], delay_time: int | float
) -> tuple[TeamStats, TeamStats]:
    """Scrape every stat type for the regular season and the playoffs.

    For each season type, every URL in `stats_urls` is visited with the
    module-level `browser`, the Season Type filter is set, and
    all_seasons_stat_tables() collects one DataFrame per season.

    Args:
        stats_urls: The team statistics URLs to visit.
        delay_time: Seconds to sleep after loading a page and after changing
            the Season Type filter.

    Returns:
        A tuple `(regular_season_stats, playoff_stats)`. Each element maps a
        stat type name to the dict returned by all_seasons_stat_tables().
    """

    def filter_select(label_index: int) -> Select:
        """Look up the filter block again and wrap the dropdown at label_index."""
        split_block = browser.find_element(
            By.CLASS_NAME, "nba-stats-primary-split-block"
        )
        label = split_block.find_elements(By.TAG_NAME, "label")[label_index]
        return Select(label.find_element(By.TAG_NAME, "select"))

    all_stats = {}
    for season_type in ["Regular Season", "Playoffs"]:
        stats = {}
        for url in stats_urls:
            # Visit URL --------------------------------------------------------
            browser.get(url)
            time.sleep(delay_time)

            # Select Season Type -----------------------------------------------
            # Label 1 is the Season Type filter; choosing the option brings up
            # that season type's table.
            filter_select(1).select_by_value(season_type)

            # Delay to allow the page to load.
            time.sleep(delay_time)

            # Retrieve Available Seasons ---------------------------------------
            # Label 0 is the Season filter; its <option> elements are clicked
            # one by one later on.
            seasons: list[WebElement] = filter_select(0).options

            # Retrieve Table Title ---------------------------------------------
            # The stat type name is the text of the quick-nav buttons joined
            # with spaces.
            select_soup = Soup(browser.page_source, "html.parser")
            stat_type = " ".join(
                div.select("button > span > span")[0].text
                for div in select_soup.select(".nba-stats-quick-nav > div")
            )

            # Retrieve the DataFrames for Each Season --------------------------
            stats[stat_type] = all_seasons_stat_tables(seasons, stat_type, season_type)
        all_stats[season_type] = stats

    return all_stats["Regular Season"], all_stats["Playoffs"]

### Web Scrape to Collect the Team Stats Data
#### The Browser Instance

In [6]:
# Start a Chrome session controlled by Selenium. The helper functions above
# read this module-level `browser` directly.
browser = webdriver.Chrome()
# Seconds to pass to time.sleep() to allow for pages and tables to load.
delay_time = 5

The chromedriver version (120.0.6099.71) detected in PATH at C:\Users\cdpet\Documents\chromedriver_win64\chromedriver.exe might not be compatible with the detected chrome version (121.0.6167.161); currently, chromedriver 121.0.6167.85 is recommended for chrome 121.*, so it is advised to delete the driver in PATH and retry


#### Team Statistics URLs for Scraping

In [7]:
# Site root shared by every stats page, followed by one path per stat type.
base_url = "https://www.nba.com"
endpoints = [
    "/stats/teams/traditional/",
    "/stats/teams/advanced/?sort=W&dir=-1",
    "/stats/teams/misc/?sort=W&dir=-1",
    "/stats/teams/clutch-traditional/",
]

# Prefix each endpoint with the site root to get the full URLs to scrape.
stats_urls = [base_url + endpoint for endpoint in endpoints]
stats_urls

['https://www.nba.com/stats/teams/traditional/',
 'https://www.nba.com/stats/teams/advanced/?sort=W&dir=-1',
 'https://www.nba.com/stats/teams/misc/?sort=W&dir=-1',
 'https://www.nba.com/stats/teams/clutch-traditional/']

#### Collect the Data

In [8]:
# Always close the browser, even if the scrape raises part-way through, so a
# failed run does not leave the Chrome session open.
try:
    team_stats, team_stats_playoffs = retrieve_stats(stats_urls, delay_time)
finally:
    browser.quit()

No Playoffs - Teams General Traditional data found for the 2023-24 season.
No Playoffs - Teams General Advanced data found for the 2023-24 season.
No Playoffs - Teams General Misc data found for the 2023-24 season.
No Playoffs - Teams Clutch Traditional data found for the 2023-24 season.


In [9]:
# Show the stat types that were collected for the regular season.
team_stats.keys()

dict_keys(['Teams General Traditional', 'Teams General Advanced', 'Teams General Misc', 'Teams Clutch Traditional'])

#### Example Output

In [10]:
# Look up one table: stat type first, then season identifier.
team_stats["Teams General Advanced"]["2021-22"]

Unnamed: 0,TEAM,GP,W,L,MIN,OFFRTG,DEFRTG,NETRTG,AST%,AST/TO,ASTRATIO,OREB%,DREB%,REB%,TOV%,EFG%,TS%,PACE,PIE,POSS
0,Phoenix Suns,82,64,18,3946.0,114.2,106.8,7.5,62.7,2.12,19.5,26.4,72.8,50.3,12.9,54.9,58.1,100.26,54.8,8242
1,Memphis Grizzlies,82,56,26,3956.0,114.3,108.9,5.3,59.7,1.97,17.9,33.8,72.6,52.6,13.0,52.2,55.3,100.52,53.0,8295
2,Golden State Warriors,82,53,29,3946.0,112.1,106.6,5.5,66.9,1.82,19.5,26.9,73.6,51.0,15.0,55.2,58.2,98.74,53.6,8121
3,Miami Heat,82,53,29,3971.0,113.0,108.4,4.5,64.4,1.75,18.8,27.8,73.5,51.0,14.9,54.7,58.4,96.53,52.9,7987
4,Dallas Mavericks,82,52,30,3951.0,112.5,109.1,3.5,59.5,1.87,17.8,25.6,73.3,49.6,13.0,53.8,57.2,95.64,51.1,7871
5,Boston Celtics,82,51,31,3981.0,113.6,106.2,7.4,60.9,1.82,18.2,27.7,72.5,50.9,13.9,54.2,57.8,97.26,54.7,8068
6,Milwaukee Bucks,82,51,31,3951.0,114.3,111.1,3.2,57.2,1.78,17.3,26.9,74.7,51.2,13.3,54.6,58.0,100.59,51.6,8284
7,Philadelphia 76ers,82,51,31,3961.0,113.0,110.2,2.8,60.2,1.89,17.9,24.6,72.4,49.0,12.9,53.4,57.8,96.71,51.7,7975
8,Utah Jazz,82,49,33,3946.0,116.2,110.0,6.2,55.2,1.6,16.7,30.0,73.8,52.5,14.3,55.5,58.9,97.5,52.8,8014
9,Denver Nuggets,82,48,34,3961.0,113.8,111.5,2.3,66.7,1.92,20.0,26.8,75.2,51.5,14.6,55.6,59.0,98.41,51.6,8123


### Build Playoff Teams and Champions DataFrames

#### Variable to Extract Playoff Teams From
* `team_stats_playoffs["Teams General Traditional"]`

#### Data to Collect
* Team names
* Total wins for that playoff run

#### Helper Functions
* 1 Helper function to build the DataFrames:
    * `build_playoffs_champions_dataframes(team_stats_playoffs)`
        * **Playoff teams** for **all seasons** in **one DataFrame**.
        * **Champion teams** for **all seasons** (except current season) in **one DataFrame**.
        * Returns a tuple of DataFrames: `(playoff_teams, champions)`

#### Usage
* To build these DataFrames, use the following code:
    ```python
    playoff_teams, champions = build_playoffs_champions_dataframes(team_stats_playoffs)
    ```

#### Playoff Teams Helper Function #1: Build Playoff Teams and Champions DataFrames

In [11]:
def build_playoffs_champions_dataframes(
    stats_playoffs: TeamStats,
) -> tuple[DataFrame, DataFrame]:
    """Build the playoff teams and champions DataFrames.

    Only the "Teams General Traditional" tables of `stats_playoffs` are used.
    The input DataFrames are not modified.

    Args:
        stats_playoffs: Playoff stats keyed by stat type, then by season.

    Returns:
        A tuple `(playoff_teams, champions)`:
          * playoff_teams: one column per season (named by the season
            identifier) listing that season's team names ordered by playoff
            wins, descending. Seasons with fewer teams are padded with NaN.
          * champions: columns "SEASON" and "TEAM", where TEAM is the team at
            index 0 (most wins) of each season's column.
        If there are no seasons, an empty DataFrame and an empty DataFrame
        with columns "SEASON" and "TEAM" are returned.

    Raises:
        KeyError: If "Teams General Traditional" is missing from
            `stats_playoffs`, or a table lacks the "TEAM" or "W" column.
    """
    playoffs = stats_playoffs["Teams General Traditional"]

    # Build One Column per Season ----------------------------------------------
    # Each season becomes a single-column DataFrame of team names sorted by
    # wins descending, with the win column dropped and the column renamed to
    # the season identifier. A stable sort keeps teams that are tied on wins
    # in their original order so the output is deterministic.
    season_columns = []
    for season, playoff_df in playoffs.items():
        ranked = (
            playoff_df[["TEAM", "W"]]
            .sort_values(by="W", ascending=False, kind="stable")
            .reset_index(drop=True)
            .drop(columns="W")
            .rename(columns={"TEAM": season})
        )
        season_columns.append(ranked)

    # Without any season there is nothing to concatenate.
    if not season_columns:
        return pd.DataFrame(), pd.DataFrame(columns=["SEASON", "TEAM"])

    # Combine Into One DataFrame -----------------------------------------------
    # Join all seasons side by side in a single concat() call. The team with
    # the most wins in each season is left at index 0.
    playoff_teams = pd.concat(season_columns, axis=1)

    # Row 0 transposed gives one row per season; the column is labelled 0
    # (the original row label) until it is renamed to "TEAM".
    champions = playoff_teams.iloc[[0]].transpose()
    champions = champions.rename(columns={0: "TEAM"})
    champions = champions.reset_index(names="SEASON")

    return playoff_teams, champions

#### Retrieve the Playoff Team Data

In [12]:
# Build both DataFrames from the scraped playoff stats.
playoff_teams, champions = build_playoffs_champions_dataframes(team_stats_playoffs)

In [13]:
# One column per season; rows are team names ordered by playoff wins.
playoff_teams

Unnamed: 0,2022-23,2021-22,2020-21,2019-20,2018-19,2017-18,2016-17,2015-16,2014-15,2013-14,...,2005-06,2004-05,2003-04,2002-03,2001-02,2000-01,1999-00,1998-99,1997-98,1996-97
0,Denver Nuggets,Golden State Warriors,Milwaukee Bucks,Los Angeles Lakers,Toronto Raptors,Golden State Warriors,Golden State Warriors,Cleveland Cavaliers,Golden State Warriors,San Antonio Spurs,...,Miami Heat,San Antonio Spurs,Detroit Pistons,San Antonio Spurs,Los Angeles Lakers,Los Angeles Lakers,Los Angeles Lakers,San Antonio Spurs,Chicago Bulls,Chicago Bulls
1,Miami Heat,Boston Celtics,Phoenix Suns,Miami Heat,Golden State Warriors,Cleveland Cavaliers,Cleveland Cavaliers,Golden State Warriors,Cleveland Cavaliers,Miami Heat,...,Dallas Mavericks,Detroit Pistons,Los Angeles Lakers,New Jersey Nets,New Jersey Nets,Philadelphia 76ers,Indiana Pacers,New York Knicks,Utah Jazz,Utah Jazz
2,Boston Celtics,Miami Heat,Atlanta Hawks,Boston Celtics,Milwaukee Bucks,Houston Rockets,Boston Celtics,Oklahoma City Thunder,Houston Rockets,Indiana Pacers,...,Detroit Pistons,Miami Heat,Indiana Pacers,Dallas Mavericks,Sacramento Kings,Milwaukee Bucks,Portland Trail Blazers,Indiana Pacers,Indiana Pacers,Houston Rockets
3,Los Angeles Lakers,Dallas Mavericks,LA Clippers,Denver Nuggets,Portland Trail Blazers,Boston Celtics,San Antonio Spurs,Toronto Raptors,Atlanta Hawks,Oklahoma City Thunder,...,Phoenix Suns,Phoenix Suns,Minnesota Timberwolves,Detroit Pistons,Boston Celtics,San Antonio Spurs,New York Knicks,Portland Trail Blazers,Los Angeles Lakers,Miami Heat
4,Philadelphia 76ers,Milwaukee Bucks,Brooklyn Nets,Toronto Raptors,Philadelphia 76ers,New Orleans Pelicans,Washington Wizards,Miami Heat,Los Angeles Clippers,Washington Wizards,...,Los Angeles Clippers,Seattle SuperSonics,New Jersey Nets,Sacramento Kings,Dallas Mavericks,Charlotte Hornets,Miami Heat,Utah Jazz,Charlotte Hornets,New York Knicks
5,New York Knicks,Phoenix Suns,Philadelphia 76ers,LA Clippers,Denver Nuggets,Philadelphia 76ers,Houston Rockets,San Antonio Spurs,Washington Wizards,Los Angeles Clippers,...,Cleveland Cavaliers,Dallas Mavericks,Sacramento Kings,Los Angeles Lakers,Charlotte Hornets,Toronto Raptors,Philadelphia 76ers,Los Angeles Lakers,San Antonio Spurs,Seattle SuperSonics
6,Phoenix Suns,Memphis Grizzlies,Utah Jazz,Milwaukee Bucks,Houston Rockets,Utah Jazz,Toronto Raptors,Portland Trail Blazers,Memphis Grizzlies,Portland Trail Blazers,...,San Antonio Spurs,Indiana Pacers,San Antonio Spurs,Philadelphia 76ers,Detroit Pistons,Dallas Mavericks,Phoenix Suns,Philadelphia 76ers,New York Knicks,Los Angeles Lakers
7,Golden State Warriors,Philadelphia 76ers,Denver Nuggets,Houston Rockets,Boston Celtics,Toronto Raptors,Utah Jazz,Atlanta Hawks,Chicago Bulls,Brooklyn Nets,...,New Jersey Nets,Washington Wizards,Miami Heat,Boston Celtics,San Antonio Spurs,Sacramento Kings,Utah Jazz,Atlanta Hawks,Seattle SuperSonics,Atlanta Hawks
8,Sacramento Kings,Minnesota Timberwolves,Dallas Mavericks,Oklahoma City Thunder,San Antonio Spurs,Indiana Pacers,LA Clippers,Charlotte Hornets,San Antonio Spurs,Atlanta Hawks,...,Los Angeles Lakers,Boston Celtics,New Orleans Hornets,Orlando Magic,Indiana Pacers,New York Knicks,Milwaukee Bucks,Detroit Pistons,Houston Rockets,Detroit Pistons
9,Atlanta Hawks,New Orleans Pelicans,Los Angeles Lakers,Utah Jazz,LA Clippers,Milwaukee Bucks,Atlanta Hawks,Indiana Pacers,Brooklyn Nets,Dallas Mavericks,...,Chicago Bulls,Houston Rockets,Dallas Mavericks,Portland Trail Blazers,Philadelphia 76ers,Utah Jazz,Sacramento Kings,Miami Heat,Miami Heat,Orlando Magic


In [14]:
# One row per season: the team at index 0 of that season's playoff column.
champions

Unnamed: 0,SEASON,TEAM
0,2022-23,Denver Nuggets
1,2021-22,Golden State Warriors
2,2020-21,Milwaukee Bucks
3,2019-20,Los Angeles Lakers
4,2018-19,Toronto Raptors
5,2017-18,Golden State Warriors
6,2016-17,Golden State Warriors
7,2015-16,Cleveland Cavaliers
8,2014-15,Golden State Warriors
9,2013-14,San Antonio Spurs


### Stack All Per Season DataFrames For Each Stat Type for Storage
#### Seasons and Stat Types

In [15]:
# Iterating a dict yields its keys, so these are the season identifiers that
# were collected for the regular season and the playoffs, and the stat type
# names.
seasons = [*team_stats["Teams General Traditional"]]
playoff_seasons = [*team_stats_playoffs["Teams General Traditional"]]
stat_types = [*team_stats]

#### Helper Function for Concatenation of Stat Tables

In [16]:
def concat_stat_tables(
    stats: TeamStats, seasons: list[str], stat_types: list[str]
) -> dict[str, DataFrame]:
    """Stack the per-season tables of each stat type into one DataFrame.

    Neither `stats` nor the DataFrames it holds are modified; each season's
    table is copied before the "SEASON" column is added.

    Args:
        stats: Stats keyed by stat type, then by season identifier.
        seasons: Season identifiers to stack, in output order.
        stat_types: Stat types to process.

    Returns:
        A new dict with the same keys as `stats`. For every stat type in
        `stat_types` the value is a single DataFrame with a leading "SEASON"
        column (each season's original row index is kept, so index values
        repeat across seasons). If `seasons` is empty the value is an empty
        DataFrame. Other keys keep their original value.

    Raises:
        KeyError: If a stat type or season is missing from `stats`.
    """
    # Shallow copy of the outer dict only, so keys that are not processed are
    # carried over unchanged.
    combined = dict(stats)
    for stat_type in stat_types:
        season_tables = stats[stat_type]
        frames = []
        for season in seasons:
            # Copy before inserting so the caller's DataFrame is left alone.
            frame = season_tables[season].copy()
            # Add the season so that there is an identifier that we can group by.
            frame.insert(loc=0, column="SEASON", value=season)
            frames.append(frame)

        # Nothing to stack (and no "POSS" column to convert) without seasons.
        if not frames:
            combined[stat_type] = pd.DataFrame()
            continue

        # Concatenate once instead of growing the DataFrame season by season.
        df = pd.concat(frames)
        if stat_type == "Teams General Advanced":
            df["POSS"] = df["POSS"].astype("int32")
        combined[stat_type] = df
    return combined

#### Concatenate Stat Tables

In [17]:
# Rebind both variables to the stacked versions. From here on each stat type
# maps to a single DataFrame instead of a dict of per-season DataFrames.
team_stats = concat_stat_tables(team_stats, seasons, stat_types)
team_stats_playoffs = concat_stat_tables(
    team_stats_playoffs, playoff_seasons, stat_types
)

In [18]:
# All regular season tables for this stat type, stacked with a SEASON column.
team_stats["Teams General Traditional"]

Unnamed: 0,SEASON,TEAM,GP,W,L,WIN%,MIN,PTS,FGM,FGA,...,DREB,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,+/-
0,2023-24,Boston Celtics,51,39,12,0.765,48.5,120.4,43.3,90.6,...,36.6,47.5,26.0,12.6,6.4,6.6,3.8,17.1,18.3,9.6
1,2023-24,Denver Nuggets,52,36,16,0.692,48.0,114.8,43.8,88.8,...,33.1,44.2,28.8,12.5,6.7,5.4,5.0,18.7,18.6,4.3
2,2023-24,Minnesota Timberwolves,52,36,16,0.692,48.3,113.4,41.2,84.2,...,34.7,43.8,26.4,14.9,7.5,5.8,4.9,19.5,20.3,6.3
3,2023-24,Oklahoma City Thunder,51,35,16,0.686,48.4,120.8,44.6,89.0,...,32.7,41.4,27.3,12.6,8.2,6.6,5.3,19.5,19.3,7.7
4,2023-24,Cleveland Cavaliers,50,34,16,0.680,48.2,114.8,42.7,88.6,...,34.5,45.2,26.9,13.6,7.7,4.8,5.1,18.6,19.2,5.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24,1996-97,Philadelphia 76ers,82,22,60,0.268,48.2,100.2,36.6,83.5,...,28.7,44.2,20.7,17.5,8.3,4.8,6.0,21.1,0.1,-6.5
25,1996-97,Denver Nuggets,82,21,61,0.256,48.6,97.8,35.8,81.5,...,29.9,42.0,23.0,16.6,6.1,5.9,5.5,21.8,0.2,-6.3
26,1996-97,San Antonio Spurs,82,20,62,0.244,48.1,90.5,34.5,77.9,...,26.0,39.4,20.3,15.2,7.9,5.3,5.5,21.5,0.2,-7.9
27,1996-97,Boston Celtics,82,15,67,0.183,48.5,100.6,37.4,85.0,...,26.7,40.0,21.9,16.4,9.9,3.8,6.8,23.4,0.1,-7.3


<br>
<hr>
<br>

### Write to CSVs

In [19]:
# Make sure a "data" directory exists under the current working directory.
data_dir = Path.cwd() / "data"
data_dir.mkdir(exist_ok=True)

season_type_stats = {"regular_season": team_stats, "playoffs": team_stats_playoffs}

# Team Stats -------------------------------------------------------------------
for season_type, stats in season_type_stats.items():
    # Playoff files get the word "playoffs" prepended; regular season files
    # get no prefix.
    prefix = "playoffs_" if season_type == "playoffs" else ""

    for stat_type, table in stats.items():
        # Turn the stat type into a file stem, for example
        # "Teams General Traditional" -> "teams_general_traditional".
        stat_type_underscores = prefix + "_".join(stat_type.split()).lower()
        table_filepath = data_dir / f"{stat_type_underscores}.csv"

        # Write the DataFrame to file without its index.
        table.to_csv(table_filepath, index=False)

# stat_types_and_seasons.txt ---------------------------------------------------
# Write the stat types to file, one per line, to make it easier for reading in
# the csv's.
# NOTE(review): despite the file name, no season identifiers are written here;
# confirm whether they should be.
with open(data_dir / "stat_types_and_seasons.txt", "w") as f:
    f.writelines(f"{stat_type}\n" for stat_type in stat_types)

# Playoffs ---------------------------------------------------------------------
playoff_teams_filepath = data_dir / "playoff_teams.csv"
playoff_teams.to_csv(playoff_teams_filepath, index=False)

# Champions --------------------------------------------------------------------
champions_filepath = data_dir / "champions.csv"
champions.to_csv(champions_filepath, index=False)