# Transform and Load Stages

In [1]:
import time
import tomllib
from pathlib import Path

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import text
from sqlalchemy.engine import URL

### Read in the CSVs

In [2]:
data_dir = Path.cwd().joinpath("data")

# Team Stats -------------------------------------------------------------------
# The text file lists one stat type per line. Blank lines are filtered out
# instead of unconditionally dropping the final element, so the last stat type
# is kept even when the file does not end with a newline.
with open(data_dir.joinpath("stat_types_and_seasons.txt"), "r") as f:
    data = f.read()
    stat_types = [line for line in data.splitlines() if line.strip()]

# all_stats maps season type -> {stat type -> DataFrame read from its CSV}.
all_stats = {}
for season_type in ["Regular Season", "Playoffs"]:
    # Playoff CSVs use the regular-season file names with a "playoffs_" prefix.
    filename_prefix = "playoffs_" if season_type == "Playoffs" else ""
    stats = {}
    for stat_type in stat_types:
        # e.g. "Teams General Advanced" -> "teams_general_advanced"
        stat_type_underscores = "_".join(stat_type.split()).lower()
        table_filepath = data_dir.joinpath(
            f"{filename_prefix}{stat_type_underscores}.csv"
        )
        stats[stat_type] = pd.read_csv(table_filepath)
    all_stats[season_type] = stats
team_stats = all_stats["Regular Season"]
team_stats_playoffs = all_stats["Playoffs"]

# Playoffs ---------------------------------------------------------------------
playoff_teams_filepath = data_dir.joinpath("playoff_teams_df.csv")
playoff_teams_df = pd.read_csv(playoff_teams_filepath)

# Champions --------------------------------------------------------------------
champions_filepath = data_dir.joinpath("champions_df.csv")
champions_df = pd.read_csv(champions_filepath)

In [3]:
# Show which stat-type keys were loaded for the regular season.
team_stats.keys()

dict_keys(['Teams General Traditional', 'Teams General Advanced', 'Teams General Misc', 'Teams Clutch Traditional'])

### Test That the Data Loaded Correctly

In [4]:
# Report the row count of each regular-season table, then display one of them.
for table_key in (
    "Teams General Traditional",
    "Teams General Advanced",
    "Teams General Misc",
    "Teams Clutch Traditional",
):
    print(f"{table_key} length:", len(team_stats[table_key]))
team_stats["Teams General Advanced"]

Teams General Traditional length: 806
Teams General Advanced length: 806
Teams General Misc length: 806
Teams Clutch Traditional length: 804


Unnamed: 0,SEASON,TEAM,GP,W,L,MIN,OFFRTG,DEFRTG,NETRTG,AST%,...,ASTRATIO,OREB%,DREB%,REB%,TOV%,EFG%,TS%,PACE,PIE,POSS
0,2023-24,Denver Nuggets,1,1,0,48.0,125.3,111.5,13.8,60.4,...,21.2,28.9,65.4,48.5,12.6,60.4,61.8,95.50,57.9,95.0
1,2023-24,Phoenix Suns,1,1,0,48.0,106.9,102.0,5.0,54.8,...,15.9,39.6,64.2,53.3,18.8,50.0,52.7,101.50,56.4,101.0
2,2023-24,Golden State Warriors,1,0,1,48.0,102.0,106.9,-5.0,52.8,...,13.1,35.8,60.4,46.7,10.8,40.6,45.9,101.50,43.6,102.0
3,2023-24,Los Angeles Lakers,1,0,1,48.0,111.5,125.3,-13.8,56.1,...,16.8,34.6,71.1,51.5,12.5,51.1,54.1,95.50,42.1,96.0
4,2022-23,Milwaukee Bucks,82,58,24,3966.0,114.3,110.9,3.4,60.4,...,18.1,28.4,74.5,52.1,14.3,55.5,58.3,101.45,52.3,8389.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
801,1996-97,Philadelphia 76ers,82,22,60,3956.0,102.6,109.5,-6.9,56.4,...,15.2,35.9,65.4,50.4,17.9,47.0,51.8,97.05,45.3,8006.0
802,1996-97,Denver Nuggets,82,21,61,3986.0,103.1,109.7,-6.6,64.4,...,17.4,32.4,66.6,49.6,17.5,48.6,53.0,93.67,46.1,7776.0
803,1996-97,San Antonio Spurs,82,20,62,3946.0,102.1,110.8,-8.8,58.8,...,16.3,34.4,64.3,49.0,17.1,47.2,51.2,88.45,44.0,7268.0
804,1996-97,Boston Celtics,82,15,67,3981.0,102.9,110.1,-7.2,58.4,...,16.1,32.9,65.8,48.2,16.7,47.4,52.0,96.78,43.4,8014.0


In [5]:
# Report the row count of each playoff table, then display one of them.
for table_key in (
    "Teams General Traditional",
    "Teams General Advanced",
    "Teams General Misc",
    "Teams Clutch Traditional",
):
    print(f"Playoffs {table_key} length:", len(team_stats_playoffs[table_key]))
team_stats_playoffs["Teams General Advanced"]

Playoffs Teams General Traditional length: 432
Playoffs Teams General Advanced length: 432
Playoffs Teams General Misc length: 432
Playoffs Teams Clutch Traditional length: 421


Unnamed: 0,SEASON,TEAM,GP,W,L,MIN,OFFRTG,DEFRTG,NETRTG,AST%,...,ASTRATIO,OREB%,DREB%,REB%,TOV%,EFG%,TS%,PACE,PIE,POSS
0,2022-23,Denver Nuggets,20,16,4,965.0,118.2,110.2,8.0,60.9,...,19.1,30.0,76.2,53.8,12.1,55.7,59.3,95.18,55.3,1919.0
1,2022-23,Miami Heat,23,13,10,1109.0,113.8,111.9,1.9,59.3,...,17.9,27.2,70.8,48.7,12.3,53.1,56.7,94.87,49.8,2189.0
2,2022-23,Boston Celtics,20,11,9,965.0,116.1,112.9,3.2,59.1,...,18.2,26.6,71.6,49.7,12.9,56.0,59.4,95.83,52.6,1929.0
3,2022-23,Los Angeles Lakers,16,8,8,773.0,112.7,110.3,2.4,60.7,...,18.2,26.1,70.9,49.8,12.0,52.9,57.3,99.04,53.2,1595.0
4,2022-23,Philadelphia 76ers,11,7,4,533.0,110.2,112.1,-2.0,52.0,...,15.2,29.6,76.6,52.7,13.3,51.0,55.5,92.08,48.3,1024.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,1996-97,Portland Trail Blazers,4,1,3,192.0,102.6,109.9,-7.4,60.3,...,16.4,29.9,66.5,47.8,17.7,47.9,52.8,87.75,41.8,350.0
428,1996-97,Charlotte Hornets,3,0,3,144.0,107.1,117.7,-10.6,50.5,...,14.9,35.5,74.8,52.3,14.9,49.2,53.5,89.00,42.0,268.0
429,1996-97,Los Angeles Clippers,3,0,3,144.0,104.9,118.9,-13.9,50.0,...,14.0,26.8,63.5,43.2,13.6,47.6,52.7,88.17,36.2,264.0
430,1996-97,Minnesota Timberwolves,3,0,3,144.0,111.2,122.9,-11.7,60.0,...,18.6,31.6,63.6,46.1,10.4,48.7,51.6,90.00,42.5,269.0


In [6]:
# Display the wide playoff-teams frame read from playoff_teams_df.csv.
playoff_teams_df

Unnamed: 0,2022-23,2021-22,2020-21,2019-20,2018-19,2017-18,2016-17,2015-16,2014-15,2013-14,...,2005-06,2004-05,2003-04,2002-03,2001-02,2000-01,1999-00,1998-99,1997-98,1996-97
0,Denver Nuggets,Golden State Warriors,Milwaukee Bucks,Los Angeles Lakers,Toronto Raptors,Golden State Warriors,Golden State Warriors,Cleveland Cavaliers,Golden State Warriors,San Antonio Spurs,...,Miami Heat,San Antonio Spurs,Detroit Pistons,San Antonio Spurs,Los Angeles Lakers,Los Angeles Lakers,Los Angeles Lakers,San Antonio Spurs,Chicago Bulls,Chicago Bulls
1,Miami Heat,Boston Celtics,Phoenix Suns,Miami Heat,Golden State Warriors,Cleveland Cavaliers,Cleveland Cavaliers,Golden State Warriors,Cleveland Cavaliers,Miami Heat,...,Dallas Mavericks,Detroit Pistons,Los Angeles Lakers,New Jersey Nets,New Jersey Nets,Philadelphia 76ers,Indiana Pacers,New York Knicks,Utah Jazz,Utah Jazz
2,Boston Celtics,Miami Heat,Atlanta Hawks,Boston Celtics,Milwaukee Bucks,Houston Rockets,Boston Celtics,Oklahoma City Thunder,Houston Rockets,Indiana Pacers,...,Detroit Pistons,Miami Heat,Indiana Pacers,Dallas Mavericks,Sacramento Kings,Milwaukee Bucks,Portland Trail Blazers,Indiana Pacers,Indiana Pacers,Houston Rockets
3,Los Angeles Lakers,Dallas Mavericks,LA Clippers,Denver Nuggets,Portland Trail Blazers,Boston Celtics,San Antonio Spurs,Toronto Raptors,Atlanta Hawks,Oklahoma City Thunder,...,Phoenix Suns,Phoenix Suns,Minnesota Timberwolves,Detroit Pistons,Boston Celtics,San Antonio Spurs,New York Knicks,Portland Trail Blazers,Los Angeles Lakers,Miami Heat
4,Philadelphia 76ers,Milwaukee Bucks,Brooklyn Nets,Toronto Raptors,Philadelphia 76ers,New Orleans Pelicans,Washington Wizards,Miami Heat,Los Angeles Clippers,Washington Wizards,...,Los Angeles Clippers,Seattle SuperSonics,New Jersey Nets,Sacramento Kings,Dallas Mavericks,Charlotte Hornets,Miami Heat,Utah Jazz,Charlotte Hornets,New York Knicks
5,New York Knicks,Phoenix Suns,Philadelphia 76ers,LA Clippers,Denver Nuggets,Philadelphia 76ers,Houston Rockets,San Antonio Spurs,Washington Wizards,Los Angeles Clippers,...,Cleveland Cavaliers,Dallas Mavericks,Sacramento Kings,Los Angeles Lakers,Charlotte Hornets,Toronto Raptors,Philadelphia 76ers,Los Angeles Lakers,San Antonio Spurs,Seattle SuperSonics
6,Phoenix Suns,Memphis Grizzlies,Utah Jazz,Milwaukee Bucks,Houston Rockets,Utah Jazz,Toronto Raptors,Portland Trail Blazers,Memphis Grizzlies,Portland Trail Blazers,...,San Antonio Spurs,Indiana Pacers,San Antonio Spurs,Philadelphia 76ers,Detroit Pistons,Dallas Mavericks,Phoenix Suns,Philadelphia 76ers,New York Knicks,Los Angeles Lakers
7,Golden State Warriors,Philadelphia 76ers,Denver Nuggets,Houston Rockets,Boston Celtics,Toronto Raptors,Utah Jazz,Atlanta Hawks,Chicago Bulls,Brooklyn Nets,...,New Jersey Nets,Washington Wizards,Miami Heat,Boston Celtics,San Antonio Spurs,Sacramento Kings,Utah Jazz,Atlanta Hawks,Seattle SuperSonics,Atlanta Hawks
8,Sacramento Kings,Minnesota Timberwolves,Dallas Mavericks,Oklahoma City Thunder,San Antonio Spurs,Indiana Pacers,LA Clippers,Charlotte Hornets,San Antonio Spurs,Atlanta Hawks,...,Los Angeles Lakers,Boston Celtics,New Orleans Hornets,Orlando Magic,Indiana Pacers,New York Knicks,Milwaukee Bucks,Detroit Pistons,Houston Rockets,Detroit Pistons
9,Atlanta Hawks,New Orleans Pelicans,Los Angeles Lakers,Utah Jazz,LA Clippers,Milwaukee Bucks,Atlanta Hawks,Indiana Pacers,Brooklyn Nets,Dallas Mavericks,...,Chicago Bulls,Houston Rockets,Dallas Mavericks,Portland Trail Blazers,Philadelphia 76ers,Utah Jazz,Sacramento Kings,Miami Heat,Miami Heat,Orlando Magic


In [7]:
# Display the champions frame read from champions_df.csv.
champions_df

Unnamed: 0,SEASON,TEAM
0,2022-23,Denver Nuggets
1,2021-22,Golden State Warriors
2,2020-21,Milwaukee Bucks
3,2019-20,Los Angeles Lakers
4,2018-19,Toronto Raptors
5,2017-18,Golden State Warriors
6,2016-17,Golden State Warriors
7,2015-16,Cleveland Cavaliers
8,2014-15,Golden State Warriors
9,2013-14,San Antonio Spurs


<br>
<hr>
<br>

## Transform Stage
### Current Variables

#### Team Stats
* `team_stats`: *dict* of the form `{<stat_type>: <DataFrame>, ...}` with the following 4 stat_type keys:
    * "Teams General Traditional"
    * "Teams General Advanced"
    * "Teams General Misc"
    * "Teams Clutch Traditional"
        
    <br>
        
    * Example Usage:
    ```python
    team_stats["Teams General Advanced"]
    ```

#### Playoff Team Stats
* `team_stats_playoffs`: *dict* of the form `{<stat_type>: <DataFrame>, ...}` with the following 4 stat_type keys:
    * "Teams General Traditional"
    * "Teams General Advanced"
    * "Teams General Misc"
    * "Teams Clutch Traditional"
        
    <br>
        
    * Example Usage:
    ```python
    team_stats_playoffs["Teams General Advanced"]
    ```
    
#### Playoff Teams
* `playoff_teams_df`: *DataFrame*, each season is a column, and each row is a playoff team for that season

#### Champions
* `champions_df`: *DataFrame*, one row per season; the SEASON column holds the season and the TEAM column holds the name of the champion team for that season

### Seasons

In [8]:
# Season labels such as "2022-23" are parsed with "%Y-%y" and reduced to a
# single year; the rendered outputs show "2022-23" becoming 2023.
seasons = pd.to_datetime(
    team_stats["Teams General Traditional"]["SEASON"].unique(),
    format="%Y-%y",
).year

playoff_seasons = pd.to_datetime(
    team_stats_playoffs["Teams General Traditional"]["SEASON"].unique(),
    format="%Y-%y",
).year

### Transform `playoff_teams_df` and Create a Long Format Version of `playoff_teams_df`
#### Convert Column Names of `playoff_teams_df` to the Second Year as a Four-Digit Integer

In [9]:
# Parse each column's own "YYYY-YY" header instead of assigning labels
# positionally from the season order of the stats table: a differently ordered
# CSV would otherwise be mislabeled without any error.
# The integer check keeps the cell re-runnable once the labels are converted.
season_labels = playoff_teams_df.columns
if not pd.api.types.is_integer_dtype(season_labels):
    season_labels = pd.to_datetime(season_labels, format="%Y-%y").year
playoff_teams_df.columns = season_labels
playoff_teams_df = playoff_teams_df.astype(pd.StringDtype())
playoff_teams_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 27 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   2023    16 non-null     string
 1   2022    16 non-null     string
 2   2021    16 non-null     string
 3   2020    16 non-null     string
 4   2019    16 non-null     string
 5   2018    16 non-null     string
 6   2017    16 non-null     string
 7   2016    16 non-null     string
 8   2015    16 non-null     string
 9   2014    16 non-null     string
 10  2013    16 non-null     string
 11  2012    16 non-null     string
 12  2011    16 non-null     string
 13  2010    16 non-null     string
 14  2009    16 non-null     string
 15  2008    16 non-null     string
 16  2007    16 non-null     string
 17  2006    16 non-null     string
 18  2005    16 non-null     string
 19  2004    16 non-null     string
 20  2003    16 non-null     string
 21  2002    16 non-null     string
 22  2001    16 non-null     stri

In [10]:
# Preview the first rows of the wide playoff-teams frame.
playoff_teams_df.head()

Unnamed: 0,2023,2022,2021,2020,2019,2018,2017,2016,2015,2014,...,2006,2005,2004,2003,2002,2001,2000,1999,1998,1997
0,Denver Nuggets,Golden State Warriors,Milwaukee Bucks,Los Angeles Lakers,Toronto Raptors,Golden State Warriors,Golden State Warriors,Cleveland Cavaliers,Golden State Warriors,San Antonio Spurs,...,Miami Heat,San Antonio Spurs,Detroit Pistons,San Antonio Spurs,Los Angeles Lakers,Los Angeles Lakers,Los Angeles Lakers,San Antonio Spurs,Chicago Bulls,Chicago Bulls
1,Miami Heat,Boston Celtics,Phoenix Suns,Miami Heat,Golden State Warriors,Cleveland Cavaliers,Cleveland Cavaliers,Golden State Warriors,Cleveland Cavaliers,Miami Heat,...,Dallas Mavericks,Detroit Pistons,Los Angeles Lakers,New Jersey Nets,New Jersey Nets,Philadelphia 76ers,Indiana Pacers,New York Knicks,Utah Jazz,Utah Jazz
2,Boston Celtics,Miami Heat,Atlanta Hawks,Boston Celtics,Milwaukee Bucks,Houston Rockets,Boston Celtics,Oklahoma City Thunder,Houston Rockets,Indiana Pacers,...,Detroit Pistons,Miami Heat,Indiana Pacers,Dallas Mavericks,Sacramento Kings,Milwaukee Bucks,Portland Trail Blazers,Indiana Pacers,Indiana Pacers,Houston Rockets
3,Los Angeles Lakers,Dallas Mavericks,LA Clippers,Denver Nuggets,Portland Trail Blazers,Boston Celtics,San Antonio Spurs,Toronto Raptors,Atlanta Hawks,Oklahoma City Thunder,...,Phoenix Suns,Phoenix Suns,Minnesota Timberwolves,Detroit Pistons,Boston Celtics,San Antonio Spurs,New York Knicks,Portland Trail Blazers,Los Angeles Lakers,Miami Heat
4,Philadelphia 76ers,Milwaukee Bucks,Brooklyn Nets,Toronto Raptors,Philadelphia 76ers,New Orleans Pelicans,Washington Wizards,Miami Heat,Los Angeles Clippers,Washington Wizards,...,Los Angeles Clippers,Seattle SuperSonics,New Jersey Nets,Sacramento Kings,Dallas Mavericks,Charlotte Hornets,Miami Heat,Utah Jazz,Charlotte Hornets,New York Knicks


#### Playoff Teams (long format DataFrame)

In [11]:
# Unpivot the wide frame (one column per season) into SEASON/TEAM rows.
playoff_teams_long_df = pd.melt(
    playoff_teams_df,
    var_name="SEASON",
    value_name="TEAM",
)
playoff_teams_long_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 432 entries, 0 to 431
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   SEASON  432 non-null    int32 
 1   TEAM    432 non-null    string
dtypes: int32(1), string(1)
memory usage: 5.2 KB


In [12]:
# Preview the first rows of the long-format playoff-teams frame.
playoff_teams_long_df.head()

Unnamed: 0,SEASON,TEAM
0,2023,Denver Nuggets
1,2023,Miami Heat
2,2023,Boston Celtics
3,2023,Los Angeles Lakers
4,2023,Philadelphia 76ers


### Transform `champions_df`

In [13]:
# Convert the "YYYY-YY" season labels to a single integer year and store the
# team names with pandas' string dtype, updating the frame in place.
champion_years = pd.to_datetime(champions_df["SEASON"], format="%Y-%y").dt.year
champion_names = champions_df["TEAM"].astype("string")
champions_df["SEASON"] = champion_years
champions_df["TEAM"] = champion_names
champions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   SEASON  27 non-null     int32 
 1   TEAM    27 non-null     string
dtypes: int32(1), string(1)
memory usage: 456.0 bytes


In [14]:
# Preview the first rows of the champions frame.
champions_df.head()

Unnamed: 0,SEASON,TEAM
0,2023,Denver Nuggets
1,2022,Golden State Warriors
2,2021,Milwaukee Bucks
3,2020,Los Angeles Lakers
4,2019,Toronto Raptors


### Transform `team_stats`
#### Transform Team Stats Helper Function

In [15]:
def transform_team_stats_table(stats_table_input, playoff_teams=None, champions=None):
    """Return a copy of a regular-season stats table with season and flag columns.

    The "SEASON" labels (e.g. "2022-23") are converted to an integer year, and
    two boolean columns are added: "PLAYOFFS" (the team is listed among that
    season's playoff teams) and "CHAMPION" (the team is that season's champion).

    Parameters
    ----------
    stats_table_input : pandas.DataFrame
        Table with at least "SEASON" and "TEAM" columns. It is not modified.
    playoff_teams : pandas.DataFrame, optional
        Wide frame with one column per season (labelled with the integer year)
        holding that season's playoff teams. Defaults to the notebook-level
        `playoff_teams_df`.
    champions : pandas.DataFrame, optional
        Frame with integer "SEASON" and "TEAM" columns. Defaults to the
        notebook-level `champions_df`.

    Returns
    -------
    pandas.DataFrame
        The transformed copy.
    """
    if playoff_teams is None:
        playoff_teams = playoff_teams_df
    if champions is None:
        champions = champions_df

    stats_table = stats_table_input.copy()
    stats_table["SEASON"] = pd.to_datetime(
        stats_table["SEASON"], format="%Y-%y"
    ).dt.year

    # Initialize PLAYOFFS and CHAMPION boolean columns
    stats_table["PLAYOFFS"] = False
    stats_table["CHAMPION"] = False

    # Loop over each season and that season's playoff teams
    for season, season_playoff_teams in playoff_teams.items():
        in_season = stats_table["SEASON"] == season

        # Flag this season's rows whose team is one of its playoff teams.
        is_playoff_team = stats_table["TEAM"].isin(season_playoff_teams)
        stats_table.loc[in_season & is_playoff_team, "PLAYOFFS"] = True

        # A season may have playoff teams but no recorded champion yet; skip
        # it instead of failing on a missing row.
        season_champions = champions.loc[champions["SEASON"] == season, "TEAM"]
        if season_champions.empty:
            continue
        champion = season_champions.iloc[0]
        stats_table.loc[
            in_season & (stats_table["TEAM"] == champion), "CHAMPION"
        ] = True

    return stats_table

#### Transform Team Stats

In [16]:
# Replace every regular-season table with its transformed copy. The keys are
# snapshotted first so the dict is not being iterated while it is written to.
for stat_type in list(team_stats):
    stats_table = team_stats[stat_type]
    team_stats[stat_type] = transform_team_stats_table(stats_table)
team_stats["Teams General Traditional"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 806 entries, 0 to 805
Data columns (total 30 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   SEASON    806 non-null    int32  
 1   TEAM      806 non-null    object 
 2   GP        806 non-null    int64  
 3   W         806 non-null    int64  
 4   L         806 non-null    int64  
 5   WIN%      806 non-null    float64
 6   MIN       806 non-null    float64
 7   PTS       806 non-null    float64
 8   FGM       806 non-null    float64
 9   FGA       806 non-null    float64
 10  FG%       806 non-null    float64
 11  3PM       806 non-null    float64
 12  3PA       806 non-null    float64
 13  3P%       806 non-null    float64
 14  FTM       806 non-null    float64
 15  FTA       806 non-null    float64
 16  FT%       806 non-null    float64
 17  OREB      806 non-null    float64
 18  DREB      806 non-null    float64
 19  REB       806 non-null    float64
 20  AST       806 non-null    float6

In [17]:
# Preview the transformed regular-season traditional stats.
team_stats["Teams General Traditional"].head(10)

Unnamed: 0,SEASON,TEAM,GP,W,L,WIN%,MIN,PTS,FGM,FGA,...,AST,TOV,STL,BLK,BLKA,PF,PFD,+/-,PLAYOFFS,CHAMPION
0,2024,Denver Nuggets,1,1,0,1.0,48.0,119.0,48.0,91.0,...,29.0,12.0,9.0,6.0,4.0,15.0,18.0,12.0,False,False
1,2024,Phoenix Suns,1,1,0,1.0,48.0,108.0,42.0,95.0,...,23.0,19.0,5.0,7.0,6.0,22.0,23.0,4.0,False,False
2,2024,Golden State Warriors,1,0,1,0.0,48.0,104.0,36.0,101.0,...,19.0,11.0,11.0,6.0,7.0,23.0,22.0,-4.0,False,False
3,2024,Los Angeles Lakers,1,0,1,0.0,48.0,107.0,41.0,90.0,...,23.0,12.0,5.0,4.0,6.0,18.0,15.0,-12.0,False,False
4,2023,Milwaukee Bucks,82,58,24,0.707,48.4,116.9,42.7,90.4,...,25.8,14.6,6.4,4.9,3.9,18.0,19.0,3.6,True,False
5,2023,Boston Celtics,82,57,25,0.695,48.7,117.9,42.2,88.8,...,26.7,13.4,6.4,5.2,3.9,18.8,19.1,6.5,True,False
6,2023,Philadelphia 76ers,82,54,28,0.659,48.5,115.2,40.8,83.8,...,25.2,13.7,7.7,4.7,4.6,20.4,19.6,4.3,True,False
7,2023,Denver Nuggets,82,53,29,0.646,48.2,115.8,43.6,86.4,...,28.9,14.5,7.5,4.5,4.2,18.6,19.5,3.3,True,True
8,2023,Cleveland Cavaliers,82,51,31,0.622,48.5,112.3,41.6,85.2,...,24.9,13.3,7.1,4.7,4.4,19.0,20.4,5.4,True,False
9,2023,Memphis Grizzlies,82,51,31,0.622,48.2,116.9,43.7,92.1,...,26.0,13.6,8.3,5.8,5.2,20.0,20.0,3.9,True,False


### Transform `team_stats_playoffs`
#### Transform Team Stats Playoffs Helper Function

In [18]:
def transform_team_stats_playoffs_table(stats_table_input, champions=None):
    """Return a copy of a playoff stats table with season and champion columns.

    The "SEASON" labels (e.g. "2022-23") are converted to an integer year, and
    a boolean "CHAMPION" column marks the row of each season's champion team.

    Parameters
    ----------
    stats_table_input : pandas.DataFrame
        Table with at least "SEASON" and "TEAM" columns. It is not modified.
    champions : pandas.DataFrame, optional
        Frame with integer "SEASON" and "TEAM" columns. Defaults to the
        notebook-level `champions_df`.

    Returns
    -------
    pandas.DataFrame
        The transformed copy.
    """
    if champions is None:
        champions = champions_df

    stats_table = stats_table_input.copy()
    stats_table["SEASON"] = pd.to_datetime(
        stats_table["SEASON"], format="%Y-%y"
    ).dt.year
    # Initialize CHAMPION boolean column
    stats_table["CHAMPION"] = False

    # Loop over the seasons present in this table. Seasons absent from the
    # table have no rows to flag, so they need no visit.
    for season in stats_table["SEASON"].unique():
        # A season without a recorded champion is skipped instead of failing
        # on a missing row.
        season_champions = champions.loc[champions["SEASON"] == season, "TEAM"]
        if season_champions.empty:
            continue
        champion = season_champions.iloc[0]
        stats_table.loc[
            (stats_table["SEASON"] == season) & (stats_table["TEAM"] == champion),
            "CHAMPION",
        ] = True

    return stats_table

#### Transform Team Stats Playoffs

In [19]:
# Replace every playoff table with its transformed copy. The keys are
# snapshotted first so the dict is not being iterated while it is written to.
for stat_type in list(team_stats_playoffs):
    playoff_stats_table = team_stats_playoffs[stat_type]
    team_stats_playoffs[stat_type] = transform_team_stats_playoffs_table(
        playoff_stats_table
    )
team_stats_playoffs["Teams General Traditional"].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 432 entries, 0 to 431
Data columns (total 29 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   SEASON    432 non-null    int32  
 1   TEAM      432 non-null    object 
 2   GP        432 non-null    int64  
 3   W         432 non-null    int64  
 4   L         432 non-null    int64  
 5   WIN%      432 non-null    float64
 6   MIN       432 non-null    float64
 7   PTS       432 non-null    float64
 8   FGM       432 non-null    float64
 9   FGA       432 non-null    float64
 10  FG%       432 non-null    float64
 11  3PM       432 non-null    float64
 12  3PA       432 non-null    float64
 13  3P%       432 non-null    float64
 14  FTM       432 non-null    float64
 15  FTA       432 non-null    float64
 16  FT%       432 non-null    float64
 17  OREB      432 non-null    float64
 18  DREB      432 non-null    float64
 19  REB       432 non-null    float64
 20  AST       432 non-null    float6

In [20]:
# Preview the transformed playoff traditional stats.
team_stats_playoffs["Teams General Traditional"].head(10)

Unnamed: 0,SEASON,TEAM,GP,W,L,WIN%,MIN,PTS,FGM,FGA,...,REB,AST,TOV,STL,BLK,BLKA,PF,PFD,+/-,CHAMPION
0,2023,Denver Nuggets,20,16,4,0.8,48.3,113.5,42.3,86.0,...,44.8,25.8,11.6,6.9,4.2,4.7,19.3,21.0,8.3,True
1,2023,Philadelphia 76ers,11,7,4,0.636,48.5,102.5,36.7,84.0,...,43.0,19.1,12.4,7.6,5.3,5.5,18.7,16.0,-1.5,False
2,2023,Miami Heat,23,13,10,0.565,48.2,108.3,39.7,86.8,...,40.3,23.6,11.7,7.0,3.6,4.7,20.1,18.7,1.5,False
3,2023,Boston Celtics,20,11,9,0.55,48.3,112.0,40.9,85.5,...,43.1,24.2,12.4,6.0,6.4,4.7,17.2,17.7,3.4,False
4,2023,New York Knicks,11,6,5,0.545,48.0,100.1,35.3,81.5,...,45.4,18.7,14.6,7.0,4.5,3.8,19.5,21.6,0.4,False
5,2023,Phoenix Suns,11,6,5,0.545,48.0,114.2,43.1,86.8,...,40.0,24.5,12.1,6.3,5.6,4.0,22.0,20.8,-2.3,False
6,2023,Los Angeles Lakers,16,8,8,0.5,48.3,112.3,41.1,87.1,...,44.6,24.9,11.9,6.8,6.4,3.8,17.4,21.3,2.4,False
7,2023,Golden State Warriors,13,6,7,0.462,48.0,113.1,41.9,93.6,...,47.1,28.2,14.4,7.2,4.6,4.6,22.9,19.6,-0.2,False
8,2023,Sacramento Kings,7,3,4,0.429,48.0,113.7,41.0,95.6,...,47.7,22.0,14.1,7.7,4.3,5.6,22.6,22.7,-2.1,False
9,2023,Atlanta Hawks,6,2,4,0.333,48.0,115.8,44.0,96.3,...,43.8,24.5,12.8,7.7,4.8,7.2,16.3,16.5,-5.3,False


<br>
<hr>
<br>

## Load Stage
### Connect to the Database

In [None]:
# secrets.toml is expected one directory above the current working directory.
project_dir = Path.cwd().parent
secrets_path = project_dir.joinpath("secrets.toml")

with open(secrets_path, "rb") as f:
    secrets = tomllib.load(f)

# database location
host = secrets["DATABASE"]["HOST"]
port = secrets["DATABASE"]["PORT"]
db = secrets["DATABASE"]["DB"]

# user credentials
user = secrets["USER"]["USERNAME"]
pwd = secrets["USER"]["PASSWORD"]

# user db url
# Built from its components so special characters in the credentials (such as
# "@", ":" or "/") are escaped instead of corrupting the connection string.
db_url = URL.create(
    drivername="postgresql",
    username=user,
    password=pwd,
    host=host,
    port=port,
    database=db,
)

# create the engine
engine = create_engine(db_url)

### Load: Regular Season Team Statistics

In [None]:
start_time = time.time()

# Pull the four regular-season tables out of the dict under short names.
teams_traditional, teams_advanced, teams_misc, teams_clutch = (
    team_stats[stat_type]
    for stat_type in (
        "Teams General Traditional",
        "Teams General Advanced",
        "Teams General Misc",
        "Teams Clutch Traditional",
    )
)

# Destination table names, in the same order as the frames above.
teams_table_names = [
    "teams_traditional",
    "teams_advanced",
    "teams_misc",
    "teams_clutch",
]

# Write each frame to its table, replacing any existing table of that name.
for table_name, table_frame in zip(
    teams_table_names,
    (teams_traditional, teams_advanced, teams_misc, teams_clutch),
):
    table_frame.to_sql(name=table_name, con=engine, if_exists="replace", index=False)

print(f"Reg Season Done. {time.time() - start_time} total seconds elapsed")

### Load: Playoff Team Statistics

In [None]:
start_time = time.time()

# Pull the four playoff tables out of the dict under short names.
playoffs_traditional, playoffs_advanced, playoffs_misc, playoffs_clutch = (
    team_stats_playoffs[stat_type]
    for stat_type in (
        "Teams General Traditional",
        "Teams General Advanced",
        "Teams General Misc",
        "Teams Clutch Traditional",
    )
)

# Destination table names, in the same order as the frames above.
playoffs_table_names = [
    "playoffs_traditional",
    "playoffs_advanced",
    "playoffs_misc",
    "playoffs_clutch",
]

# Write each frame to its table, replacing any existing table of that name.
for table_name, table_frame in zip(
    playoffs_table_names,
    (playoffs_traditional, playoffs_advanced, playoffs_misc, playoffs_clutch),
):
    table_frame.to_sql(name=table_name, con=engine, if_exists="replace", index=False)

print(f"Playoffs Done. {time.time() - start_time} total seconds elapsed")

### Load: Playoff Teams and Champions

In [None]:
# Write the wide and long playoff-team frames and the champions frame, each
# replacing any existing table of the same name.
for table_name, table_frame in (
    ("playoff_teams", playoff_teams_df),
    ("playoff_teams_long", playoff_teams_long_df),
    ("champions", champions_df),
):
    table_frame.to_sql(name=table_name, con=engine, if_exists="replace", index=False)

### Assign Primary Keys

In [None]:
stats_table_names = teams_table_names + playoffs_table_names
composite_key = '"TEAM", "SEASON"'

# (table name, primary-key column list) for every table that gets a key:
# the stats tables and the long playoff-teams table use the composite key,
# while the champions table is keyed on the season alone.
primary_keys = [(table_name, composite_key) for table_name in stats_table_names]
primary_keys.append(("playoff_teams_long", composite_key))
primary_keys.append(("champions", '"SEASON"'))

# engine.begin() commits on success and rolls back on error, so the keys are
# added all-or-nothing. Raw SQL strings must be wrapped in text() to be
# executable on SQLAlchemy 2.x. The interpolated names are constants defined
# in this notebook, not external input.
with engine.begin() as connection:
    for table_name, key_columns in primary_keys:
        query = text(f"ALTER TABLE {table_name} ADD PRIMARY KEY({key_columns});")
        connection.execute(query)