Initial setup

In [None]:
import pandas as pd

In [None]:
# Relative locations of the season and player-stat CSV files
wnba2018, wnba2019 = (
    "../Original_Data/WNBA_2018_season.csv",
    "../Original_Data/WNBA_2019_season.csv",
)
player2018, player2019 = (
    "../Original_Data/player_stats_2018.csv",
    "../Original_Data/player_stats_2019.csv",
)

# Load each file into its own DataFrame (season data first, then player stats)
wnba_2018_df = pd.read_csv(filepath_or_buffer=wnba2018)
wnba_2019_df = pd.read_csv(filepath_or_buffer=wnba2019)
player_2018_df = pd.read_csv(filepath_or_buffer=player2018)
player_2019_df = pd.read_csv(filepath_or_buffer=player2019)

View the new DFs

In [None]:
#wnba_2018_df.head()

In [None]:
#wnba_2019_df.head()

In [None]:
#player_2018_df.head()

In [None]:
#player_2019_df.head()

Reduce Player DFs to only the 9 columns we want to evaluate


In [None]:
# Keep only the nine 2018 player columns evaluated later in the notebook
player18_columns = [
    "Player", "Tm", "Pos",
    "G", "FG", "FG%",
    "FT", "FT%", "PTS",
]
reduced_player18_df = player_2018_df.loc[:, player18_columns]

# Preview the first ten rows of the reduced frame
reduced_player18_df.head(10)

In [None]:
# Keep only the nine 2019 player columns evaluated later in the notebook
player19_columns = [
    "Player", "Tm", "Pos",
    "G", "FG", "FG%",
    "FT", "FT%", "PTS",
]
reduced_player19_df = player_2019_df.loc[:, player19_columns]

# Preview the first ten rows of the reduced frame
reduced_player19_df.head(10)

Remove the Box Score column from the Game Data

In [None]:
# 2018 game data: keep "Date", "Visitor/Neutral", "PTS", "Home/Neutral" and
# "PTS.1"; every other column is left out of the reduced frame.
game18_columns = [
    "Date",
    "Visitor/Neutral", "PTS",
    "Home/Neutral", "PTS.1",
]
reduced_game18_df = wnba_2018_df.loc[:, game18_columns]

# Preview the first ten rows of the reduced frame
reduced_game18_df.head(10)

In [None]:
# 2019 game data: keep "Date", "Visitor/Neutral", "PTS", "Home/Neutral" and
# "PTS.1"; every other column is left out of the reduced frame.
game19_columns = [
    "Date",
    "Visitor/Neutral", "PTS",
    "Home/Neutral", "PTS.1",
]
reduced_game19_df = wnba_2019_df.loc[:, game19_columns]

# Preview the first ten rows of the reduced frame
reduced_game19_df.head(10)

Update the Game Data DFs so team names match the 3-letter acronyms used in the Player Data DFs

In [None]:
# Replace each full team name with its 3-letter acronym
# (e.g. "Dallas Wings" --> "DAL") in both team columns of the game data.

replacements = {
    "Dallas Wings": "DAL",
    "Chicago Sky": "CHI",
    "New York Liberty": "NYL",
    "Las Vegas Aces": "LVA",
    "Atlanta Dream": "ATL",
    "Los Angeles Sparks": "LAS",
    "Phoenix Mercury": "PHO",
    "Seattle Storm": "SEA",
    "Indiana Fever": "IND",
    "Washington Mystics": "WAS",
    "Minnesota Lynx": "MIN",
    "Connecticut Sun": "CON",
}

# Assign the replaced column back to the frame rather than calling
# `df[col].replace(..., inplace=True)`. The in-place form mutates a Series
# selected from the frame (chained assignment): pandas warns about it, and
# with Copy-on-Write enabled it leaves the DataFrame itself unchanged.
for game_df in (reduced_game18_df, reduced_game19_df):
    for team_col in ("Visitor/Neutral", "Home/Neutral"):
        game_df[team_col] = game_df[team_col].replace(replacements)
    

In [None]:
#View new Game Data DFs
#reduced_game18_df.head(30)
#reduced_game19_df.head(30)

Update column headers to match schema 

In [None]:
# Game data: new header names for the team and score columns
game_column_names = {
    'Visitor/Neutral': 'away_team',
    'PTS': 'away_team_pts',
    'Home/Neutral': 'home_team',
    'PTS.1': 'home_team_pts',
}
revised_game18_df = reduced_game18_df.rename(columns=game_column_names)
revised_game19_df = reduced_game19_df.rename(columns=game_column_names)

# Player data: new header names; columns not listed here keep their names
player_column_names = {
    'Tm': 'team_name',
    'Pos': 'Position',
    'G': 'Games',
    'FG%': 'FG_pct',
    'FT%': 'FT_pct',
    'PTS': 'Total_Pts',
}
revised_player18_df = reduced_player18_df.rename(columns=player_column_names)
revised_player19_df = reduced_player19_df.rename(columns=player_column_names)

In [None]:
# Add a leading 'Season' column to both the Game Data and the Player Data.
season_frames = (
    (revised_game18_df, 2018),
    (revised_game19_df, 2019),
    (revised_player18_df, 2018),
    (revised_player19_df, 2019),
)

# DataFrame.insert mutates the frame in place. The membership check keeps this
# cell safe to re-run: inserting with allow_duplicates=True would add another
# 'Season' column on every execution, and the duplicates would be exported.
for season_df, season_year in season_frames:
    if "Season" not in season_df.columns:
        season_df.insert(0, "Season", season_year)

In [None]:
# revised_game18_df

In [None]:
# Preview the revised 2018 player data (first ten rows, matching the
# .head(10) previews used for the reduced frames) instead of the whole frame.
revised_player18_df.head(10)

Export new DFs to CSV files

In [None]:
# Write the revised 2019 player data to a UTF-8 CSV, leaving out the index
revised_player19_df.to_csv(
    "revised_player19_df.csv",
    index=False,
    encoding="utf-8",
)


In [None]:
# Write the revised 2018 player data to a UTF-8 CSV, leaving out the index
revised_player18_df.to_csv(
    "revised_player18_df.csv",
    index=False,
    encoding="utf-8",
)

In [None]:
# Write the revised 2018 game data to a UTF-8 CSV, leaving out the index
revised_game18_df.to_csv(
    "revised_game18_df.csv",
    index=False,
    encoding="utf-8",
)

In [None]:
# Write the revised 2019 game data to a UTF-8 CSV, leaving out the index
revised_game19_df.to_csv(
    "revised_game19_df.csv",
    index=False,
    encoding="utf-8",
)