# WNBA Player Statistics

We are going to use player scoring data from the website [stats.wnba.com](https://stats.wnba.com) that has been saved to a CSV file.

In [None]:
import pandas as pd

# Load the player scoring dataset, preferring the local copy and falling back
# to the copy hosted on GitHub when the local file cannot be opened.
try:
    df = pd.read_csv('../data/wnba-player-scoring-1997-2023.csv')
except OSError:
    # Only file-access failures (missing file, bad permissions, ...) trigger the
    # fallback; a bare `except:` would also trap Ctrl-C and hide genuine bugs.
    df = pd.read_csv('https://raw.githubusercontent.com/callysto/basketball-and-data-science/main/content/data/wnba-player-scoring-1997-2023.csv')
df

Let's try a quick visualization of points versus minutes played per game.

In [None]:
import plotly.express as px

# Scatter plot with one marker per row of df: minutes on the x-axis, points on
# the y-axis, and the player, team and season shown when hovering.
fig = px.scatter(
    df,
    x='MIN',
    y='PTS',
    title='Points vs. Minutes Played Per Game',
    hover_data=['PLAYER', 'TEAM', 'SEASON'],
)
fig

There are a lot of abbreviations in the column titles. We can use the glossary on [one of the stats pages](https://stats.wnba.com/team/1611661319/players-traditional) to create a dictionary that translates each abbreviation into what it means.

In [None]:
# Glossary that maps each abbreviated column title to its full name.
column_titles = {
    # games, minutes and points
    'GP': 'Games Played',
    'MIN': 'Minutes Played',
    'PTS': 'Points',
    # field goals
    'FGM': 'Field Goals Made',
    'FGA': 'Field Goals Attempted',
    'FG%': 'Field Goal Percentage',
    # 3 point field goals
    '3PM': '3 Point Field Goals Made',
    '3PA': '3 Point Field Goals Attempted',
    '3P%': '3 Point Field Goal Percentage',
    # free throws
    'FTM': 'Free Throws Made',
    'FTA': 'Free Throws Attempted',
    'FT%': 'Free Throw Percentage',
    # rebounds
    'OREB': 'Offensive Rebounds',
    'DREB': 'Defensive Rebounds',
    'REB': 'Rebounds',
    # everything else
    'AST': 'Assists',
    'TOV': 'Turnovers',
    'STL': 'Steals',
    'BLK': 'Blocks',
    'PF': 'Personal Fouls',
    '+/-': 'Plus Minus',
}

# Show one lookup so the reader can see how the glossary is used.
print(f'For example, the column "FG%" means: {column_titles["FG%"]}')

Now we can use that dictionary to set the axis titles.

In [None]:
# Choose the two columns to compare, then look up their full names in the
# glossary so the chart title and both axes are labelled in plain words.
x, y = 'MIN', 'PTS'
x_title, y_title = column_titles[x], column_titles[y]
title = f'{y_title} vs. {x_title}'

fig = px.scatter(df, x=x, y=y, title=title, hover_data=['PLAYER', 'TEAM', 'SEASON'])
# Replace the default axis labels (the raw column names) with the full names.
fig.update_xaxes(title_text=x_title)
fig.update_yaxes(title_text=y_title)
fig

In [None]:
from selenium import webdriver
import pandas as pd
import os

# Team name -> numeric id that stats.wnba.com uses in its team page URLs.
teams = {
    'Atlanta Dream': 1611661330,
    'Chicago Sky': 1611661329,
    'Connecticut Sun': 1611661323,
    'Indiana Fever': 1611661325,
    'New York Liberty': 1611661313,
    'Washington Mystics': 1611661322,
    'Dallas Wings': 1611661321,
    'Las Vegas Aces': 1611661319,
    'Los Angeles Sparks': 1611661320,
    'Minnesota Lynx': 1611661324,
    'Phoenix Mercury': 1611661317,
    'Seattle Storm': 1611661328,
}

# Same mapping for the teams kept separately as "old"
# (presumably franchises that no longer play - confirm).
old_teams = {
    'Cleveland Rockers': 1611661315,
    'Charlotte Sting': 1611661314,
    'Houston Comets': 1611661316,
    'Sacramento Monarchs': 1611661318,
    'Miami Sol': 1611661326,
    'Portland Fire': 1611661327,
}

# Every team in one lookup: the entries of `teams` first, then `old_teams`.
all_teams = dict(teams)
all_teams.update(old_teams)

# Scrape the per-player table for every team and season from stats.wnba.com and
# save each one as 'data/<team> - <season>.csv'. Files that already exist are
# skipped, so this cell can be re-run to pick up where a previous run stopped.
from io import StringIO

# DataFrame.to_csv does not create missing folders, so make sure it is there.
os.makedirs('data', exist_ok=True)

driver = webdriver.Chrome()
try:
    for season in range(1997, 2024):
        # To try a smaller run, loop over `teams` or `old_teams` instead.
        for team, team_id in all_teams.items():
            csv_path = f'data/{team} - {season}.csv'
            if os.path.isfile(csv_path):
                continue  # already downloaded
            print(season, team)
            try:
                url = f'https://stats.wnba.com/team/{team_id}/players-traditional?Season={season}'
                driver.get(url)
                # The page can contain more than one <table>; the wanted one is
                # recognised by its first two column headers.
                for table in driver.find_elements('xpath', "//table"):
                    # Wrap the markup in StringIO: passing a literal HTML string
                    # to read_html is deprecated in recent pandas versions.
                    html = StringIO(table.get_attribute('outerHTML'))
                    roster = pd.read_html(html)[0]
                    if roster.columns[0] == 'Players' and roster.columns[1] == 'GP':
                        roster['TEAM'] = team
                        roster['SEASON'] = season
                        roster.to_csv(csv_path, index=False)
                        break
            except Exception as err:
                # Best effort: report what went wrong and carry on with the next
                # page. Ctrl-C is not caught, so the run can still be stopped.
                print('Error', repr(err))
finally:
    # Always close the browser, even if the run is interrupted.
    driver.quit()

# Join all the per-team, per-season CSV files into one dataframe and save it
# as 'data/wnba.csv'.
import os
import pandas as pd

frames = []
# Sorted so the row order of the combined file is the same on every run.
for file in sorted(os.listdir('data')):
    # Skip the combined output itself: it lives in the same folder, so on a
    # re-run it would otherwise be read back in and duplicate every row.
    if file.endswith('.csv') and file != 'wnba.csv':
        df = pd.read_csv(f'data/{file}')
        frames.append(df)
# Concatenate once at the end instead of re-copying the growing frame per file.
df_all = pd.concat(frames) if frames else pd.DataFrame()
# rename the Players column
df_all.rename(columns={'Players':'PLAYER'}, inplace=True)
# add a column for Team ID
df_all['TEAM_ID'] = df_all['TEAM'].map(all_teams)
df_all.to_csv('data/wnba.csv', index=False)