# WNBA Player Statistics

We are going to user player scoring data from the website [stats.wnba.com](https://stats.wnba.com) that has been downloaded into a CSV file.



In [4]:
import pandas as pd
df = pd.read_csv('../data/wnba-player-scoring-1997-2023.csv')
df

Unnamed: 0,PLAYER,TEAM,TEAM_ID,SEASON,GP,MIN,%FGA 2PT,%FGA 3PT,%PTS 2PT,%PTS 2PT MR,...,%PTS FBPs,%PTS FT,%PTS OffTO,%PTS PITP,2FGM %AST,2FGM %UAST,3FGM %AST,3FGM %UAST,FGM %AST,FGM %UAST
0,Aari McDonald,Atlanta Dream,1611661330,1997,5,28.7,47.4,52.6,26.7,0.0,...,26.7,13.3,16.7,26.7,50.0,50.0,83.3,16.7,70.0,30.0
1,Allisha Gray,Atlanta Dream,1611661330,1997,13,32.4,78.5,21.5,56.5,9.6,...,17.0,27.8,10.9,47.0,63.1,36.9,91.7,8.3,67.5,32.5
2,Asia (AD) Durr,Atlanta Dream,1611661330,1997,13,14.8,63.4,36.6,53.3,17.8,...,18.9,16.7,20.0,35.6,33.3,66.7,100.0,0.0,51.5,48.5
3,Cheyenne Parker,Atlanta Dream,1611661330,1997,13,27.2,87.7,12.3,71.8,9.6,...,9.1,19.6,17.7,62.2,62.7,37.3,100.0,0.0,65.4,34.6
4,Danielle Robinson,Atlanta Dream,1611661330,1997,5,14.4,78.3,21.7,59.3,14.8,...,7.4,7.4,22.2,44.4,50.0,50.0,100.0,0.0,63.6,36.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8645,Myisha Hines-Allen,Washington Mystics,1611661322,2023,9,12.4,80.9,19.1,52.6,0.0,...,10.5,15.8,13.2,52.6,50.0,50.0,100.0,0.0,64.3,35.7
8646,Natasha Cloud,Washington Mystics,1611661322,2023,13,31.3,60.7,39.3,43.4,9.8,...,7.0,29.3,15.4,33.6,22.6,77.4,92.3,7.7,43.2,56.8
8647,Shakira Austin,Washington Mystics,1611661322,2023,13,26.2,100.0,0.0,81.1,4.1,...,9.5,18.9,30.4,77.0,68.3,31.7,0.0,0.0,68.3,31.7
8648,Shatori Walker-Kimbrough,Washington Mystics,1611661322,2023,14,18.2,54.0,46.0,34.6,11.5,...,17.3,25.0,55.8,23.1,66.7,33.3,100.0,0.0,81.3,18.8


In [10]:
import plotly.express as px
px.scatter(df, x='GP', y='MIN', hover_data=['PLAYER', 'TEAM', 'SEASON'])

There are a lot of abreviations in the column titles, we can use the glossary on [one of the stats pages](https://stats.wnba.com/team/1611661319/players-scoring) to create a dictionary for translating abreviations to what they mean.

In [8]:
column_titles = {
    'GP':'Games Played',
    'MIN':'Minutes Played',
    '%FGA 2PT':'Percent of Field Goals Attempted (2 Pointers)',
    '%FGA 3PT':'Percent of Field Goals Attempted (3 Pointers)',
    '%PTS 2PT':'Percent of Points (2 Pointers)',
    '%PTS 2PT MR':'Percent of Points (Mid-Range)',
    '%PTS 3PT':'Percent of Points (3 Pointers)',
    '%PTS FBPs':'Percent of Points (Fast Break Points)',
    '%PTS FT':'Percent of Points (Free Throws)',
    '%PTS OffTO':'Percent of Points (Off Turnovers)',
    '%PTS PITP':'Percent of Points (Points in the Paint)',
    '2FGM %AST':'Percent of 2 Point Field Goals Made Assisted',
    '2FGM %UAST':'Percent of 2 Point Field Goals Made Unassisted',
    '3FGM %AST':'Percent of 3 Point Field Goals Made Assisted',
    '3FGM %UAST':'Percent of 3 Point Field Goals Made Unassisted',
    'FGM %AST':'Percent of Field Goals Made Assisted',
    'FGM %UAST':'Percent of Field Goals Made Unassisted'}
print(f'For example, the column "%PTS 2PT MR" means: {column_titles["%PTS 2PT MR"]}')

For example, the column "%PTS 2PT MR" means: Percent of Points (Mid-Range)


In [11]:
from selenium import webdriver
import pandas as pd
import os

teams = {
    'Atlanta Dream':1611661330,
    'Chicago Sky':1611661329,
    'Connecticut Sun':1611661323,
    'Indiana Fever':1611661325,
    'New York Liberty':1611661313,
    'Washington Mystics':1611661322,
    'Dallas Wings':1611661321,
    'Las Vegas Aces':1611661319,
    'Los Angeles Sparks':1611661320,
    'Minnesota Lynx':1611661324,
    'Phoenix Mercury':1611661317,
    'Seattle Storm':1611661328 }

old_teams = {
    'Cleveland Rockers':1611661315,
    'Charlotte Sting':1611661314,
    'Houston Comets':1611661316,
    'Sacramento Monarchs':1611661318,
    'Miami Sol':1611661326,
    'Portland Fire':1611661327}

all_teams = {**teams, **old_teams}

#url = 'https://stats.wnba.com/team/1611661319/players-scoring/?Season=2023'

driver = webdriver.Chrome()

#season = 2023
for season in range(1997, 2024):
    #for team, id in teams.items():
    #for team, id in old_teams.items():
    for team, id in all_teams.items():
        # check if file exists
        if os.path.isfile(f'data/{team} - {season}.csv'):
            pass
        else:
            print(season, team)
            try:
                url = f'https://stats.wnba.com/team/{id}/players-traditional?Season={season}'
                driver.get(url)
                tables = driver.find_elements('xpath', "//table")
                #df = pd.read_html(tables[3].get_attribute('outerHTML'))[0]
                for table in tables:
                    df = pd.read_html(table.get_attribute('outerHTML'))[0]
                    if df.columns[0] == 'Players' and df.columns[1] == 'GP':
                        df['TEAM'] = team
                        df['SEASON'] = season
                        df.to_csv(f'data/{team} - {season}.csv', index=False)
                        break
            except:
                print('Error')

driver.quit()

# join all the csv files into one dataframe
import os
import pandas as pd
df_all = pd.DataFrame()
for file in os.listdir('data'):
    if file.endswith('.csv'):
        df = pd.read_csv(f'data/{file}')
        df_all = pd.concat([df_all, df])
# rename the Players column
df_all.rename(columns={'Players':'PLAYER'}, inplace=True)
# add a column for Team ID
df_all['TEAM_ID'] = df_all['TEAM'].map(all_teams)
df_all.to_csv('data/wnba.csv', index=False)

1997 Atlanta Dream
