In [1]:
# Clone the project repository into the current working directory; later cells
# read and write files under LHL-final-final-project/data/.
# NOTE(review): a private repo needs credentials, and a `!` shell cell may not be
# able to show an interactive prompt — confirm before relying on it.
!git clone https://github.com/colterwood/LHL-final-final-project.git

Cloning into 'LHL-final-final-project'...
remote: Enumerating objects: 75, done.[K
remote: Counting objects: 100% (75/75), done.[K
remote: Compressing objects: 100% (64/64), done.[K
remote: Total 75 (delta 35), reused 19 (delta 6), pack-reused 0 (from 0)[K
Receiving objects: 100% (75/75), 1.00 MiB | 2.28 MiB/s, done.
Resolving deltas: 100% (35/35), done.


In [2]:
import os
import re
import string
import time
from getpass import getpass
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
## Explore the "A" index page: list the entries that mention 2024 ##
# Fetch the player index for last names starting with 'a'
url = "https://www.basketball-reference.com/wnba/players/a/"
headers = {"User-Agent": "Mozilla/5.0"}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, "html.parser")

# Grab every <p> tag (the player entries are listed in these)
player_paragraphs = soup.find_all("p")

# Keep only the entries whose text contains "2024", then print one per line
entries_2024 = [p.text.strip() for p in player_paragraphs if "2024" in p.text]
for entry in entries_2024:
    print(entry)

Lindsay Allen
2017 to 2024
Rebecca Allen
2015 to 2024
Laeticia Amihere
2023 to 2024
At  Ariel Atkins
2018 to 2024
Amy Atwell
2022 to 2024
Shakira Austin
2022 to 2024


In [None]:
# Fetch the 'a' index page and print each 2024 player's name and profile link
url = "https://www.basketball-reference.com/wnba/players/a/"
headers = {"User-Agent": "Mozilla/5.0"}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, "html.parser")

player_paragraphs = soup.find_all("p")

# Expand this to show player URLs
for p in player_paragraphs:
    if "2024" not in p.text:
        continue

    # A <p> that mentions 2024 but carries no link is not a player entry;
    # skip it instead of failing on a missing anchor
    a_tag = p.find("a", href=True)
    if a_tag is None:
        continue

    name = a_tag.text.strip()
    link = a_tag["href"]
    print(name, link)

Lindsay Allen /wnba/players/a/allenli01w.html
Rebecca Allen /wnba/players/a/allenre01w.html
Laeticia Amihere /wnba/players/a/amihela01w.html
Ariel Atkins /wnba/players/a/atkinar01w.html
Amy Atwell /wnba/players/a/atwelam01w.html
Shakira Austin /wnba/players/a/austish01w.html


In [None]:
## Inspect the tables on Lindsay Allen's game log page ##
# 2024 gamelog page for Lindsay Allen
url = "https://www.basketball-reference.com/wnba/players/a/allenli01w/gamelog/2024/"
headers = {"User-Agent": "Mozilla/5.0"}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, "html.parser")

# Locate the game log table by its element id
table = soup.find("table", id="wnba_pgl_basic")

# Report a missing table; otherwise parse the HTML and preview the first rows
if table is None:
    print("Table not found")
else:
    table_html = StringIO(str(table))
    df = pd.read_html(table_html)[0]
    print(df.head())

  Rk        Date     Age   Tm Unnamed: 4  Opp Unnamed: 6 GS     MP FG  ...  \
0  1  2024-05-15  29-056  CHI          @  DAL     L (-8)  0  13:31  2  ...   
1  2  2024-05-18  29-059  CHI          @  DAL     W (+9)  0   9:06  1  ...   
2  3  2024-05-23  29-064  CHI          @  NYL     W (+9)  0  18:44  4  ...   
3  4  2024-05-25  29-066  CHI        NaN  CON     L (-4)  0  15:38  3  ...   
4  5  2024-05-28  29-069  CHI        NaN  SEA     L (-9)  0  23:03  0  ...   

  ORB DRB TRB AST STL BLK TOV PF PTS GmSc  
0   0   1   1   0   1   0   0  0   5  3.2  
1   0   1   1   1   0   0   1  0   2  1.7  
2   0   1   1   2   0   0   1  3   8  4.2  
3   1   1   2   2   2   0   2  1   6  7.1  
4   0   2   2   4   0   0   3  2   3  0.1  

[5 rows x 28 columns]


In [None]:
# List every column name of the parsed game log table, one per line
for col in df.columns:
    print(col)

Rk
Date
Age
Tm
Unnamed: 4
Opp
Unnamed: 6
GS
MP
FG
FGA
FG%
3P
3PA
3P%
FT
FTA
FT%
ORB
DRB
TRB
AST
STL
BLK
TOV
PF
PTS
GmSc


In [None]:
# Rename the two unlabeled columns that were parsed as "Unnamed: N"
df = df.rename(columns={
    "Unnamed: 4": "home_away",
    "Unnamed: 6": "win_margin"
})

# Drop header rows accidentally parsed as data. Take an explicit copy so the
# column assignments below act on an independent frame rather than a filtered
# slice (this is what triggers pandas' SettingWithCopyWarning).
df = df[df["Rk"] != "Rk"].copy()

# Convert Age from 'YY-DDD' to decimal years
age_parts = df["Age"].str.extract(r"(\d+)-(\d+)").astype(float)
df["Age"] = (age_parts[0] + age_parts[1] / 365).round(1)

# Convert home_away: '@' → 'away', anything else (including missing) → 'home'
df["home_away"] = np.where(df["home_away"] == "@", "away", "home")

# Convert win_margin: extract the signed number inside the parentheses
# (expand=False returns a Series instead of a one-column DataFrame)
df["win_margin"] = df["win_margin"].str.extract(r"\(([-+]?\d+)\)", expand=False).astype(float)

# Convert MP (minutes played) from MM:SS to float minutes
def convert_mp(val):
    """Convert a minutes-played value in 'MM:SS' form to decimal minutes.

    Missing values (anything ``pd.isna`` reports as missing) yield ``np.nan``.
    Otherwise the value is split on ':' into whole minutes and seconds and the
    result is rounded to one decimal place.
    """
    if pd.isna(val):
        return np.nan
    minutes_text, seconds_text = val.split(":")
    total_minutes = int(minutes_text) + int(seconds_text) / 60
    return round(total_minutes, 1)

# Run the MM:SS → decimal-minutes conversion over every value in the MP column
df["MP"] = df["MP"].map(convert_mp)

In [None]:
# Preview the cleaned single-player game log
df.head()

Unnamed: 0,Rk,Date,Age,Tm,home_away,Opp,win_margin,GS,MP,FG,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc
0,1,2024-05-15,29.2,CHI,away,DAL,-8.0,0,13.5,2,...,0,1,1,0,1,0,0,0,5,3.2
1,2,2024-05-18,29.2,CHI,away,DAL,9.0,0,9.1,1,...,0,1,1,1,0,0,1,0,2,1.7
2,3,2024-05-23,29.2,CHI,away,NYL,9.0,0,18.7,4,...,0,1,1,2,0,0,1,3,8,4.2
3,4,2024-05-25,29.2,CHI,home,CON,-4.0,0,15.6,3,...,1,1,2,2,2,0,2,1,6,7.1
4,5,2024-05-28,29.2,CHI,home,SEA,-9.0,0,23.1,0,...,0,2,2,4,0,0,3,2,3,0.1


In [None]:
# Scrape and clean the 2024 game logs of every 2024 player on the 'a' index page.
# `players`, `frames` and `df` are rebuilt from scratch so nothing is duplicated
# from earlier cells.

# Collect all 2024 players from the 'a' page
url = "https://www.basketball-reference.com/wnba/players/a/"
headers = {"User-Agent": "Mozilla/5.0"}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, "html.parser")

player_tags = soup.find_all("p")
players = []

for tag in player_tags:
    if "2024" not in tag.text:
        continue
    # A <p> that mentions 2024 but has no link cannot be a player entry
    a_tag = tag.find("a", href=True)
    if a_tag is None:
        continue
    players.append((a_tag.text.strip(), a_tag["href"]))


def convert_mp(val):
    """Convert an 'MM:SS' minutes-played string to decimal minutes (1 decimal).

    Missing values are returned as ``np.nan``. Defined once here instead of
    being re-created on every loop iteration.
    """
    if pd.isna(val):
        return np.nan
    mins, secs = map(int, val.split(":"))
    return round(mins + secs / 60, 1)


# Loop through those players and extract + clean their gamelog tables
frames = []

for name, rel_link in players:
    gamelog_url = f"https://www.basketball-reference.com{rel_link.replace('.html', '/gamelog/2024/')}"
    response = requests.get(gamelog_url, headers=headers)

    # Pause between requests to stay under the site's rate limit
    time.sleep(3)  # ~20 requests per minute max

    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find("table", id="wnba_pgl_basic")

    if table is None:
        print(f"No table found for {name}")
        continue

    gamelog = pd.read_html(StringIO(str(table)))[0]

    # Drop repeated header rows; copy so later assignments hit an independent frame
    gamelog = gamelog[gamelog["Rk"] != "Rk"].copy()

    gamelog = gamelog.rename(columns={
        "Unnamed: 4": "home_away",
        "Unnamed: 6": "win_margin"
    })

    # Clean age: 'YY-DDD' → decimal years
    age_parts = gamelog["Age"].str.extract(r"(\d+)-(\d+)").astype(float)
    gamelog["Age"] = (age_parts[0] + age_parts[1] / 365).round(1)

    # Clean other fields
    gamelog["home_away"] = gamelog["home_away"].apply(lambda x: "away" if x == "@" else "home")
    gamelog["win_margin"] = gamelog["win_margin"].str.extract(r"\(([-+]?\d+)\)", expand=False).astype(float)
    gamelog["MP"] = gamelog["MP"].apply(convert_mp)

    gamelog.insert(0, "Player", name)
    frames.append(gamelog)

# Combine all players into one DataFrame (pd.concat raises on an empty list)
if frames:
    df = pd.concat(frames, ignore_index=True)
else:
    df = pd.DataFrame()
    print("No gamelog tables were collected")

In [None]:
# Preview the combined game logs for the 'a' players
df.head()

Unnamed: 0,Player,Rk,Date,Age,Tm,home_away,Opp,win_margin,GS,MP,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc
0,Lindsay Allen,1,2024-05-15,29.2,CHI,away,DAL,-8.0,0,13.5,...,0,1,1,0,1,0,0,0,5,3.2
1,Lindsay Allen,2,2024-05-18,29.2,CHI,away,DAL,9.0,0,9.1,...,0,1,1,1,0,0,1,0,2,1.7
2,Lindsay Allen,3,2024-05-23,29.2,CHI,away,NYL,9.0,0,18.7,...,0,1,1,2,0,0,1,3,8,4.2
3,Lindsay Allen,4,2024-05-25,29.2,CHI,home,CON,-4.0,0,15.6,...,1,1,2,2,2,0,2,1,6,7.1
4,Lindsay Allen,5,2024-05-28,29.2,CHI,home,SEA,-9.0,0,23.1,...,0,2,2,4,0,0,3,2,3,0.1


In [None]:
# List the distinct players present in the combined game log frame
for player in df["Player"].unique():
    print(player)

Lindsay Allen
Rebecca Allen
Laeticia Amihere
Ariel Atkins
Amy Atwell
Shakira Austin


In [None]:
# Null check: count the missing values in each column of the 'a'-page game logs
print(df.isnull().sum())

Player         0
Rk             0
Date           0
Age            0
Tm             0
home_away      0
Opp            0
win_margin     0
GS             0
MP             0
FG             0
FGA            0
FG%            8
3P             0
3PA            0
3P%           36
FT             0
FTA            0
FT%           65
ORB            0
DRB            0
TRB            0
AST            0
STL            0
BLK            0
TOV            0
PF             0
PTS            0
GmSc           0
dtype: int64


In [None]:
# Base URL pattern
base_url = "https://www.basketball-reference.com/wnba/players/{}/"
headers = {"User-Agent": "Mozilla/5.0"}

# Season of interest, plus the patterns used to recognise a player entry:
# four-digit years in the entry text and profile links of the form
# /wnba/players/<letter>/<id>.html
season = 2024
year_pattern = re.compile(r"\b(?:19|20)\d{2}\b")
player_link_pattern = re.compile(r"^/wnba/players/[a-z]/[^/]+\.html$")

# Store (name, link) for all 2024 players
players = []

# Loop through a–z player index pages
for letter in string.ascii_lowercase:
    url = base_url.format(letter)
    response = requests.get(url, headers=headers, timeout=30)

    # Pause between requests to stay under the site's rate limit
    time.sleep(3)  # ~20 requests per minute max

    if not response.ok:
        # Do not parse an error page as if it were a player index
        print(f"Could not load index page '{letter}' (HTTP {response.status_code})")
        continue

    soup = BeautifulSoup(response.content, "html.parser")
    player_paragraphs = soup.find_all("p")

    for p in player_paragraphs:
        # Only paragraphs that link to a player profile are player entries
        a_tag = p.find("a", href=True)
        if a_tag is None or not player_link_pattern.match(a_tag["href"]):
            continue

        # Years active are shown as e.g. "2017 to 2024". Compare the season
        # against the whole range: a plain substring test for "2024" misses
        # careers that span the season without naming it ("2017 to 2025").
        years = [int(year) for year in year_pattern.findall(p.text)]
        if not years or not (min(years) <= season <= max(years)):
            continue

        players.append((a_tag.text.strip(), a_tag["href"]))

# Print result
print(f"Found {len(players)} players with 2024:")
for name, link in players:
    print(name, link)

Found 157 players with 2024:
Lindsay Allen /wnba/players/a/allenli01w.html
Rebecca Allen /wnba/players/a/allenre01w.html
Laeticia Amihere /wnba/players/a/amihela01w.html
Ariel Atkins /wnba/players/a/atkinar01w.html
Amy Atwell /wnba/players/a/atwelam01w.html
Shakira Austin /wnba/players/a/austish01w.html
Rachel Banham /wnba/players/b/banhara01w.html
Kierstan Bell /wnba/players/b/bellki01w.html
Grace Berger /wnba/players/b/bergegr01w.html
Morgan Bertsch /wnba/players/b/bertsmo01w.html
Caitlin Bickle /wnba/players/b/bicklca01w.html
Monique Billings /wnba/players/b/billimo01w.html
DeWanna Bonner /wnba/players/b/bonnede01w.html
Aliyah Boston /wnba/players/b/bostoal01w.html
Cameron Brink /wnba/players/b/brinkca01w.html
Jaelyn Brown /wnba/players/b/brownja06w.html
Kalani Brown /wnba/players/b/brownka01w.html
Lexie Brown /wnba/players/b/brownle02w.html
Jakia Brown-Turner /wnba/players/b/brownja07w.html
Kennedy Burke /wnba/players/b/burkeke01w.html
Rae Burrell /wnba/players/b/burrera01w.html
Ve

In [None]:
# Count players with last names starting with 'B'.
# The letter is taken from the profile link (/wnba/players/<letter>/...), i.e. the
# index page the player is listed on, rather than from the last word of the name:
# the last word is wrong for multi-word surnames and accented initials.
count_b = sum(1 for _, link in players if link.startswith("/wnba/players/b/"))
print(f"Players with last name starting with B: {count_b}")

Players with last name starting with B: 16


In [None]:
# Count players with last names starting with 'I'.
# The letter is taken from the profile link (/wnba/players/<letter>/...), i.e. the
# index page the player is listed on, rather than from the last word of the name:
# the last word is wrong for multi-word surnames and accented initials.
count_i = sum(1 for _, link in players if link.startswith("/wnba/players/i/"))
print(f"Players with last name starting with I: {count_i}")

Players with last name starting with I: 1


In [None]:
# Scrape and clean the 2024 game log of every player in `players`, then combine
# them into one frame. `frames` and `df` are rebuilt from scratch so nothing is
# duplicated from earlier cells.


def convert_mp(val):
    """Convert an 'MM:SS' minutes-played string to decimal minutes (1 decimal).

    Missing values are returned as ``np.nan``. Defined once here instead of
    being re-created on every loop iteration.
    """
    if pd.isna(val):
        return np.nan
    mins, secs = map(int, val.split(":"))
    return round(mins + secs / 60, 1)


frames = []

# Loop through players and extract gamelog data
for name, rel_link in players:
    gamelog_url = f"https://www.basketball-reference.com{rel_link.replace('.html', '/gamelog/2024/')}"

    try:
        # The timeout keeps a stalled connection from hanging the whole scrape
        response = requests.get(gamelog_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Request failed for {name}: {exc}")
        continue
    finally:
        # Wait to avoid rate limits (runs after every attempt, failed or not)
        time.sleep(3)  # ~20 requests per minute max

    if not response.ok:
        # Keep HTTP failures (e.g. rate limiting) distinguishable from a page
        # that simply has no game log table
        print(f"Request failed for {name} (HTTP {response.status_code})")
        continue

    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find("table", id="wnba_pgl_basic")

    if table is None:
        print(f"No table found for {name}")
        continue

    gamelog = pd.read_html(StringIO(str(table)))[0]

    # Drop repeated header rows; copy so later assignments hit an independent frame
    gamelog = gamelog[gamelog["Rk"] != "Rk"].copy()

    gamelog = gamelog.rename(columns={
        "Unnamed: 4": "home_away",
        "Unnamed: 6": "win_margin"
    })

    # Clean age: 'YY-DDD' → decimal years
    age_parts = gamelog["Age"].str.extract(r"(\d+)-(\d+)").astype(float)
    gamelog["Age"] = (age_parts[0] + age_parts[1] / 365).round(1)

    # Clean other fields
    gamelog["home_away"] = gamelog["home_away"].apply(lambda x: "away" if x == "@" else "home")
    gamelog["win_margin"] = gamelog["win_margin"].str.extract(r"\(([-+]?\d+)\)", expand=False).astype(float)
    gamelog["MP"] = gamelog["MP"].apply(convert_mp)

    gamelog.insert(0, "Player", name)
    frames.append(gamelog)

# Combine all players into one DataFrame (pd.concat raises on an empty list)
if frames:
    df = pd.concat(frames, ignore_index=True)
else:
    df = pd.DataFrame()
    print("No gamelog tables were collected")

In [None]:
# List every distinct player captured by the full scrape
for player in df["Player"].unique():
    print(player)

Lindsay Allen
Rebecca Allen
Laeticia Amihere
Ariel Atkins
Amy Atwell
Shakira Austin
Rachel Banham
Kierstan Bell
Grace Berger
Morgan Bertsch
Caitlin Bickle
Monique Billings
DeWanna Bonner
Aliyah Boston
Cameron Brink
Jaelyn Brown
Kalani Brown
Lexie Brown
Jakia Brown-Turner
Kennedy Burke
Rae Burrell
Veronica Burton
Maya Caldwell
Jordin Canada
Emma Cannon
Kamilla Cardoso
Bridget Carleton
DiJonai Carrington
Chennedy Carter
Jessika Carter
Tina Charles
Layshia Clarendon
Alysha Clark
Caitlin Clark
Natasha Cloud
Nia Coffey
Napheesa Collier
Sydney Colson
Zia Cooke
Kahleah Copper
Lorela Cubaj
Sophie Cunningham
Crystal Dangerfield
Damiris Dantas
Kaela Davis
Marquesha Davis
Diamond DeShields
Skylar Diggins-Smith
Liz Dixon
Ivana Dojkić
Stefanie Dolson
Aaliyah Edwards
Queen Egbo
Emily Engstler
Olivia Époupa
Dana Evans
Temi Fagbenle
Dyaisha Fair
Dulcy Fankam Mendjiadeu
Leonie Fiebich
Kysre Gondrezick
Allisha Gray
Chelsea Gray
Brittney Griner
Megan Gustafson
Dearica Hamby
Mikiah Herbert Harrigan
Tyasha

In [None]:
# Null check: count the missing values in each column of the full scrape
print(df.isnull().sum())

Player           0
Rk               0
Date             0
Age              0
Tm               0
home_away        0
Opp              0
win_margin       0
GS               0
MP               0
FG               0
FGA              0
FG%            305
3P               0
3PA              0
3P%           1332
FT               0
FTA              0
FT%           2096
ORB              0
DRB              0
TRB              0
AST              0
STL              0
BLK              0
TOV              0
PF               0
PTS              0
GmSc             0
dtype: int64


In [None]:
# (rows, columns) of the combined game log frame
df.shape

(4512, 29)

In [None]:
# Number of distinct players in the scraped data
print(df["Player"].nunique())

157


In [None]:
# Number of rows that are exact duplicates of an earlier row
print(df.duplicated().sum())

0


In [None]:
# Write the combined game logs into the cloned repo's data folder, without the index
output_csv = "LHL-final-final-project/data/2024_player_gamelogs.csv"
df.to_csv(output_csv, index=False)

In [None]:
# Set the author identity in the global git config; it is used by the commit
# made in the next cell
!git config --global user.email "ycolterwood@me.com"
!git config --global user.name "Colter Wood"

In [None]:
%cd LHL-final-final-project
!git add data/2024_player_gamelogs.csv
!git commit -m "Add 2024 WNBA player gamelogs"
!git push https://colterwood:<TOKEN>@github.com/colterwood/LHL-final-final-project.git


/content/LHL-final-final-project
[main e2af797] Add 2024 WNBA player gamelogs
 1 file changed, 4513 insertions(+)
 create mode 100644 data/2024_player_gamelogs.csv
Enumerating objects: 6, done.
Counting objects: 100% (6/6), done.
Delta compression using up to 8 threads
Compressing objects: 100% (4/4), done.
Writing objects: 100% (4/4), 107.43 KiB | 3.70 MiB/s, done.
Total 4 (delta 2), reused 0 (delta 0), pack-reused 0
remote: Resolving deltas: 100% (2/2), completed with 2 local objects.[K
To https://github.com/colterwood/LHL-final-final-project.git
   753754e..e2af797  main -> main


In [33]:
# Reload the saved player game logs from the repository's data folder
gamelog_csv = "LHL-final-final-project/data/2024_player_gamelogs.csv"
df = pd.read_csv(gamelog_csv)

# Show the first rows as a quick sanity check
df.head()

Unnamed: 0,Player,Rk,Date,Age,Tm,home_away,Opp,win_margin,GS,MP,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc
0,Lindsay Allen,1,2024-05-15,29.2,CHI,away,DAL,-8.0,0,13.5,...,0,1,1,0,1,0,0,0,5,3.2
1,Lindsay Allen,2,2024-05-18,29.2,CHI,away,DAL,9.0,0,9.1,...,0,1,1,1,0,0,1,0,2,1.7
2,Lindsay Allen,3,2024-05-23,29.2,CHI,away,NYL,9.0,0,18.7,...,0,1,1,2,0,0,1,3,8,4.2
3,Lindsay Allen,4,2024-05-25,29.2,CHI,home,CON,-4.0,0,15.6,...,1,1,2,2,2,0,2,1,6,7.1
4,Lindsay Allen,5,2024-05-28,29.2,CHI,home,SEA,-9.0,0,23.1,...,0,2,2,4,0,0,3,2,3,0.1


In [34]:
# Normalise every column header to lowercase
df.columns = [column.lower() for column in df.columns]

In [35]:
# Show "- column: dtype" for every column of the freshly loaded frame
for col, dtype in df.dtypes.items():
    print(f"- {col}: {dtype}")

- player: object
- rk: int64
- date: object
- age: float64
- tm: object
- home_away: object
- opp: object
- win_margin: float64
- gs: int64
- mp: float64
- fg: int64
- fga: int64
- fg%: float64
- 3p: int64
- 3pa: int64
- 3p%: float64
- ft: int64
- fta: int64
- ft%: float64
- orb: int64
- drb: int64
- trb: int64
- ast: int64
- stl: int64
- blk: int64
- tov: int64
- pf: int64
- pts: int64
- gmsc: float64


In [36]:
# Encode 'home_away' numerically: 1 = home, 2 = away.
# The identity entries (1 -> 1, 2 -> 2) make the cell safe to re-run: without
# them a second run would map the already-encoded integers to NaN.
df['home_away'] = df['home_away'].map({'home': 1, 'away': 2, 1: 1, 2: 2})

In [37]:
# count of each value in 'home_away' (1 = home, 2 = away, per the mapping applied earlier)
df['home_away'].value_counts()

Unnamed: 0_level_0,count
home_away,Unnamed: 1_level_1
1,2260
2,2252


In [38]:
# Show "- column: dtype" again to confirm 'home_away' is now numeric
for col, dtype in df.dtypes.items():
    print(f"- {col}: {dtype}")

- player: object
- rk: int64
- date: object
- age: float64
- tm: object
- home_away: int64
- opp: object
- win_margin: float64
- gs: int64
- mp: float64
- fg: int64
- fga: int64
- fg%: float64
- 3p: int64
- 3pa: int64
- 3p%: float64
- ft: int64
- fta: int64
- ft%: float64
- orb: int64
- drb: int64
- trb: int64
- ast: int64
- stl: int64
- blk: int64
- tov: int64
- pf: int64
- pts: int64
- gmsc: float64


In [39]:
# Swap the literal '%' in column names for '_pct'
df.columns = [column.replace('%', '_pct') for column in df.columns]

In [40]:
# Show "- column: dtype" again to confirm the '%' columns were renamed
for col, dtype in df.dtypes.items():
    print(f"- {col}: {dtype}")

- player: object
- rk: int64
- date: object
- age: float64
- tm: object
- home_away: int64
- opp: object
- win_margin: float64
- gs: int64
- mp: float64
- fg: int64
- fga: int64
- fg_pct: float64
- 3p: int64
- 3pa: int64
- 3p_pct: float64
- ft: int64
- fta: int64
- ft_pct: float64
- orb: int64
- drb: int64
- trb: int64
- ast: int64
- stl: int64
- blk: int64
- tov: int64
- pf: int64
- pts: int64
- gmsc: float64


In [41]:
# Null check: count the missing values per column after reloading and renaming
print(df.isnull().sum())

player           0
rk               0
date             0
age              0
tm               0
home_away        0
opp              0
win_margin       0
gs               0
mp               0
fg               0
fga              0
fg_pct         305
3p               0
3pa              0
3p_pct        1332
ft               0
fta              0
ft_pct        2096
orb              0
drb              0
trb              0
ast              0
stl              0
blk              0
tov              0
pf               0
pts              0
gmsc             0
dtype: int64


In [42]:
# Set each shooting percentage to 0.0 on rows where the matching attempts
# column is 0. Each pair is (attempts column, percentage column).
for attempts_col, pct_col in [('fga', 'fg_pct'), ('fta', 'ft_pct'), ('3pa', '3p_pct')]:
    df.loc[df[attempts_col] == 0, pct_col] = 0.0

In [43]:
# Null check: confirm the percentage columns no longer contain missing values
print(df.isnull().sum())

player        0
rk            0
date          0
age           0
tm            0
home_away     0
opp           0
win_margin    0
gs            0
mp            0
fg            0
fga           0
fg_pct        0
3p            0
3pa           0
3p_pct        0
ft            0
fta           0
ft_pct        0
orb           0
drb           0
trb           0
ast           0
stl           0
blk           0
tov           0
pf            0
pts           0
gmsc          0
dtype: int64


In [44]:
# Parse 'date' as datetime (harmless if it already is one)
df['date'] = pd.to_datetime(df['date'])

# Expose the calendar components as separate year / month / day columns
for part in ('year', 'month', 'day'):
    df[part] = getattr(df['date'].dt, part)

In [45]:
# Null check: confirm the new year/month/day columns contain no missing values
print(df.isnull().sum())

player        0
rk            0
date          0
age           0
tm            0
home_away     0
opp           0
win_margin    0
gs            0
mp            0
fg            0
fga           0
fg_pct        0
3p            0
3pa           0
3p_pct        0
ft            0
fta           0
ft_pct        0
orb           0
drb           0
trb           0
ast           0
stl           0
blk           0
tov           0
pf            0
pts           0
gmsc          0
year          0
month         0
day           0
dtype: int64


In [46]:
# Preview the frame with the new year/month/day columns
df.head()

Unnamed: 0,player,rk,date,age,tm,home_away,opp,win_margin,gs,mp,...,ast,stl,blk,tov,pf,pts,gmsc,year,month,day
0,Lindsay Allen,1,2024-05-15,29.2,CHI,2,DAL,-8.0,0,13.5,...,0,1,0,0,0,5,3.2,2024,5,15
1,Lindsay Allen,2,2024-05-18,29.2,CHI,2,DAL,9.0,0,9.1,...,1,0,0,1,0,2,1.7,2024,5,18
2,Lindsay Allen,3,2024-05-23,29.2,CHI,2,NYL,9.0,0,18.7,...,2,0,0,1,3,8,4.2,2024,5,23
3,Lindsay Allen,4,2024-05-25,29.2,CHI,1,CON,-4.0,0,15.6,...,2,2,0,2,1,6,7.1,2024,5,25
4,Lindsay Allen,5,2024-05-28,29.2,CHI,1,SEA,-9.0,0,23.1,...,4,0,0,3,2,3,0.1,2024,5,28


In [47]:
# The date now lives in year/month/day, so remove the original column
df = df.drop(columns=['date'])

# Every column except the three date parts, in their current order
date_parts = ['year', 'month', 'day']
cols = [name for name in df.columns if name not in date_parts]

# Splice the date parts in directly before 'age'
insert_at = cols.index('age')
new_order = [*cols[:insert_at], *date_parts, *cols[insert_at:]]

# Apply the new column order
df = df.loc[:, new_order]

In [48]:
# Preview the reordered frame (year/month/day now sit before 'age')
df.head()

Unnamed: 0,player,rk,year,month,day,age,tm,home_away,opp,win_margin,...,orb,drb,trb,ast,stl,blk,tov,pf,pts,gmsc
0,Lindsay Allen,1,2024,5,15,29.2,CHI,2,DAL,-8.0,...,0,1,1,0,1,0,0,0,5,3.2
1,Lindsay Allen,2,2024,5,18,29.2,CHI,2,DAL,9.0,...,0,1,1,1,0,0,1,0,2,1.7
2,Lindsay Allen,3,2024,5,23,29.2,CHI,2,NYL,9.0,...,0,1,1,2,0,0,1,3,8,4.2
3,Lindsay Allen,4,2024,5,25,29.2,CHI,1,CON,-4.0,...,1,1,2,2,2,0,2,1,6,7.1
4,Lindsay Allen,5,2024,5,28,29.2,CHI,1,SEA,-9.0,...,0,2,2,4,0,0,3,2,3,0.1


In [49]:
# (rows, columns) after dropping 'date' and adding year/month/day
df.shape

(4512, 31)

In [50]:
# Remove the 'rk' column
df = df.drop('rk', axis='columns')

In [51]:
# Show "- column: dtype" for the final set of columns
for col, dtype in df.dtypes.items():
    print(f"- {col}: {dtype}")

- player: object
- year: int32
- month: int32
- day: int32
- age: float64
- tm: object
- home_away: int64
- opp: object
- win_margin: float64
- gs: int64
- mp: float64
- fg: int64
- fga: int64
- fg_pct: float64
- 3p: int64
- 3pa: int64
- 3p_pct: float64
- ft: int64
- fta: int64
- ft_pct: float64
- orb: int64
- drb: int64
- trb: int64
- ast: int64
- stl: int64
- blk: int64
- tov: int64
- pf: int64
- pts: int64
- gmsc: float64
