In [2]:
import requests
import pandas as pd
from typing import List
import numpy as np

This notebook is to pull in data from Pro Football Reference on Scoring and Fantasy Performance

- [Sample Page](https://www.pro-football-reference.com/years/2023/fantasy.htm)

In [3]:
def clean_columns(columns: List[str]) -> List[str]:
    return [f"{c[0]}_{c[1]}".lower() if "Unnamed" not in c[0] else c[1].lower() for c in columns ]


# Get all fantasy results dating back to 2010
def get_fantasy_scoring(year: int) -> pd.DataFrame:
    url = f"https://www.pro-football-reference.com/years/{year}/fantasy.htm#fantasy"

    df = pd.read_html(url)[0]

    # clean up columns from multi-index
    old_cols = df.columns
    df.columns = clean_columns(old_cols)

    # Add year column
    df['year'] = [year for _ in range(len(df))]

    return df

all_data = []
for year in range (2023, 2000, -1):
    all_data.append(get_fantasy_scoring(year))

In [None]:
# Combine all years into one year
df = pd.concat(all_data)
df

Unnamed: 0,rk,player,tm,fantpos,age,games_g,games_gs,passing_cmp,passing_att,passing_yds,...,scoring_2pm,scoring_2pp,fantasy_fantpt,fantasy_ppr,fantasy_dkpt,fantasy_fdpt,fantasy_vbd,fantasy_posrank,fantasy_ovrank,year
0,1,Christian McCaffrey*+,SFO,RB,27,16,16,0,0,0,...,,,324,391.3,399.3,357.8,157,1,1,2023
1,2,CeeDee Lamb*+,DAL,WR,24,17,17,0,0,0,...,1,,268,403.2,411.2,335.7,131,1,2,2023
2,3,Josh Allen,BUF,QB,27,17,17,385,579,4306,...,,3,393,392.6,420.6,410.6,122,1,3,2023
3,4,Tyreek Hill*+,MIA,WR,29,16,16,0,0,0,...,,,257,376.4,380.4,316.9,120,2,4,2023
4,5,Jalen Hurts*,PHI,QB,25,17,17,352,538,3858,...,,,357,356.8,382.8,371.8,89,2,5,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
546,530,Troy Walters,MIN,WR,25,6,0,0,0,0,...,,,-2,-2.0,-1.0,-2.0,,186,,2001
547,531,Craig Yeast,NYJ,WR,25,11,0,0,0,0,...,,,-2,-2.0,-1.0,-2.0,,187,,2001
548,532,David Dunn,OAK,WR,29,10,0,0,0,0,...,,,-3,-2.2,-0.2,-2.7,,188,,2001
549,533,Nate Jacquet,MIN,WR,26,10,0,0,0,0,...,,,-4,-4.0,-2.0,-4.0,,189,,2001


In [None]:
# Add pro-bowl and all-pro flags
df['pro_bowl'] = df['player'].str.contains("*", regex=False)
df['all_pro'] = df['player'].str.contains("+", regex=False)

# Some basic cleaning steps of the combined dataframe
df['player'] = df['player'].str.replace("\*|\+", "", regex=True)#.replace("+", "")

# Remove any interim header rows
df['rank'] = df['rk']
df = df.drop(['rk'], axis=1)
df = df.loc[df['rank'] != "Rk"]

df

Unnamed: 0,player,tm,fantpos,age,games_g,games_gs,passing_cmp,passing_att,passing_yds,passing_td,...,fantasy_ppr,fantasy_dkpt,fantasy_fdpt,fantasy_vbd,fantasy_posrank,fantasy_ovrank,year,pro_bowl,all_pro,rank
0,Christian McCaffrey,SFO,RB,27,16,16,0,0,0,0,...,391.3,399.3,357.8,157,1,1,2023,True,True,1
1,CeeDee Lamb,DAL,WR,24,17,17,0,0,0,0,...,403.2,411.2,335.7,131,1,2,2023,True,True,2
2,Josh Allen,BUF,QB,27,17,17,385,579,4306,29,...,392.6,420.6,410.6,122,1,3,2023,False,False,3
3,Tyreek Hill,MIA,WR,29,16,16,0,0,0,0,...,376.4,380.4,316.9,120,2,4,2023,True,True,4
4,Jalen Hurts,PHI,QB,25,17,17,352,538,3858,23,...,356.8,382.8,371.8,89,2,5,2023,True,False,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
546,Troy Walters,MIN,WR,25,6,0,0,0,0,0,...,-2.0,-1.0,-2.0,,186,,2001,False,False,530
547,Craig Yeast,NYJ,WR,25,11,0,0,0,0,0,...,-2.0,-1.0,-2.0,,187,,2001,False,False,531
548,David Dunn,OAK,WR,29,10,0,0,0,0,0,...,-2.2,-0.2,-2.7,,188,,2001,False,False,532
549,Nate Jacquet,MIN,WR,26,10,0,0,0,0,0,...,-4.0,-2.0,-4.0,,189,,2001,False,False,533


In [None]:
# Save Output to CSV file
df.to_csv("fantasy-scoring.csv", index=False)

NameError: name 'df' is not defined