In [20]:
# Seasons to scrape: 1996 through 2023 inclusive (range() excludes the stop value).
years = [*range(1996, 2024)]

In [2]:
import os
import time
from io import StringIO

import requests

# Download the awards page for every season and cache the raw HTML under mvp/.
mvp_url = "https://www.pro-football-reference.com/awards/awards_{}.htm#voting_apmvp"

# open() does not create missing directories, so make sure the cache dir exists.
os.makedirs("mvp", exist_ok=True)

for year in years:
    url = mvp_url.format(year)  # fill the {} placeholder with the season
    # A timeout keeps one stalled connection from hanging the whole loop.
    data = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of caching an error page as if it were data.
    data.raise_for_status()

    # "w" truncates an existing file, so re-running refreshes the cached page.
    # NOTE(review): no encoding is passed, so the platform default applies; the
    # cell that reads these files back relies on the same default.
    with open("mvp/{}.html".format(year), "w") as f:
        f.write(data.text)

    # Space requests out so the scraper is not throttled by the site.
    # NOTE(review): 4 s is a conservative guess - check the site's bot policy.
    time.sleep(4)

In [8]:
from bs4 import BeautifulSoup
import pandas as pd

In [None]:
# Parse each cached awards page into a DataFrame of AP MVP voting results.
dfs = []

for year in years:
    # Read mode is the default, so no mode argument is needed.
    with open("mvp/{}.html".format(year)) as f:
        page = f.read()
    soup = BeautifulSoup(page, "html.parser")

    # Only the AP MVP voting table matters; look it up by its element id.
    mvp_table = soup.find(id="voting_apmvp")
    if mvp_table is None:
        # Without this check str(None) would be handed to pandas and fail obscurely.
        raise ValueError("no 'voting_apmvp' table found in mvp/{}.html".format(year))

    # Drop the grouping header row(s) so pandas sees a single header row.
    # Searching inside the table (rather than the whole page) guarantees the
    # row removed belongs to this table and tolerates pages that have none.
    for header_row in mvp_table.find_all("tr", class_="over_header"):
        header_row.decompose()

    # read_html expects a path/URL or file-like object; wrap the markup in
    # StringIO (passing literal HTML strings is deprecated in recent pandas).
    mvp = pd.read_html(StringIO(str(mvp_table)))[0]  # [0]: the only table parsed
    mvp["Year"] = year

    dfs.append(mvp)

In [44]:
# Stack the per-season MVP frames row-wise; each season keeps its own row
# labels, so index values repeat across years.
mvps = pd.concat(objs=dfs)

In [45]:
# Preview the last five rows (the most recent season) as a sanity check.
mvps.tail(n=5)

Unnamed: 0,Rk,Pos,Player,Tm,Votes,Share,G,GS,Cmp,Att,...,TD.1,Rec,Yds.2,TD.2,Solo,Sk,Int.1,Year,Vote Pts,1st Place
5,6,WR,Tyreek Hill,Miami Dolphins,,12%,16,16,0,0,...,0,119,1799,13,0.0,0.0,0,2023,60.0,0.0
6,7,QB,Patrick Mahomes,Kansas City Chiefs,,2.4%,16,16,401,597,...,0,0,0,0,0.0,0.0,0,2023,12.0,0.0
7,8,QB,Matthew Stafford,Los Angeles Rams,,0.8%,15,15,326,521,...,0,0,0,0,0.0,0.0,0,2023,4.0,0.0
8,9,QB,C.J. Stroud,Houston Texans,,0.8%,15,15,319,499,...,3,1,0,0,0.0,0.0,0,2023,4.0,0.0
9,10,DE,Myles Garrett,Cleveland Browns,,0.2%,16,16,0,0,...,0,0,0,0,33.0,14.0,0,2023,1.0,0.0


In [46]:
# Persist the combined MVP voting table; the index is written as the first column.
mvps.to_csv(path_or_buf="mvps.csv")

In [30]:
# Download the fantasy/player stats page for every season and cache it under player/.
player_stats_url = "https://www.pro-football-reference.com/years/{}/fantasy.htm"

# open() does not create missing directories, so make sure the cache dir exists.
os.makedirs("player", exist_ok=True)

for year in years:
    url = player_stats_url.format(year)  # fill the {} placeholder with the season
    # A timeout keeps one stalled connection from hanging the whole loop.
    data = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of caching an error page as if it were data.
    data.raise_for_status()

    # "w" truncates an existing file, so re-running refreshes the cached page.
    # NOTE(review): no encoding is passed, so the platform default applies; the
    # cell that reads these files back relies on the same default.
    with open("player/{}.html".format(year), "w") as f:
        f.write(data.text)

    # Space requests out so the scraper is not throttled by the site.
    # NOTE(review): 4 s is a conservative guess - check the site's bot policy.
    time.sleep(4)

In [None]:
# Parse each cached fantasy page into a DataFrame of per-player season stats.
dfs = []

for year in years:
    # Read mode is the default, so no mode argument is needed.
    with open("player/{}.html".format(year)) as f:
        page = f.read()
    soup = BeautifulSoup(page, "html.parser")

    # Only the stats table matters; look it up by its element id.
    player_table = soup.find(id="fantasy")
    if player_table is None:
        # Without this check str(None) would be handed to pandas and fail obscurely.
        raise ValueError("no 'fantasy' table found in player/{}.html".format(year))

    # read_html expects a path/URL or file-like object; wrap the markup in
    # StringIO (passing literal HTML strings is deprecated in recent pandas).
    player = pd.read_html(StringIO(str(player_table)))[0]  # [0]: the only table parsed
    player["Year"] = year

    dfs.append(player)

In [32]:
# Stack the per-season player frames row-wise; each season keeps its own row
# labels, so index values repeat across years.
players = pd.concat(objs=dfs)

In [33]:
# Preview the last five rows (the most recent season) as a sanity check.
players.tail(n=5)

Unnamed: 0_level_0,Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Games,Games,Passing,Passing,Passing,...,Scoring,Scoring,Fantasy,Fantasy,Fantasy,Fantasy,Fantasy,Fantasy,Fantasy,Year
Unnamed: 0_level_1,Rk,Player,Tm,FantPos,Age,G,GS,Cmp,Att,Yds,...,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank,Unnamed: 21_level_1
650,630,Kyle Allen,BUF,QB,27,7,0,0,0,0,...,,,-1,-1.3,-1.3,-1.3,,83,,2023
651,631,Deon Jackson,3TM,RB,24,4,1,0,0,0,...,,,-1,4.0,6.0,1.5,,166,,2023
652,632,David Wells,TAM,TE,28,5,0,0,0,0,...,,,-1,1.0,1.0,,,140,,2023
653,633,James Proche,CLE,WR,27,10,1,0,0,0,...,,,-2,-2.0,-1.0,-2.0,,242,,2023
654,634,Trent Taylor,CHI,WR,29,17,0,0,0,0,...,,,-2,-2.2,-1.2,-2.2,,243,,2023


In [34]:
# Persist the combined player stats table; the index is written as the first column.
players.to_csv(path_or_buf="players.csv")

In [47]:
# Download the defensive stats page for every season and cache it under defense/.
defense_stats_url = "https://www.pro-football-reference.com/years/{}/defense.htm"

# open() does not create missing directories, so make sure the cache dir exists.
os.makedirs("defense", exist_ok=True)

for year in years:
    url = defense_stats_url.format(year)  # fill the {} placeholder with the season
    # A timeout keeps one stalled connection from hanging the whole loop.
    data = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of caching an error page as if it were data.
    data.raise_for_status()

    # "w" truncates an existing file, so re-running refreshes the cached page.
    # NOTE(review): no encoding is passed, so the platform default applies; the
    # cell that reads these files back relies on the same default.
    with open("defense/{}.html".format(year), "w") as f:
        f.write(data.text)

    # Space requests out so the scraper is not throttled by the site.
    # NOTE(review): 4 s is a conservative guess - check the site's bot policy.
    time.sleep(4)

In [None]:
# Parse each cached defense page into a DataFrame of per-player defensive stats.
dfs = []

for year in years:
    # Read mode is the default, so no mode argument is needed.
    with open("defense/{}.html".format(year)) as f:
        page = f.read()
    soup = BeautifulSoup(page, "html.parser")

    # Only the stats table matters; look it up by its element id.
    defense_table = soup.find(id="defense")
    if defense_table is None:
        # Without this check str(None) would be handed to pandas and fail obscurely.
        raise ValueError("no 'defense' table found in defense/{}.html".format(year))

    # read_html expects a path/URL or file-like object; wrap the markup in
    # StringIO (passing literal HTML strings is deprecated in recent pandas).
    defense = pd.read_html(StringIO(str(defense_table)))[0]  # [0]: the only table parsed
    defense["Year"] = year

    dfs.append(defense)

In [50]:
# Stack the per-season defense frames row-wise; each season keeps its own row
# labels, so index values repeat across years.
defensive_players = pd.concat(objs=dfs)

In [51]:
# Preview the last five rows (the most recent season) as a sanity check.
defensive_players.tail(n=5)

Unnamed: 0_level_0,Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Games,Games,Def Interceptions,Def Interceptions,Def Interceptions,...,Fumbles,Fumbles,Unnamed: 17_level_0,Tackles,Tackles,Tackles,Tackles,Tackles,Unnamed: 23_level_0,Year
Unnamed: 0_level_1,Rk,Player,Tm,Age,Pos,G,GS,Int,Yds,TD,...,Yds,TD,Sk,Comb,Solo,Ast,TFL,QBHits,Sfty,Unnamed: 21_level_1
1408,1364,Andrew Wylie,WAS,29,OL,15,15,,,0,...,0.0,0,0.0,,,,,,,2023
1409,1365,Bryce Young,CAR,22,QB,16,16,,,0,...,0.0,0,0.0,,,,,,,2023
1410,1366,Dareke Young,SEA,24,WR,6,0,,,0,...,,0,0.0,1.0,0.0,1.0,0.0,0.0,,2023
1411,1367,Kevin Zeitler*,BAL,33,G,15,15,,,0,...,0.0,0,0.0,,,,,,,2023
1412,1368,Ty Zentner,2TM,25,K-P,9,0,,,0,...,-6.0,0,0.0,,,,,,,2023


In [52]:
# Persist the combined defensive stats table; the index is written as the first column.
defensive_players.to_csv(path_or_buf="defensive_players.csv")

In [35]:
# Download the season summary page for every season and cache it under team/.
team_stats_url = "https://www.pro-football-reference.com/years/{}/#team_stats"

# open() does not create missing directories, so make sure the cache dir exists.
os.makedirs("team", exist_ok=True)

for year in years:
    url = team_stats_url.format(year)  # fill the {} placeholder with the season
    # A timeout keeps one stalled connection from hanging the whole loop.
    data = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of caching an error page as if it were data.
    data.raise_for_status()

    # "w" truncates an existing file, so re-running refreshes the cached page.
    # NOTE(review): no encoding is passed, so the platform default applies; the
    # cell that reads these files back relies on the same default.
    with open("team/{}.html".format(year), "w") as f:
        f.write(data.text)

    # Space requests out so the scraper is not throttled by the site.
    # NOTE(review): 4 s is a conservative guess - check the site's bot policy.
    time.sleep(4)

In [None]:
# Parse each cached season page into two DataFrames: AFC and NFC standings.
dfs = []

for year in years:
    # Read mode is the default, so no mode argument is needed.
    with open("team/{}.html".format(year)) as f:
        page = f.read()

    # Parse the document once; both conference tables live in the same page.
    soup = BeautifulSoup(page, "html.parser")

    # AFC is appended before NFC for each season.
    for conference in ("AFC", "NFC"):
        team_table = soup.find(id=conference)
        if team_table is None:
            # Without this check str(None) would be handed to pandas and fail obscurely.
            raise ValueError(
                "no '{}' table found in team/{}.html".format(conference, year)
            )

        # read_html expects a path/URL or file-like object; wrap the markup in
        # StringIO (passing literal HTML strings is deprecated in recent pandas).
        # Division label rows inside the table are not filtered out here.
        team = pd.read_html(StringIO(str(team_table)))[0]  # [0]: the only table parsed
        team["Year"] = year
        dfs.append(team)

In [39]:
# Stack the per-season, per-conference frames row-wise; each frame keeps its
# own row labels, so index values repeat.
teams = pd.concat(objs=dfs)

In [40]:
# Preview the last five rows (the most recent season) as a sanity check.
teams.tail(n=5)

Unnamed: 0,Tm,W,L,W-L%,PF,PA,PD,MoV,SoS,SRS,OSRS,DSRS,Year,T
15,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,NFC West,2023,
16,San Francisco 49ers*,12,5,.706,491,298,193,11.4,0.4,11.8,7.1,4.7,2023,
17,Los Angeles Rams+,10,7,.588,404,377,27,1.6,1.4,3.0,2.0,1.0,2023,
18,Seattle Seahawks,9,8,.529,364,402,-38,-2.2,1.3,-1.0,-0.2,-0.8,2023,
19,Arizona Cardinals,4,13,.235,330,455,-125,-7.4,1.9,-5.4,-1.8,-3.7,2023,


In [42]:
# Persist the combined team standings table; the index is written as the first column.
teams.to_csv(path_or_buf="teams.csv")