In [17]:
import time
from pathlib import Path

import pandas as pd

In [18]:
# URL template for one position / season / week stats page (scoring=HALF, range=week).
url = (
    "https://www.fantasypros.com/nfl/stats/{pos}.php"
    "?year={year}&week={week}&scoring=HALF&range=week"
)

# Position slugs substituted into the `{pos}` placeholder of `url`.
positions = "qb rb wr te dst k".split()

# Seasons 2002..2023 inclusive.
years = [*range(2002, 2024)]

# Weeks 1..17 inclusive.
# NOTE(review): confirm whether later seasons need a week 18 as well.
weeks = [*range(1, 18)]

In [19]:
# Per-position maps from lowercased scraped column names to descriptive names.
# Keys ending in ".1" are presumably the second occurrence of a repeated header
# name as de-duplicated by pandas -- TODO confirm against a scraped table.
# NOTE(review): only the QB map renames "g" -> "games"; the other positions keep
# whatever name the scraped table provides for that column.

# Quarterbacks: passing stats first, then rushing.
rename_qb = {
    # passing
    "cmp": "pass_cmp", "att": "pass_att", "pct": "pass_cmp_perc",
    "yds": "pass_yds", "y/a": "pass_yds_per_att",
    "td": "pass_td", "int": "pass_int", "sacks": "pass_sacks",
    # rushing
    "att.1": "rush_att", "yds.1": "rush_yds", "td.1": "rush_td",
    # misc
    "fl": "fumbles", "g": "games", "fpts/g": "fpts_per_game",
}

# Running backs: rushing stats first, then receiving.
rename_rb = {
    # rushing
    "att": "rush_att", "yds": "rush_yds", "y/a": "rush_yds_per_att",
    "lg": "rush_longest", "20+": "rush_20_plus", "td": "rush_td",
    # receiving
    "rec": "rec", "tgt": "rec_tgt", "yds.1": "rec_yds",
    "y/r": "rec_yds_per_rec", "td.1": "rec_td",
    # misc
    "fl": "fumbles", "fpts/g": "fpts_per_game",
}

# Wide receivers and tight ends share one layout: receiving first, then rushing.
_rename_receiver = {
    # receiving
    "rec": "rec", "tgt": "rec_tgt", "yds": "rec_yds",
    "y/r": "rec_yds_per_rec", "lg": "rec_longest",
    "20+": "rec_20_plus", "td": "rec_td",
    # rushing
    "att": "rush_att", "yds.1": "rush_yds", "td.1": "rush_td",
    # misc
    "fl": "fumbles", "fpts/g": "fpts_per_game",
}
rename_wr = dict(_rename_receiver)  # independent copies, as in two separate literals
rename_te = dict(_rename_receiver)

# Kickers: field goals (overall, then by distance bucket) and extra points.
rename_k = {
    "fg": "fg", "fga": "fg_att", "pct": "fg_perc", "lg": "fg_longest",
    "1-19": "fg_1_19", "20-29": "fg_20_29", "30-39": "fg_30_39",
    "40-49": "fg_40_49", "50+": "fg_50_plus",
    "xpt": "xp", "xpa": "xp_att",
    "fpts/g": "fpts_per_game",
}

# Team defense / special teams.
# NOTE(review): the scraping cell only lowercases column names, so the "def_td"
# key matches only if the scraped header is literally "def_td" -- confirm.
rename_dst = {
    "sack": "def_sacks", "int": "def_int", "ff": "def_fum_forced",
    "def_td": "def_td", "sfty": "def_safety",
    "spc td": "def_special_teams_td",
    "fpts/g": "fpts_per_game",
}

# Lookup from position slug to its rename map.
rename_cols = dict(
    qb=rename_qb,
    rb=rename_rb,
    wr=rename_wr,
    te=rename_te,
    k=rename_k,
    dst=rename_dst,
)

In [21]:
# Scrape one table per (year, week, position) and collect the frames by position.
#
# Row index handed to `pd.read_html(header=...)` for each position: dst/k tables
# are read with row 0 as the column names, the four offensive positions with
# row 1 (presumably an extra grouping row sits above the names -- confirm).
header_row = {"dst": 0, "k": 0, "qb": 1, "rb": 1, "wr": 1, "te": 1}

# One list of weekly DataFrames per position; derived from `positions` so the
# keys can never drift out of sync with what the loop below iterates over.
dfs = {pos: [] for pos in positions}

# (pos, year, week) combinations that could not be scraped, kept so gaps in the
# data are visible after the run instead of only scrolling past in the log.
scrape_failures = []

for year in years:
    for week in weeks:
        for pos in positions:
            # Looked up outside the `try` so an unconfigured position fails
            # loudly rather than silently reusing a stale header row.
            hdr = header_row[pos]
            print(f"Scraping: {pos} {year} {week}")
            try:
                tables = pd.read_html(url.format(pos=pos, year=year, week=week), header=hdr)
                df = tables[0]  # only the first table on the page is used
                df.columns = df.columns.str.lower()
                # Rename to descriptive names and tag each row with its origin.
                df = df.rename(columns=rename_cols[pos]).assign(year=year, week=week, pos=pos)
                dfs[pos].append(df)
            except Exception as exc:
                # Best effort: skip this page but keep going. `Exception` (not a
                # bare `except`) lets Ctrl-C / kernel interrupt stop the loop.
                print(f"Error: {pos} {year} {week} ({exc!r})")
                scrape_failures.append((pos, year, week))
            # Pause between requests, whether or not this one succeeded.
            time.sleep(2)

Scraping: qb 2002 1
Scraping: rb 2002 1
Scraping: wr 2002 1
Scraping: te 2002 1
Scraping: dst 2002 1
Scraping: k 2002 1
Scraping: qb 2002 2
Scraping: rb 2002 2
Scraping: wr 2002 2
Scraping: te 2002 2
Scraping: dst 2002 2
Scraping: k 2002 2
Scraping: qb 2002 3
Scraping: rb 2002 3
Scraping: wr 2002 3
Scraping: te 2002 3
Scraping: dst 2002 3
Scraping: k 2002 3
Scraping: qb 2002 4
Scraping: rb 2002 4
Scraping: wr 2002 4
Scraping: te 2002 4
Scraping: dst 2002 4
Scraping: k 2002 4
Scraping: qb 2002 5
Scraping: rb 2002 5
Scraping: wr 2002 5
Scraping: te 2002 5
Scraping: dst 2002 5
Scraping: k 2002 5
Scraping: qb 2002 6
Scraping: rb 2002 6
Scraping: wr 2002 6
Scraping: te 2002 6
Scraping: dst 2002 6
Scraping: k 2002 6
Scraping: qb 2002 7
Scraping: rb 2002 7
Scraping: wr 2002 7
Scraping: te 2002 7
Scraping: dst 2002 7
Scraping: k 2002 7
Scraping: qb 2002 8
Scraping: rb 2002 8
Scraping: wr 2002 8
Scraping: te 2002 8
Scraping: dst 2002 8
Scraping: k 2002 8
Scraping: qb 2002 9
Scraping: rb 2002 9


In [24]:
# Write one CSV per position containing every weekly table that was scraped.
out_dir = Path("data/fantasypros")
out_dir.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing directories

# Season range for the file name, taken from the configured `years`
# ("2002-2023" for the current configuration).
season_span = f"{min(years)}-{max(years)}"

for pos, weekly_frames in dfs.items():
    if not weekly_frames:
        # pd.concat raises ValueError on an empty list; skip positions for
        # which nothing was scraped instead of aborting the remaining exports.
        print(f"Skipping {pos}: no tables were scraped")
        continue
    pos_df = pd.concat(weekly_frames)
    pos_df.to_csv(out_dir / f"fantasypros_{pos}_weekly_{season_span}.csv", index=False)