## Initialize notebook

In [1]:
import typing as T

import requests
from bs4 import BeautifulSoup
import re
import json
import pandas as pd

# Let wide frames render up to 500 columns instead of being truncated in cell output.
pd.options.display.max_columns = 500

## Define constants

In [2]:
# Letter grade -> numeric score, applied to the "start_sit_grade" column.
# Each letter tier follows the same pattern: "+" = x.33, plain = x.0, "-" = (x-1).66.
# "C-" was previously absent (so a C- grade mapped to NaN) and the D tier was
# shifted one step up to fill the gap; both are corrected here.
START_SIT_MAPPING = {
    "A+": 4.33,
    "A": 4.0,
    "A-": 3.66,
    "B+": 3.33,
    "B": 3.0,
    "B-": 2.66,
    "C+": 2.33,
    "C": 2.0,
    "C-": 1.66,
    "D+": 1.33,
    "D": 1.0,
    "D-": 0.66,
    "F": 0,
}

# Team ID (as found in the "player_team_id" / "player_opponent_id" fields) -> team code
# written to the output. Most codes are just the lower-cased ID; the three exceptions
# are spelled out in the inline override table.
TEAM_MAPPING = {
    team_id: {"KC": "kan", "SF": "sfo", "TB": "tam"}.get(team_id, team_id.lower())
    for team_id in (
        "ARI", "ATL", "BAL", "BUF", "CAR", "CHI", "CIN", "CLE",
        "DAL", "DEN", "DET",
        "FA",  # free agent
        "GB", "HOU", "IND", "JAC", "KC", "LAC", "LAR", "LV",
        "MIA", "MIN", "NE", "NO", "NYG", "NYJ", "PIT", "PHI",
        "SF", "SEA", "TB", "TEN", "WAS",
    )
}

## Define custom functions

In [3]:
def get_and_scrape_player_data(position: str, timeout: float = 30.0) -> T.List[T.Dict]:
    """Download the FantasyPros rankings page for a position and return its players.

    The page embeds its data as a JavaScript object assigned to ``var ecrData``
    inside a ``<script>`` tag; that object literal is extracted with a regex and
    parsed as JSON.

    Args:
        position: Position code, e.g. "QB", "RB", "WR", "TE" or "DST". The
            upper-case codes "WR", "RB" and "TE" are fetched from
            ``ppr-<position>.php``; any other value uses ``<position>.php``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The value stored under the ``"players"`` key of the embedded data.

    Raises:
        requests.RequestException: If the request itself fails (e.g. times out).
        RuntimeError: If the server responds with a status other than 200.
        ValueError: If the page does not contain exactly one parsable
            ``var ecrData`` assignment.
    """
    path = position.lower()
    if position in ["WR", "RB", "TE"]:
        path = f"ppr-{path}"

    url = f"https://www.fantasypros.com/nfl/rankings/{path}.php"
    # Without a timeout a stalled connection would block the notebook forever.
    r = requests.get(url, timeout=timeout)
    # Explicit checks instead of `assert`: asserts vanish under `python -O`.
    if r.status_code != 200:
        raise RuntimeError(f"GET {url} returned HTTP {r.status_code}")

    # Name the parser so the result does not depend on which optional parsers
    # happen to be installed (and to avoid bs4's guessed-parser warning).
    soup = BeautifulSoup(r.text, "html.parser")
    scripts = soup.find_all("script")

    filtered_scripts = [s for s in scripts if "var ecrData" in str(s)]
    if len(filtered_scripts) != 1:
        raise ValueError(
            f"expected exactly one <script> containing 'var ecrData' at {url}, "
            f"found {len(filtered_scripts)}"
        )

    filtered = str(filtered_scripts[0].string)
    # Capture lazily up to the first "};" -- the closing brace is consumed by the
    # pattern, so it is appended again below.
    ecr_data = re.findall(r"var ecrData.*?=\s*(.*?)};", filtered, re.DOTALL | re.MULTILINE)
    if len(ecr_data) != 1:
        raise ValueError(
            f"expected exactly one 'var ecrData = {{...}};' assignment at {url}, "
            f"found {len(ecr_data)}"
        )

    # NOTE(review): this replaces slash-quote ("/'"), not backslash-quote; it may
    # have been meant to un-escape JavaScript's \' -- confirm against a live page.
    json_loadable_data = ecr_data[0].replace("/'", "'") + "}"
    data = json.loads(json_loadable_data)
    return data["players"]


def create_name_position_key(data: T.Dict) -> str:
    """Build a best-effort unique key for a player record.

    Args:
        data: Mapping (a dict or a DataFrame row) with "name", "position" and
            "team" entries. "position" may be a comma-separated list, in which
            case only the first entry is used.

    Returns:
        ``"<team>-dst"`` when the position is exactly "DST"; otherwise
        ``"<first>-<second>-<position>"`` built from the first two words of the
        lower-cased name with everything except a-z and spaces removed. A name
        consisting of a single word yields ``"<word>-<position>"``.

    Raises:
        ValueError: If no letters remain in the name after cleaning.
    """
    if data["position"] == "DST":
        team = data["team"]
        return f"{team}-dst"

    # Drop punctuation, digits and any non a-z character ("D'Andre" -> "dandre").
    cleaned = re.sub("[^a-z ]+", "", data["name"].lower())
    # Whitespace split() ignores the leading/double spaces that stripping
    # punctuation can leave behind, so no empty name parts end up in the key.
    names = cleaned.split()
    if not names:
        raise ValueError(f"cannot build a key from player name {data['name']!r}")

    primary_position = data["position"].lower().split(",")[0]
    # Slicing (rather than indexing names[1]) tolerates single-word names.
    return "-".join(names[:2] + [primary_position])

## Get data

In [4]:
# Scrape each position's rankings page in turn and pool the player records,
# reporting how many records every page contributed.
positions = ["QB", "RB", "WR", "TE", "DST"]
data = []
for pos in positions:
    players = get_and_scrape_player_data(pos)
    data += players
    print(f"number of {pos} found: {len(players)}")

print("total number of players found:", len(data))

number of QB found: 36
number of RB found: 103
number of WR found: 156
number of TE found: 90
number of DST found: 32
total number of players found: 417


## Filter and convert data

In [5]:
# Scraped field -> column name used from here on. Only these fields are kept,
# in this order.
column_renames = {
    "player_name": "name",
    "player_team_id": "team",
    "player_eligibility": "position",
    "player_opponent_id": "opponent",
    "start_sit_grade": "start_sit_grade",
    "r2p_pts": "projected_points",
    "rank_ecr": "position_rank",
    "player_owned_avg": "player_owned_pct",
}

df_data = (
    pd.DataFrame(data)[list(column_renames)]
    .rename(columns=column_renames)
    .assign(
        start_sit_score=lambda frame: frame["start_sit_grade"].map(START_SIT_MAPPING),
        projected_points=lambda frame: frame["projected_points"].astype(float),
        team=lambda frame: frame["team"].map(TEAM_MAPPING),
        opponent=lambda frame: frame["opponent"].map(TEAM_MAPPING),
        # Must come after `team`: the DST key is built from the mapped team code.
        name_position_key=lambda frame: frame.apply(create_name_position_key, axis=1),
    )
)
df_data.head()

Unnamed: 0,name,team,position,opponent,start_sit_grade,projected_points,position_rank,player_owned_pct,start_sit_score,name_position_key
0,Patrick Mahomes II,kan,QB,tam,A+,23.7,1,100.0,4.33,patrick-mahomes-qb
1,Russell Wilson,sea,QB,phi,A+,23.5,2,100.0,4.33,russell-wilson-qb
2,Josh Allen,buf,QB,lac,A,23.0,3,99.4,4.0,josh-allen-qb
3,Kyler Murray,ari,QB,ne,A,22.3,4,100.0,4.0,kyler-murray-qb
4,Deshaun Watson,hou,QB,det,A-,21.4,5,99.3,3.66,deshaun-watson-qb


## Save data

In [6]:
# Write the cleaned table to disk; index=False keeps the row index out of the file.
df_data.to_csv(path_or_buf="fantasy_pros_data_week12.csv", index=False)