# Merge FantasyPros Projections

https://www.fantasypros.com/nfl/rankings/half-point-ppr-cheatsheets.php


## terminology

### ECR

> ECR represents the collective opinion of the experts we track. We generate a consensus cheat sheet by calculating how many "Rank Points" each player receives based on his ranked position on each expert’s cheat sheet. Each ranked position is worth a certain number of Rank Points - the better the rank, the higher the Rank Points. We add these Rank Points up for each player across all experts to determine the player’s consensus rank.
> 
> We don’t use an "average rank" because we believe this approach is flawed. Using an average requires that you assign an arbitrary rank for unranked players, which skews the results.

https://support.fantasypros.com/hc/en-us/articles/115001219327-What-is-ECR-Expert-Consensus-Rankings-and-how-do-you-calculate-it-

### ADP

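Average Draft Position: the average pick at which a player is selected across fantasy drafts. Comparing it with ECR shows where the experts' ranking differs from where drafters actually take the player.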

In [1]:
import glob
from pathlib import Path
from typing import Optional

import pandas as pd
from rapidfuzz import fuzz

# Use the fully qualified option key: the short form 'max_rows' is matched as a
# pattern and is ambiguous on pandas versions that also define
# 'styler.render.max_rows', where it raises OptionError.
pd.set_option('display.max_rows', 100)

In [2]:
# Folder (under the current working directory) holding the ranking CSVs.
DATA_DIR = Path.cwd().joinpath('data')
# Glob pattern for the per-position files; the `*` token is what
# parse_pos_from_fname extracts as the position.
FILE_PATTERN = 'FantasyPros_2021_Draft_*_Rankings.csv'

In [3]:
def convert_str_to_int(s: str) -> Optional[int]:
    """Convert a raw ranking cell to an int, or ``None`` when it is missing.

    Text handling:
      * ``'-'`` or blank text -> ``None``
      * text longer than five characters -> its first character as an int
        (used for the ``sos season`` column; assumes the value starts with a
        single-digit rating -- TODO confirm against the raw CSV)
      * anything else -> ``int(text)`` (signs such as ``'+2'`` are accepted)

    Non-string input is tolerated because pandas may already have parsed a
    column as numbers: NaN/None/NA become ``None`` and numbers go through
    ``int()``.

    Args:
        s: Raw cell value, normally a string.

    Returns:
        The parsed integer, or ``None`` for a missing value.

    Raises:
        ValueError: If the text is not a recognisable integer.
    """
    # Already-parsed cells (int, float, NaN) have no .strip(); handle them first.
    if not isinstance(s, str):
        return None if pd.isna(s) else int(s)

    # Work on the stripped text so padding cannot skew the length heuristic.
    text = s.strip()
    if text in ('', '-'):
        return None
    if len(text) > 5:  # for SOS
        return int(text[0])
    return int(text)

def parse_pos_from_fname(s: str) -> str:
    """Extract the position token from a ranking file name or path.

    The file name is split on ``_`` and the fourth token is returned, e.g.
    ``FantasyPros_2021_Draft_QB_Rankings.csv`` -> ``'QB'``.

    Args:
        s: File name or full path of a per-position ranking CSV.

    Returns:
        The fourth underscore-separated token of the file name.

    Raises:
        IndexError: If the file name has fewer than four ``_``-separated tokens.
    """
    # Split only the final path component: directories in a full path may
    # contain underscores, which would shift the token index.
    name_tokens = Path(s).name.split('_')
    return name_tokens[3]

def load_or_merge_files(fnames: list[str], reload: bool = False) -> pd.DataFrame:
    """Return the combined draft rankings, building the cached CSV when needed.

    If the combined CSV already exists under ``DATA_DIR`` and ``reload`` is
    false, it is read back and returned. Otherwise every file in ``fnames`` is
    read, tagged with a ``pos`` column parsed from its file name, merged,
    cleaned, sorted, written to the combined CSV and returned.

    Both paths return ``ecr_vs_adp`` as nullable ``Int64`` and a fresh
    0..n-1 index.

    Args:
        fnames: Paths of the per-position ranking CSVs to merge.
        reload: Rebuild the combined CSV even if it already exists.

    Returns:
        DataFrame with columns ``tiers, pos, name, bye, rk, team, sos,
        ecr_vs_adp``.

    Raises:
        ValueError: If a rebuild is needed but ``fnames`` is empty.
    """
    fout = DATA_DIR / 'fantasypros_2021_draft_rankings_all.csv'

    if fout.exists() and not reload:
        cached = pd.read_csv(fout)
        # A CSV round trip reads the nullable-int column back as float64
        # (0.0 / NaN); restore Int64 so both code paths agree on dtype.
        cached['ecr_vs_adp'] = cached['ecr_vs_adp'].astype('Int64')
        return cached

    if not fnames:
        # pd.concat([]) would fail with an opaque "No objects to concatenate".
        raise ValueError(
            f'No ranking files to merge and no cached file at {fout}; '
            'check the data directory and file pattern.'
        )

    frames = []
    for fname in fnames:
        frame = pd.read_csv(fname)
        frame['pos'] = parse_pos_from_fname(fname)
        frames.append(frame)
    df = pd.concat(frames, ignore_index=True)

    # column cleanup
    df.columns = [c.lower() for c in df.columns]
    df = df.rename({'player name': 'name', 'bye week': 'bye'}, axis=1)

    # filter out free agents (copy so the column assignments below do not
    # write into a view of the unfiltered frame)
    df = df[df['team'] != 'FA'].copy()

    # strength of schedule
    df['sos'] = df['sos season'].apply(convert_str_to_int)

    # expert consensus rankings vs average draft position
    df['ecr_vs_adp'] = df['ecr vs. adp'].apply(convert_str_to_int).astype('Int64')  # nullable int
    df = df.drop(['ecr vs. adp', 'sos season'], axis=1)

    # reorder cols and sort
    df = df[['tiers', 'pos', 'name', 'bye', 'rk', 'team', 'sos', 'ecr_vs_adp']]
    df = df.sort_values(['tiers', 'pos', 'ecr_vs_adp'], ascending=[True, False, False])
    # match the RangeIndex the cached path gets from read_csv
    df = df.reset_index(drop=True)

    df.to_csv(fout, index=False)

    return df

In [4]:
# glob returns matches in arbitrary, OS-dependent order; sort them so the
# merge always sees the files in the same sequence.
fnames = sorted(glob.glob(str(DATA_DIR / FILE_PATTERN)))
df = load_or_merge_files(fnames)

In [5]:
# Preview the first rows of the merged rankings as a sanity check.
df.head()

Unnamed: 0,tiers,pos,name,bye,rk,team,sos,ecr_vs_adp
0,1,WR,Davante Adams,13,1,GB,1,0.0
1,1,WR,Tyreek Hill,12,2,KC,3,0.0
2,1,WR,Stefon Diggs,7,3,BUF,3,0.0
3,1,TE,Mark Andrews,8,4,BAL,4,1.0
4,1,TE,T.J. Hockenson,9,5,DET,4,1.0


## count of players per tier, by position

In [6]:
# Display order of the position columns in the summary table.
position_order = ['RB', 'WR', 'TE', 'QB', 'K', 'DST']

# Count non-null `rk` entries per (tier, position) pair.
tiers_by_pos = (
    pd.pivot_table(df, index='tiers', columns='pos', values='rk', aggfunc='count')
    # reindex instead of plain column selection: a position with no rows
    # becomes a zero column rather than raising KeyError
    .reindex(columns=position_order, fill_value=0)
    .fillna(0)
    .astype(int)
    .assign(total=lambda counts: counts.sum(axis=1))
)

tiers_by_pos

pos,RB,WR,TE,QB,K,DST,total
tiers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,4,3,6,4,3,4,24
2,6,6,5,4,6,3,30
3,5,7,7,8,6,7,40
4,11,10,12,12,8,8,61
5,12,15,11,9,6,10,63
6,14,13,12,7,11,0,57
7,20,24,15,18,0,0,77
8,16,23,20,17,0,0,76
9,11,14,16,7,0,0,48
10,15,22,10,0,0,0,47
