In [3]:
%matplotlib inline
import gspread
from gspread.models import Spreadsheet
from gspread_dataframe import set_with_dataframe, get_as_dataframe
from bs4 import BeautifulSoup
import requests
from IPython.display import display, Markdown
import pandas as pd
from urllib.parse import urljoin
from io import StringIO, BytesIO
import re
import numpy as np
import seaborn as sns
import plotly.express as px
from mfl.api import *
from mfl.utils import *

In [2]:
# Authenticate with a Google service-account key file and open the spreadsheet
# that this notebook writes its results to.
# NOTE(review): the key path is an absolute, machine-specific location; consider
# sourcing it from configuration so the notebook can run elsewhere.
SERVICE_ACCOUNT_FILE = "/Users/grahamflemingthomson/Downloads/grahamflemingthomson-164514-04e450fcd324.json"
SPREADSHEET_KEY = "1OqoSsf5YrsJWRWK1WouiwHeTP_prGVmsP7yURgjQnwo"

gc = gspread.service_account(filename=SERVICE_ACCOUNT_FILE)
sh = gc.open_by_key(SPREADSHEET_KEY)

def write_to_worksheet(sheet: Spreadsheet, worksheet_name: str, df: pd.DataFrame) -> None:
    """Write ``df`` into the worksheet titled ``worksheet_name``, creating it if absent.

    Args:
        sheet: Spreadsheet that holds (or will hold) the target worksheet.
        worksheet_name: Title of the worksheet to write to. When no worksheet
            with this title exists, a 1x1 worksheet is created first.
        df: Frame handed to ``set_with_dataframe`` for writing.

    Returns:
        None.

    NOTE(review): ``set_with_dataframe`` is called with its defaults, so cells
    outside the frame's extent are presumably left as they were — confirm if
    stale rows from an earlier, larger write matter.
    """
    # List the worksheets once and pick the one with the matching title,
    # instead of building a title -> position map and resolving it again.
    target = next(
        (ws for ws in sheet.worksheets() if ws.title == worksheet_name),
        None,
    )
    if target is None:
        # add_worksheet hands back the new worksheet, so no re-listing is needed.
        target = sheet.add_worksheet(title=worksheet_name, rows=1, cols=1)
    set_with_dataframe(worksheet=target, dataframe=df)

In [4]:
# Notebook-wide constants.
# Every position code requested from the top-players listing.
ALL_POSITIONS = {
    'QB', 'RB', 'WR', 'TE', 'PK', 'PN',
    'DE', 'DT', 'LB', 'CB', 'S', 'Off',
}
# Same codes with the 'Off' entry removed.
POSITIONS = ALL_POSITIONS.difference({'Off'})
# Seasons to pull.
SEASON_YEARS = {2019, 2020}

In [5]:
def append_player_demos(stats_df: pd.DataFrame) -> pd.DataFrame:
    """Left-join per-player demo frames onto ``stats_df`` by ``player_name``.

    ``get_player_demos`` is called once per row with that row's
    ``player_name`` and ``player_path``. Results with zero rows are dropped,
    the rest are stacked into one frame and merged back onto ``stats_df``.

    Args:
        stats_df: Frame with ``player_name`` and ``player_path`` columns.

    Returns:
        ``stats_df`` left-merged with the stacked demo frame.
    """
    per_player_frames = stats_df.apply(
        lambda row: get_player_demos(
            player_name=row.player_name,
            player_path=row.player_path,
        ),
        axis=1,
    ).tolist()

    # Keep only lookups that actually returned rows.
    non_empty_frames = [frame for frame in per_player_frames if frame.shape[0] > 0]

    demo_df = pd.concat(non_empty_frames, ignore_index=True)
    return stats_df.merge(demo_df, on="player_name", how="left")

In [None]:
# Pull the top-players stats frame for every (season, position) pair;
# seasons form the outer iteration, positions the inner one.
all_dfs = [
    TopPlayers(year=season, position=position).get_stats()
    for season in SEASON_YEARS
    for position in ALL_POSITIONS
]

In [None]:
# Stack the per-season, per-position frames into one frame with a fresh index.
all_df = pd.concat(objs=all_dfs, ignore_index=True)
all_df.head(n=5)

In [24]:
# Reload the combined frame from a local CSV snapshot.
# NOTE(review): the preview shows an "Unnamed: 0" column, so the snapshot was
# presumably saved with its index — confirm whether that column is wanted.
ALL_DF_CSV = "all_df.csv"
all_df = pd.read_csv(ALL_DF_CSV)
all_df.head(n=5)

Unnamed: 0,rank,player_name,total_points,average_points,week_1_pts,week_2_pts,week_3_pts,week_4_pts,week_5_pts,week_6_pts,...,week_15_pts,week_16_pts,week_1_opponent,owner,bye_week,salary,years,year,position,player_path
0,1.0,"Lutz, Wil NOS PK",151.8,10.12,15.7,12.8,0.0,11.8,6.9,7.8,...,9.9,9.7,GBP,Hudson River Raiders,9,"$2,500,000",1,2019,PK,player?L=46381&P=12956
1,2.0,"Butker, Harrison KCC PK",148.5,9.9,17.4,4.0,7.8,8.7,7.5,6.1,...,9.1,7.8,CLE,Dababetes Type II,12,"$1,950,000",2,2019,PK,player?L=46381&P=13354
2,3.0,"Tucker, Justin BAL PK",132.7,8.847,11.4,13.2,4.9,6.1,16.7,13.0,...,1.0,7.1,@LVR,The Pterodactyls,8,"$2,000,000",2,2019,PK,player?L=46381&P=10976
3,4.0,"Gay, Matt TBB PK",124.3,8.287,5.1,7.2,7.9,14.9,7.2,6.4,...,8.6,10.1,CHI,FA,7,"$450,000",0,2019,PK,player?L=46381&P=14244
4,5.0,"Boswell, Chris PIT PK",120.1,8.007,1.0,9.4,9.2,10.8,12.3,5.0,...,5.9,5.9,@BUF,FA,7,"$450,000",0,2019,PK,player?L=46381&P=11936


In [7]:
# One row per unique (player_name, player_path) pair; the row labels of
# all_df are kept as-is.
player_key_cols = ["player_name", "player_path"]
distinct_players = all_df.loc[:, player_key_cols].drop_duplicates()

In [8]:
# Fetch the bio, status and stats frames for every distinct player.
bio_dfs, status_dfs, stats_dfs = [], [], []
# (player_name, repr(error)) for every player whose fetch raised, so skipped
# players can be inspected afterwards instead of vanishing silently.
failed_players = []

for i, row in distinct_players.iterrows():
    # `i` is the row label kept by drop_duplicates, not a running counter,
    # so a progress line appears only when a surviving label is a multiple of 100.
    if i % 100 == 0:
        print(f"Index {i}: {row.player_name}")
    p = Player(
        player_name=row.player_name,
        player_path=row.player_path
    )
    try:
        bio_dfs.append(p.get_player_bio())
        status_dfs.append(p.get_player_status())
        stats_dfs.append(p.get_player_stats())
    except Exception as exc:
        # Still best-effort, but `Exception` (unlike a bare `except:`) lets
        # KeyboardInterrupt / SystemExit stop a long-running scrape.
        failed_players.append((row.player_name, repr(exc)))
        continue

if failed_players:
    print(f"Skipped {len(failed_players)} player(s) after errors; see failed_players")

Index 0: Lutz, Wil NOS PK
Index 100: Edwards, Mike TBB S
Index 200: Carson, Chris SEA RB
Index 300: Janovich, Andy DEN RB
Index 400: Vea, Vita TBB DT
Index 500: Ryan, Logan TEN CB
Index 600: Johnson, Kevin BUF CB
Index 700: Brady, Tom NEP QB
Index 800: Edelman, Julian NEP WR
Index 900: Coutee, Keke HOU WR
Index 1000: Williams, Chad IND WR
Index 1100: Hyder, Kerry DAL DE
Index 1200: James, Jesse DET TE
Index 1300: Whitehead, Tahir OAK LB
Index 1400: Cole, Dylan HOU LB
Index 1800: Washington, DeAndre MIA RB
Index 2300: Smith, Alex WAS QB
Index 2400: Aiyuk, Brandon SFO WR
Index 2600: Begelton, Reggie GBP WR
Index 2700: Fowler, Dante ATL DE
Index 3100: Correa, Kamalei JAC LB


In [9]:
# Collapse each list of per-player frames into a single frame with a fresh index.
# NOTE(review): pandas warned here that the columns were not aligned and were
# sorted; passing an explicit `sort=` would pin that behaviour — confirm which
# column order is wanted before changing it.
bio_df, status_df, stats_df = (
    pd.concat(frames, ignore_index=True)
    for frames in (bio_dfs, status_dfs, stats_dfs)
)


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.





In [25]:
# Left-join bio and then status columns onto the stats rows by player_name.
# Columns that clash with existing ones get the '_bio' / '_status' suffix.
all_df = (
    all_df
    .merge(right=bio_df, how="left", on=["player_name"], suffixes=('', '_bio'))
    .merge(right=status_df, how="left", on=["player_name"], suffixes=('', '_status'))
)
all_df.head(n=5)

Unnamed: 0,rank,player_name,total_points,average_points,week_1_pts,week_2_pts,week_3_pts,week_4_pts,week_5_pts,week_6_pts,...,bye_week_status,contract_info,contract_status,contract_year,expected_return,injury_status,league_status,owned_change,salary_status,started
0,1.0,"Lutz, Wil NOS PK",151.8,10.12,15.7,12.8,0.0,11.8,6.9,7.8,...,6,UFA,2022.0,1,,,Hudson River Raiders,37% (0%),"$2,500,000",70%
1,2.0,"Butker, Harrison KCC PK",148.5,9.9,17.4,4.0,7.8,8.7,7.5,6.1,...,12,UFA,2023.0,2,,,Dababetes Type II,40% (0%),"$1,950,000",89%
2,3.0,"Tucker, Justin BAL PK",132.7,8.847,11.4,13.2,4.9,6.1,16.7,13.0,...,8,UFA,2023.0,2,,,The Pterodactyls,41% (0%),"$2,000,000",86%
3,4.0,"Gay, Matt TBB PK",124.3,8.287,5.1,7.2,7.9,14.9,7.2,6.4,...,11,,,0,,,FA,8% (0%),"$450,000",0%
4,5.0,"Boswell, Chris PIT PK",120.1,8.007,1.0,9.4,9.2,10.8,12.3,5.0,...,7,,,0,,,FA,18% (0%),"$450,000",0%


In [27]:
# Derive parsed columns: (target column, source column, converter), applied in order.
column_converters = [
    ("height_inches", "height", convert_height_to_inches),
    ("weight_lbs", "weight", convert_weight_to_lbs),
    # NOTE(review): experience is parsed with the *weight* converter — looks
    # like a copy-paste; confirm this is the intended helper.
    ("experience_years", "experience", convert_weight_to_lbs),
    ("salary", "salary", parse_digits_to_num),
]
for target_col, source_col, converter in column_converters:
    all_df[target_col] = all_df[source_col].apply(converter)

all_df["dob"] = pd.to_datetime(all_df["dob"])

# Cast these columns to float.
cols_to_numeric = ["age", "total_points", "average_points"]
for numeric_col in cols_to_numeric:
    all_df[numeric_col] = all_df[numeric_col].astype(float)

In [28]:
# Preview the frame after the type conversions.
all_df.head(n=5)

Unnamed: 0,rank,player_name,total_points,average_points,week_1_pts,week_2_pts,week_3_pts,week_4_pts,week_5_pts,week_6_pts,...,bye_week_status,contract_info,contract_status,contract_year,expected_return,injury_status,league_status,owned_change,salary_status,started
0,1.0,"Lutz, Wil NOS PK",151.8,10.12,15.7,12.8,0.0,11.8,6.9,7.8,...,6,UFA,2022.0,1,,,Hudson River Raiders,37% (0%),"$2,500,000",70%
1,2.0,"Butker, Harrison KCC PK",148.5,9.9,17.4,4.0,7.8,8.7,7.5,6.1,...,12,UFA,2023.0,2,,,Dababetes Type II,40% (0%),"$1,950,000",89%
2,3.0,"Tucker, Justin BAL PK",132.7,8.847,11.4,13.2,4.9,6.1,16.7,13.0,...,8,UFA,2023.0,2,,,The Pterodactyls,41% (0%),"$2,000,000",86%
3,4.0,"Gay, Matt TBB PK",124.3,8.287,5.1,7.2,7.9,14.9,7.2,6.4,...,11,,,0,,,FA,8% (0%),"$450,000",0%
4,5.0,"Boswell, Chris PIT PK",120.1,8.007,1.0,9.4,9.2,10.8,12.3,5.0,...,7,,,0,,,FA,18% (0%),"$450,000",0%


In [29]:
# Write each season's rows to its own worksheet.
season_worksheets = {
    "2019_top_players": 2019,
    "2020_top_players": 2020,
}
for worksheet_title, season in season_worksheets.items():
    season_rows = all_df[all_df["year"] == season]
    write_to_worksheet(sheet=sh, worksheet_name=worksheet_title, df=season_rows)

In [30]:
# Write the stacked per-player stats, with exact duplicate rows removed.
unique_stats = stats_df.drop_duplicates()
write_to_worksheet(sheet=sh, worksheet_name="all_player_stats", df=unique_stats)