In [1]:
%matplotlib inline
import gspread
from gspread.models import Spreadsheet
from gspread_dataframe import set_with_dataframe, get_as_dataframe
from bs4 import BeautifulSoup
import requests
from IPython.display import display, Markdown
import pandas as pd
from urllib.parse import urljoin
from io import StringIO, BytesIO
import re
import numpy as np
import seaborn as sns
import plotly.express as px
from sklearn.preprocessing import StandardScaler

In [2]:
# Authenticate with a Google service account and open the spreadsheet
# that this notebook reads its player data from.
SERVICE_ACCOUNT_FILE = "../grahamflemingthomson-164514-04e450fcd324.json"
SPREADSHEET_KEY = "1OqoSsf5YrsJWRWK1WouiwHeTP_prGVmsP7yURgjQnwo"

gc = gspread.service_account(filename=SERVICE_ACCOUNT_FILE)
sh = gc.open_by_key(SPREADSHEET_KEY)

In [3]:
# Fetch the worksheet list once: every sh.worksheets() call is a separate
# request to the Sheets API, and both frames come from the same listing.
worksheets = sh.worksheets()

# The first worksheet is loaded as the 2019 table, the second as the 2020 table.
top_2019 = get_as_dataframe(worksheets[0])
top_2020 = get_as_dataframe(worksheets[1])
top_2020.head(2)

Unnamed: 0,rank,player_name,total_points,average_points,week_1_pts,week_2_pts,week_3_pts,week_4_pts,week_5_pts,week_6_pts,...,bye_week_status,contract_info,contract_status,contract_year,expected_return,injury_status,league_status,owned_change,salary_status,started
0,1,"Sanders, Jason MIA PK",162.1,10.807,4.6,11.7,7.0,18.7,22.5,5.4,...,14,UFA,2023.0,2.0,,,Hudson River Raiders,35% (0%),1800000.0,0.76
1,2,"Koo, Younghoe ATL PK",153.7,10.979,7.8,17.2,3.5,,14.1,20.1,...,6,UFA,2023.0,2.0,,,The Poop Swatches,39% (0%),2010000.0,0.91


In [4]:
# Spread total_points into one column per position (rows are players), so that
# column-wise standardisation yields a z-score relative to each position.
pivoted_tp_2019 = top_2019.pivot(
    index="player_name",
    columns="position",
    values="total_points",
)

# StandardScaler standardises each column independently; the result is a bare
# ndarray with the same shape as the pivoted frame.
position_scaler_2019 = StandardScaler()
tp_zscores_within_position_2019 = position_scaler_2019.fit_transform(pivoted_tp_2019)

In [5]:
# Re-attach the player index and position columns to the scaled ndarray.
zscores_wide_2019 = pd.DataFrame(
    data=tp_zscores_within_position_2019,
    index=pivoted_tp_2019.index,
    columns=pivoted_tp_2019.columns,
)

# Collapse the wide frame to one value per player: back-filling along each row
# pulls the first non-null z-score into the first column, which is then kept
# as "z_score" next to a regular player_name column.
tp_zscores_within_position_df_2019 = (
    zscores_wide_2019
    .bfill(axis="columns")
    .iloc[:, 0]
    .to_frame(name="z_score")
    .reset_index()
)

In [6]:
# Spread total_points into one column per position (rows are players), so that
# column-wise standardisation yields a z-score relative to each position.
pivoted_tp_2020 = top_2020.pivot(
    index="player_name",
    columns="position",
    values="total_points",
)

# StandardScaler standardises each column independently; the result is a bare
# ndarray with the same shape as the pivoted frame.
position_scaler_2020 = StandardScaler()
tp_zscores_within_position_2020 = position_scaler_2020.fit_transform(pivoted_tp_2020)

In [7]:
# Re-attach the player index and position columns to the scaled ndarray.
zscores_wide_2020 = pd.DataFrame(
    data=tp_zscores_within_position_2020,
    index=pivoted_tp_2020.index,
    columns=pivoted_tp_2020.columns,
)

# Collapse the wide frame to one value per player: back-filling along each row
# pulls the first non-null z-score into the first column, which is then kept
# as "z_score" next to a regular player_name column.
tp_zscores_within_position_df_2020 = (
    zscores_wide_2020
    .bfill(axis="columns")
    .iloc[:, 0]
    .to_frame(name="z_score")
    .reset_index()
)

In [8]:
# Attach each player's within-position z-score to the season tables.
# These frames are reassigned from themselves, so a second run of this cell
# would otherwise merge onto a frame that already has "z_score" and produce
# z_score_x / z_score_y. Dropping any existing column first keeps the cell
# idempotent; errors="ignore" makes the drop a no-op on the first run.
top_2019 = (
    top_2019
    .drop(columns="z_score", errors="ignore")
    .merge(tp_zscores_within_position_df_2019, on="player_name", how="left")
)
top_2020 = (
    top_2020
    .drop(columns="z_score", errors="ignore")
    .merge(tp_zscores_within_position_df_2020, on="player_name", how="left")
)

In [9]:
# Stack both season tables row-wise under a fresh 0..n-1 index.
season_frames = [top_2019, top_2020]
top_all = pd.concat(season_frames, axis=0, ignore_index=True)

In [22]:
# Distinct position codes, in order of first appearance.
top_all["position"].unique().tolist()

['PK', 'S', 'CB', 'RB', 'PN', 'DT', 'DE', 'QB', 'Off', 'WR', 'LB', 'TE']

In [None]:
# Write the combined table to CSV with a header row and without the index.
top_all.to_csv(
    "../dash/2019_2020_top_players.csv",
    header=True,
    index=False,
)

In [None]:
# NOTE(review): the line that writes this CSV is commented out, so the read
# below replaces the in-memory top_2020 with whatever copy is already on disk.
# Presumably the file was written by an earlier run — confirm it is current.
# top_2020.to_csv("../dash/2020_top_players.csv", index=False, header=True)
top_2020 = pd.read_csv("../dash/2020_top_players.csv")

In [56]:
# Display names for the plotted columns. Plotly Express uses them for axis
# titles, the legend title and hover text. They replace the placeholder
# "foo"/"foo2" labels and match the axis titles of the combined subplot figure.
box_labels = {
    "position": "Position",
    "z_score": "Z Score",
    "total_points": "Total Points",
}

# Box plot of within-position z-scores: one box per position, with every
# player drawn as an individual point.
zscores = px.box(
    top_all,
    x="position",
    y="z_score",
    color="position",
    points="all",
    hover_data=["player_name", "total_points", "average_points", "age", "experience_years"],
    labels=box_labels,
)

# Same layout on the raw total_points scale, with z_score shown on hover.
all_points = px.box(
    top_all,
    x="position",
    y="total_points",
    color="position",
    points="all",
    hover_data=["player_name", "total_points", "z_score", "average_points", "age", "experience_years"],
    labels=box_labels,
)

In [57]:
# Display the z-score box plot as this cell's output.
zscores

In [58]:
# Display the total-points box plot as this cell's output.
all_points

In [70]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Stack the two box plots vertically in one figure:
# row 1 = z-scores, row 2 = total points. Each row keeps its own x-axis.
fig = make_subplots(rows=2, cols=1, shared_xaxes=False)

# Axis titles: only the bottom x-axis is labelled; each row gets its own y title.
fig.update_xaxes(title_text="Position", row=2, col=1)
fig.update_yaxes(title_text="Z Score", row=1, col=1)
fig.update_yaxes(title_text="Total Points", row=2, col=1)

# Add every trace of each source figure to its row. Iterating each figure's
# own .data avoids assuming both figures hold the same number of traces.
for subplot_row, source_fig in enumerate((zscores, all_points), start=1):
    for trace in source_fig.data:
        fig.add_trace(trace, row=subplot_row, col=1)

# Hide legend entries on the combined figure only, rather than assigning
# showlegend on the source traces, so `zscores` and `all_points` keep their
# legends when displayed on their own.
fig.update_traces(showlegend=False)

fig.update_layout(title_text="Some cool plots", height=700)

fig.show()