In [1]:
import os
import requests
import pandas as pd
import json
import glob

from dotenv import load_dotenv

from functools import reduce

from sklearn.preprocessing import StandardScaler


In [2]:
load_dotenv()
# glob returns matches in arbitrary, OS-dependent order; sorting makes the
# row order of the combined frame reproducible across machines and re-runs.
files = sorted(glob.glob("players_*.csv"))
# ignore_index=True gives the combined frame one unique 0..n-1 index instead
# of repeating every CSV's own row labels, which would leave duplicate labels.
players = pd.concat((pd.read_csv(f) for f in files), ignore_index=True)
# The "slots" column is stored as JSON text in the CSVs; decode it so later
# membership checks operate on the parsed value rather than on a string.
players["slots"] = players["slots"].apply(json.loads)

In [3]:
def best_score(group):
    """Return the greedy best-lineup point total for one group of player rows.

    Rows are ranked by "points" (highest first). Each entry of the fixed slot
    sequence is then filled, in order, with the highest-ranked row whose
    "slots" value contains that slot id and whose "player_id" has not been
    used already. A slot with no eligible row is simply skipped.

    Parameters
    ----------
    group : pandas.DataFrame
        Must provide the columns "points", "slots" and "player_id"; each
        "slots" value must support the ``in`` operator.

    Returns
    -------
    pandas.Series
        A single entry, "best", holding the sum of the selected points
        (0 when no slot could be filled).
    """
    ranked = group.sort_values("points", ascending=False)
    candidates = list(zip(ranked["points"], ranked["slots"], ranked["player_id"]))

    taken_ids = []
    lineup_points = []
    # Slot ids to fill, in order; repeated ids mean that slot is filled twice.
    for slot_id in (0, 2, 2, 4, 4, 6, 23, 16, 17):
        for points, eligible_slots, player_id in candidates:
            if slot_id in eligible_slots and player_id not in taken_ids:
                taken_ids.append(player_id)
                lineup_points.append(points)
                break  # candidates are ranked, so the first match is the best one
    return pd.Series({"best": sum(lineup_points)})
    
# Best-possible total per (year, week, team_id).
# Only the three columns best_score reads are passed to apply(): recent pandas
# deprecates apply() operating on the grouping columns, and the explicit
# selection gives the same result on older and newer versions.
best = (
    players.groupby(["year", "week", "team_id"])[["points", "slots", "player_id"]]
    .apply(best_score)
)

In [4]:
# Points actually scored per (year, week, team_id), leaving out rows whose
# position_id is 20 or 21 (presumably non-starting slots such as bench/IR;
# TODO confirm against the data source).
is_excluded = players["position_id"].isin([20, 21])
actual = (
    players[~is_excluded]
    .groupby(["year", "week", "team_id"])["points"]
    .sum()
)
actual

year  week  team_id
2018  0     17           0.0
      1     1          143.6
            2          157.7
            3          142.0
            7          190.8
                       ...  
2020  13    13         178.5
            15         305.0
            16         245.7
            17         221.0
            18         162.5
Name: points, Length: 482, dtype: float64

In [5]:
# Put each team-week's actual total next to its best-possible total (inner
# join on the three keys), then record the gap: actual minus best.
together = pd.merge(
    actual.reset_index(),
    best.reset_index(),
    on=["year", "week", "team_id"],
)
together["diff"] = together["points"].sub(together["best"])
together.head()

Unnamed: 0,year,week,team_id,points,best,diff
0,2018,0,17,0.0,0.0,0.0
1,2018,1,1,143.6,191.8,-48.2
2,2018,1,2,157.7,180.9,-23.2
3,2018,1,3,142.0,163.3,-21.3
4,2018,1,7,190.8,241.5,-50.7


In [6]:
# Save the per-team-week comparison; the default row index is written as the
# first (unnamed) column of the CSV.
together.to_csv(path_or_buf="score_project_actual.csv")

In [7]:
# One scaler instance is reused; fit_transform() refits it on every call, so
# no statistics carry over from one group to the next.
scaler = StandardScaler()
def zscore(group):
    """Return a copy of ``group`` with a standardized "scaled_diff" column.

    The "diff" column is standardized with scikit-learn's StandardScaler,
    fitted on this group only, so the values are z-scores relative to the
    other rows of the same group.

    Parameters
    ----------
    group : pandas.DataFrame
        Must contain a numeric "diff" column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows and index plus "scaled_diff". The input
        is not modified: pandas does not support user functions that mutate
        the object handed to them by ``groupby(...).apply``.
    """
    scaled = scaler.fit_transform(X=group[["diff"]])
    # fit_transform returns an (n, 1) array; flatten it so the new column is
    # assigned from an unambiguous 1-D array.
    return group.assign(scaled_diff=scaled.ravel())

# Season total of the weekly (actual - best) gaps for every team.
# The string "sum" replaces the builtin ``sum``: recent pandas deprecates
# passing builtins to agg(), and the string selects the same groupby sum.
scores = together.groupby(["year", "team_id"]).agg({"diff": "sum"})
# Standardize the season totals within each year.
# group_keys=False keeps the existing (year, team_id) index; without it, newer
# pandas prepends the group key and yields a duplicated "year" index level.
scores = scores.groupby(["year"], group_keys=False).apply(zscore)
scores.head(12)

Unnamed: 0_level_0,Unnamed: 1_level_0,diff,scaled_diff
year,team_id,Unnamed: 2_level_1,Unnamed: 3_level_1
2018,1,-1409.7,-2.356939
2018,2,-454.3,0.581335
2018,3,-321.4,0.99006
2018,7,-362.9,0.86243
2018,9,-546.5,0.297779
2018,10,-1059.9,-1.281151
2018,12,-362.3,0.864275
2018,13,-547.7,0.294089
2018,15,-842.1,-0.61132
2018,16,-845.6,-0.622084


In [8]:
# Save the per-season standardized totals; the (year, team_id) index is
# written as the leading columns of the CSV.
scores.to_csv(path_or_buf="scaled_points.csv")