In [1]:
%matplotlib widget

In [2]:
import glob

import database.interface as db
from optimizer.contest import Contest
from optimizer.lineup import Lineup
from optimizer.projection import covariance
from optimizer.projection import usage
from typing import List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D

import warnings
import jscatter


In [3]:
def all_entered_contests(pattern='contest_entries/2023/Week*/*_upload.csv'):
    """Return the unique contest IDs encoded in uploaded contest-entry file names.

    Every path matched by ``pattern`` is split on underscores. The contest ID is
    the first of the 2nd-, 3rd- or 4th-from-last tokens (tried in that order)
    that parses as an integer, e.g. ``..._<id>_upload.csv`` or
    ``..._<id>_<tag>_upload.csv``.

    Args:
        pattern: Glob pattern selecting the upload files to scan. Defaults to
            the 2023 weekly contest-entry folders.

    Returns:
        list[int]: Contest IDs without duplicates, in the order glob first
        yields them.

    Raises:
        ValueError: If none of the candidate tokens of a matched path is an
            integer.
    """
    contest_ids = []
    seen = set()  # O(1) duplicate check; the list keeps discovery order
    for contest_path in glob.glob(pattern):
        tokens = contest_path.split('_')
        for offset in (-2, -3, -4):
            try:
                contest_id = int(tokens[offset])
            except (ValueError, IndexError):
                # Not numeric (or the name has too few tokens): try the next one.
                continue
            break
        else:
            raise ValueError(f'no contest id found in file name: {contest_path!r}')

        if contest_id not in seen:
            seen.add(contest_id)
            contest_ids.append(contest_id)
    return contest_ids

In [4]:
# Gather the IDs of every contest that has an upload file. CONTESTS is the name
# the analysis loop iterates over; `contests` is kept as a lowercase alias.
CONTESTS = all_entered_contests()
contests = CONTESTS
print(contests)

[147641480, 149743407, 149743434, 149743471, 149743522, 150265279, 150462148, 150934000, 151202093, 151210640, 151386597, 151418586, 151653222]


In [5]:
## Build the lineup field for every entered contest and collect its scaled stats
plotcovproj = []
plotactualproj = []
plotactualcov = []


def scale_columns(df, cols):
    """Add a median-centred, std-scaled ``<col>_scale`` column for each of ``cols``.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame containing every column named in ``cols``.
        cols: Names of the columns to scale.

    Returns:
        The same ``df`` object, with one extra ``<col>_scale`` column per entry
        of ``cols``.
    """
    # NOTE(review): np.median propagates NaN, while np.std on a Series appears
    # to skip it, so a single missing value may turn a whole scaled column
    # into NaN -- confirm this is intended.
    for col in cols:
        df[col + '_scale'] = (df[col] - np.median(df[col])) / np.std(df[col])
    return df


# One stats frame per contest; concatenated once after the loop.
contest_frames = []
for CONTEST in CONTESTS:
    # NOTE(review): a new DFSDBInterface is constructed three times per contest;
    # consider sharing one instance if the interface is safe to reuse -- confirm.
    contest = Contest.Contest(contest_id=CONTEST, db_interface=db.DFSDBInterface())

    constraint = Lineup.LineupConstraint(contest_type='Showdown')
    draft_group = Lineup.DraftGroup(draft_group_id=contest.draft_group_id, db_interface=db.DFSDBInterface())

    # Get player projection data; missing variance values are replaced with 1.0
    draft_group.populate_points_data(db.DFSDBInterface())
    draft_group.data['variance'] = draft_group.data['variance'].fillna(1.0)

    ## Generate the field

    # Generate all possible lineups
    generator = Lineup.greedyGenerator(constraint, draft_group.data, projections_only=True)
    lineups = generator.generate(verbose=False, limit=None, random=False)

    # Get the covariance matrix
    cov_group = covariance.DraftGroupCovariance(draft_group.data)
    cov_matrix = cov_group.get_covariance()

    field = Contest.LineupSet(draft_group.data, lineups, cov_matrix)
    data = field.get_lineup_stats()

    # Scale within each contest so that contests can be pooled below.
    data = scale_columns(data, ['projection', 'covariance', 'salary', 'actual'])
    contest_frames.append(data)

all_data = pd.concat(contest_frames).reset_index()
    # # print(data.head(10))

    # with warnings.catch_warnings():
    #     warnings.simplefilter("ignore")
    #     scatter = jscatter.Scatter(data=data, x='projection', y='covariance')
    #     scatter.color(by='salary')
    #     scatter.size(by='actual')
    #     scatter.opacity(by='actual')
    #     scatter.show()
    #     plotcovproj.append(scatter)

    #     scatter = jscatter.Scatter(data=data, x='projection', y='actual')
    #     scatter.color(by='salary')
    #     scatter.size(by='covariance')
    #     scatter.opacity(by='covariance')
    #     scatter.show()
    #     plotactualproj.append(scatter)

    #     scatter = jscatter.Scatter(data=data, x='covariance', y='actual')
    #     scatter.color(by='salary')
    #     scatter.size(by='projection')
    #     scatter.opacity(by='projection')
    #     scatter.show()
    #     plotactualcov.append(scatter)

In [6]:
# # axis values mirror whichever plot is being interacted with
# jscatter.link(plotcovproj, rows=4, cols=3)

# # axis values mirror whichever plot is being interacted with
# jscatter.link(plotactualcov, rows=4, cols=3)

# # axis values mirror whichever plot is being interacted with
# jscatter.link(plotactualproj, rows=4, cols=3)

In [9]:
# Work on a copy of the pooled stats. The commented filters below are optional
# ways to narrow the plotted region.
plot_data = all_data.copy()
# plot_data = plot_data[(plot_data['covariance_scale'] > 0.) & (plot_data['covariance_scale'] < 5.)]
# plot_data = plot_data[(plot_data['projection_scale'] > 0.) & (plot_data['projection_scale'] < 3.)]
# plot_data = plot_data[plot_data['salary_scale'] > 0.]
# plot_data = plot_data[plot_data['actual_scale'] > 3.]

# Sort ascending by actual_scale (presumably so the best lineups are drawn
# last -- confirm), then drop every row that has a missing value.
plot_data = (
    plot_data
    .sort_values(by='actual_scale', ascending=True)
    .reset_index()
    .dropna()
)
print(len(plot_data), len(all_data))
# print(plot_data.describe())

# (x, y) columns of the three linked views; all are encoded by actual_scale.
axis_pairs = [
    ('projection_scale', 'covariance_scale'),
    ('projection_scale', 'actual_scale'),
    ('covariance_scale', 'actual_scale'),
]

linked_views = []
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for x_col, y_col in axis_pairs:
        view = jscatter.Scatter(data=plot_data, x=x_col, y=y_col)
        view.color(by='actual_scale')
        view.size(by='actual_scale')
        view.opacity(by='actual_scale')
        linked_views.append(view)

# Keep the individual handles available under their original names.
scatter1, scatter2, scatter3 = linked_views

jscatter.link(linked_views)

1548376 1650284


GridBox(children=(HBox(children=(VBox(children=(Button(button_style='primary', icon='arrows', layout=Layout(wi…

In [8]:
# Work on a copy of the pooled stats. The commented filters below are optional
# ways to narrow the plotted region.
plot_data = all_data.copy()
# plot_data = plot_data[(plot_data['covariance_scale'] > 0.) & (plot_data['covariance_scale'] < 5.)]
# plot_data = plot_data[(plot_data['projection_scale'] > 0.) & (plot_data['projection_scale'] < 3.)]
# plot_data = plot_data[plot_data['salary_scale'] > 0.]

# Keep only lineups whose scaled actual score exceeds 3.
plot_data = plot_data[plot_data['actual_scale'] > 3.]

# Sort ascending by actual_scale, then drop every row that has a missing value.
plot_data = (
    plot_data
    .sort_values(by='actual_scale', ascending=True)
    .reset_index()
    .dropna()
)
print(len(plot_data), len(all_data))
# print(plot_data.describe())

# (x, y) columns of the four linked views; all are encoded by salary_scale.
axis_pairs = [
    ('projection_scale', 'covariance_scale'),
    ('projection_scale', 'actual_scale'),
    ('covariance_scale', 'actual_scale'),
    ('salary_scale', 'actual_scale'),
]

linked_views = []
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for x_col, y_col in axis_pairs:
        view = jscatter.Scatter(data=plot_data, x=x_col, y=y_col)
        view.color(by='salary_scale')
        view.size(by='salary_scale')
        view.opacity(by='salary_scale')
        linked_views.append(view)

# Keep the individual handles available under their original names.
scatter1, scatter2, scatter3, scatter4 = linked_views

jscatter.link(linked_views, rows=2)

1602 1650284


GridBox(children=(HBox(children=(VBox(children=(Button(button_style='primary', icon='arrows', layout=Layout(wi…

### The best lineups:
- Are above median salary, with best upside for those closer to \$50k
- Are above median projection, generally around 2$\sigma$ but usually not over 3$\sigma$
- Are above median covariance, but the best fall roughly 1-3$\sigma$ above the median and no higher
- The sweet spot is 1-2$\sigma$ above the median in both projection and covariance.