## Bootstrapping

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set working directory: swap the '\code' part of the current path for '\data'
# so the relative CSV file names used below resolve.
# Raw strings keep the backslashes literal; '\c' and '\d' are invalid escape
# sequences that trigger a SyntaxWarning on recent Python versions.
# Note: str.replace() substitutes every occurrence of '\code' in the path, and a
# path without that substring is left unchanged (chdir to the same directory).
import os
current_dir = os.getcwd()
os.chdir(current_dir.replace(r'\code', r'\data'))

In [2]:
# Load data (file names are relative to the working directory set above)
data_ii = pd.read_csv('best_ball_mania_ii.csv')
data_iii = pd.read_csv('best_ball_mania_iii.csv')
data_iv = pd.read_csv('best_ball_mania_iv.csv')

# Scale 'pick_points' and 'roster_points' for dataset iv by a factor of 4.5
# (this is a multiplication, not an additive +4.5 offset).
# NOTE(review): the reason for the 4.5 factor is not visible here — confirm.
data_iv['pick_points'] = data_iv['pick_points'] * 4.5
data_iv['roster_points'] = data_iv['roster_points'] * 4.5

# Columns that are in all datasets, kept in data_ii's column order.
# A set intersection would yield an arbitrary order that can change from one
# kernel session to the next, which makes the concatenated frame's column
# order irreproducible.
cols = [col for col in data_ii.columns
        if col in data_iii.columns and col in data_iv.columns]

# Filter columns
data_ii = data_ii[cols]
data_iii = data_iii[cols]
data_iv = data_iv[cols]

# Concatenate data (row labels of the individual frames are kept as-is)
data = pd.concat([data_ii, data_iii, data_iv], axis=0)

In [3]:
# Rename 'tournament_entry_id' to 'team_id', then discard columns that the
# rest of this section does not use.
# Note: re-running this cell on the already-trimmed frame raises a KeyError,
# because the dropped columns are no longer present.
data = (
    data
    .rename(columns={'tournament_entry_id': 'team_id'})
    .drop(columns=['clock', 'tournament_round_number', 'bye_week', 'draft_time'])
)

# Fit a degree-2 polynomial of pick_points against overall_pick_number and
# store the fitted value for every row in 'poly_points'.
p = np.poly1d(
    np.polyfit(data['overall_pick_number'], data['pick_points'], deg=2)
)
data['poly_points'] = p(data['overall_pick_number'])

In [7]:
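# Bootstrap the points per position rank
# Find distributions of points per position rank for each position
# Simulate a single draft with 30 QBs, 70 RBs, 100 WRs, and 30 TEs
# Each pick is a random number from the distribution of *centered* points per position rank
# Find the chance that RB1 scores more points than WR1 and so on
#
# This cell prepares the inputs for that: for each position it fits polynomial
# curves for the mean and the standard deviation of pick points by position rank,
# then centers the fitted points on the position's starter rank.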

# Deepest per-draft position rank that enters the fits. Positions that are not
# listed here (for example 'FB') contribute no rows at all.
_MAX_POS_RANK = {'QB': 30, 'RB': 70, 'WR': 100, 'TE': 30}


def _ranked_pick_points(position_name):
    """Return (pos_rank, pick_points) rows for one position, one row per pick.

    Reads the module-level ``data`` frame. Picks are averaged per
    (draft_id, overall_pick_number), ranked within each draft by
    overall_pick_number (earliest pick = rank 1), and truncated at the
    position's entry in ``_MAX_POS_RANK``.

    Filtering to the requested position *before* grouping yields the same
    ranks as ranking within (draft_id, position_name) on the full frame,
    without grouping every position on each call.
    """
    rows = data.loc[data['position_name'] == position_name,
                    ['draft_id', 'overall_pick_number', 'pick_points']]
    ranked = (rows.groupby(['draft_id', 'overall_pick_number'])['pick_points']
                  .mean()
                  .reset_index())
    ranked['pos_rank'] = ranked.groupby('draft_id')['overall_pick_number'].rank()

    # Unknown positions get a cap of 0, i.e. an empty selection; np.polyfit
    # then rejects the empty input exactly as it did before this refactor.
    max_rank = _MAX_POS_RANK.get(position_name, 0)
    return ranked.loc[ranked['pos_rank'] <= max_rank, ['pos_rank', 'pick_points']]


def polynomial_points(position_name, degree=3):
    """Fit pick points as a polynomial of position rank.

    Parameters
    ----------
    position_name : str
        Position to model ('QB', 'RB', 'WR' or 'TE').
    degree : int, default 3
        Degree of the fitted polynomial (the previous hard-coded value).

    Returns
    -------
    numpy.poly1d
        Callable polynomial mapping a position rank to fitted pick points.
    """
    ranked = _ranked_pick_points(position_name)
    return np.poly1d(np.polyfit(ranked['pos_rank'], ranked['pick_points'], degree))


def variance_regression(position_name, degree=3):
    """Fit the spread of pick points as a polynomial of position rank.

    Despite the name, the fitted quantity is the sample *standard deviation*
    (``.std()``) of pick points at each position rank, not the variance.

    Parameters
    ----------
    position_name : str
        Position to model ('QB', 'RB', 'WR' or 'TE').
    degree : int, default 3
        Degree of the fitted polynomial.

    Returns
    -------
    numpy.poly1d
        Callable polynomial mapping a position rank to the fitted standard
        deviation of pick points.
    """
    ranked = _ranked_pick_points(position_name)

    # A rank observed only once has an undefined (NaN) sample standard
    # deviation; a single NaN would poison the least-squares fit, so drop it.
    spread = ranked.groupby('pos_rank')['pick_points'].std().dropna()

    return np.poly1d(np.polyfit(spread.index.to_numpy(), spread.to_numpy(), degree))

def estimations(position_name, max_rank, starter_rank):
    """Build fitted points and a +/- 2 standard deviation band per position rank.

    Parameters
    ----------
    position_name : str
        Position label; passed to ``polynomial_points`` and
        ``variance_regression`` and used as the prefix of ``pos_rank``.
    max_rank : int
        Ranks 1..max_rank are evaluated.
    starter_rank : int
        Rank whose fitted points become the zero baseline: its value is
        subtracted from 'points', 'top_five' and 'bottom_five'.

    Returns
    -------
    pandas.DataFrame
        Columns 'rank', 'pos_rank' (e.g. 'QB1'), 'points', 'variance',
        'top_five' and 'bottom_five'. Note that 'variance' holds the fitted
        *standard deviation* and is not re-centered; 'top_five' and
        'bottom_five' are points plus/minus two of those standard deviations.

    Raises
    ------
    IndexError
        If ``starter_rank`` is not one of the evaluated ranks.
    """
    ranks = np.arange(1, max_rank + 1)

    # Fail fast, before the (expensive) fits, with a message that names the
    # problem instead of a bare "index 0 is out of bounds".
    if not np.any(ranks == starter_rank):
        raise IndexError(
            f"starter_rank {starter_rank!r} is not among the evaluated ranks 1..{max_rank}"
        )

    points = polynomial_points(position_name)
    spread = variance_regression(position_name)

    df = pd.DataFrame({'rank': ranks})
    df['pos_rank'] = position_name + df['rank'].astype(str)
    df['points'] = points(ranks)

    # Both fits are polynomials, so they are evaluated for all ranks in one
    # vectorised call. The NaN -> 0 replacement is kept for backward
    # compatibility; it only matters if the spread fit itself produced NaNs.
    df['variance'] = pd.Series(spread(ranks), index=df.index).fillna(0)
    df['top_five'] = df['points'] + 2 * df['variance']
    df['bottom_five'] = df['points'] - 2 * df['variance']

    # Center everything on the starter rank's fitted points.
    mu = df.loc[df['rank'] == starter_rank, 'points'].values[0]
    for column in ('points', 'top_five', 'bottom_five'):
        df[column] -= mu

    return df

# Per-position estimates; starter_rank is the rank used as the zero baseline
qb = estimations('QB', max_rank=30, starter_rank=12)
rb = estimations('RB', max_rank=70, starter_rank=29)   # 5 Flex
wr = estimations('WR', max_rank=100, starter_rank=41)  # 5 Flex
te = estimations('TE', max_rank=30, starter_rank=14)   # 2 Flex

# Stack the shared columns of all positions into one frame, best fitted points first
band_columns = ['pos_rank', 'points', 'top_five', 'bottom_five']
df_points = pd.concat(
    [position_frame[band_columns] for position_frame in (qb, rb, wr, te)],
    axis=0,
)
df_points = df_points.sort_values(by='points', ascending=False)

In [19]:
# Show the first 28 rows of the sorted table with a fresh 0-based index
print(df_points.iloc[:28].reset_index(drop=True))

   pos_rank      points    top_five  bottom_five
0       WR1  105.142159  234.340977   -24.056660
1       QB1  101.793881  231.197223   -27.609460
2       WR2  100.839707  230.024119   -28.344706
3       WR3   96.643433  225.776479   -32.489613
4       RB1   92.590109  220.993035   -35.812818
5       WR4   92.551732  221.597256   -36.493793
6       QB2   89.759796  231.765024   -52.245433
7       TE1   89.401265  168.167722    10.634809
8       WR5   88.562997  217.485651   -40.359656
9       RB2   86.966839  214.232823   -40.299145
10      WR6   84.675624  213.440862   -44.089613
11      RB3   81.566312  207.795183   -44.662559
12      WR7   80.888007  209.462090   -47.686076
13      QB3   78.351185  230.925872   -74.223502
14      WR8   77.198540  205.548535   -51.151455
15      RB4   76.382626  201.670110   -48.904858
16      TE2   74.620818  151.676655    -2.435019
17      WR9   73.605618  201.699397   -54.488161
18      RB5   71.409878  195.847596   -53.027840
19     WR10   70.107