In [285]:
## DraftKings Scoring Cats:
## Outs, Strikeout, Win, Earned Run, Hit, Walk, Hit Batter
## 
## Fanduel Scoring Cats:
## Win, Quality Start, Earned Run, Strikout, Out

In [1]:
## Import packages

import pandas as pd
import os
import numpy as np
import math
from scipy.stats import pearsonr
import requests
from datetime import date
from datetime import datetime
from statistics import mean


pd.set_option('display.max_columns', None)

In [2]:
## Input date for projections
## Get slate and player information from DFS sites

today = "2019-JUN-21"

response = requests.get(f'https://api.sportsdata.io/api/mlb/fantasy/json/DfsSlatesByDate/{today}', headers={'Ocp-Apim-Subscription-Key': '6fcab751d8594ce9909283dcdc522d24'})
games = response.json()
df_slates = pd.json_normalize(games)
df_slates = df_slates[['SlateID', 'Operator', 'OperatorSlateID', 'OperatorName', 'NumberOfGames', 'OperatorGameType', 'SalaryCap']]
df_player_sal = pd.json_normalize(games, record_path =['DfsSlatePlayers'])
df_player_sal = df_player_sal[(df_player_sal['OperatorPosition'] == 'SP') | (df_player_sal['OperatorPosition'] == 'RP') | (df_player_sal['OperatorPosition'] == 'P')].reset_index(drop=True)
df_player_sal = df_player_sal.merge(df_slates, how='left', on='SlateID')

df_player_sal.head()

Unnamed: 0,SlatePlayerID,SlateID,SlateGameID,PlayerID,PlayerGameProjectionStatID,OperatorPlayerID,OperatorSlatePlayerID,OperatorPlayerName,OperatorPosition,OperatorSalary,RemovedByOperator,Team,TeamID,OperatorRosterSlots,Operator,OperatorSlateID,OperatorName,NumberOfGames,OperatorGameType,SalaryCap
0,4255356,9508,58231.0,10000787,2712430.0,548447,12847947,Jacob deGrom,SP,11000,False,NYM,18.0,[UTIL],DraftKings,27878,NYM vs CHC,1,Showdown,50000.0
1,4255357,9508,58231.0,10000787,2712430.0,548447,12848085,Jacob deGrom,SP,16500,False,NYM,18.0,[CPT],DraftKings,27878,NYM vs CHC,1,Showdown,50000.0
2,4255358,9508,58231.0,10000931,2712483.0,202810,12847946,Cole Hamels,SP,11000,False,CHC,9.0,[UTIL],DraftKings,27878,NYM vs CHC,1,Showdown,50000.0
3,4255359,9508,58231.0,10000931,2712483.0,202810,12848084,Cole Hamels,SP,16500,False,CHC,9.0,[CPT],DraftKings,27878,NYM vs CHC,1,Showdown,50000.0
4,4255360,9508,58231.0,10003212,2712481.0,577809,12847949,Kyle Hendricks,SP,11000,False,CHC,9.0,[UTIL],DraftKings,27878,NYM vs CHC,1,Showdown,50000.0


In [3]:
## Get game logs for in season performance calculations
## Can use the PlayerSeason files rather than PlayerGame once projections go live
## We may be able to use game logs from prior to 2018? Will need to change this to an API call

#os.chdir('..')
cwd = os.getcwd()
os.chdir(cwd + '/Fantasy.2018-2021')

## Currently using all past games we have access to for testing
## In practice you'd just need the current season data file
game_stats = pd.read_csv('PlayerGame.2019.csv')


  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
# Select only regular season data (should only be needed on backtesting)
league_stats = game_stats.loc[game_stats.SeasonType == 1].reset_index(drop=True)

# Select only batter data
league_stats = league_stats.loc[league_stats.PositionCategory != 'P'].reset_index(drop=True)

# For beta testing only, need to select only games from prior to the request date
league_stats['Day'] = league_stats['Day'].astype('datetime64[ns]')
league_stats['Day'] = league_stats['Day'].dt.date
date_object = datetime.strptime(today, '%Y-%b-%d').date()

league_stats = league_stats[league_stats['Day'] < date_object].reset_index(drop=True)


In [5]:
league_hbp = league_stats.HitByPitch.sum()
league_hr = league_stats.HomeRuns.sum()
league_bb = league_stats.Walks.sum()
league_so = league_stats.Strikeouts.sum()

In [6]:
## Parse dataframe to only relevant data

# Get list of players on that day's slate
players = list(df_player_sal.PlayerID.unique())

# Select only regular season data (should only be needed on backtesting)
game_stats = game_stats.loc[game_stats.SeasonType == 1].reset_index(drop=True)

# Select only pitcher data
game_stats = game_stats.loc[game_stats.PositionCategory == 'P'].reset_index(drop=True)

# Just changes name of dataframe
data = game_stats.sort_values(['PlayerID', 'Day'], ascending=True).reset_index(drop=True)

# For beta testing only, need to select only games from prior to the request date
data['Day'] = data['Day'].astype('datetime64[ns]')
data['Day'] = data['Day'].dt.date
date_object = datetime.strptime(today, '%Y-%b-%d').date()

data = data[data['Day'] < date_object].reset_index(drop=True)

data.rename(columns = {'Wins': 'W', 'PitchingEarnedRuns': 'ER', 'PitchingWalks': 'BB', 'PitchingStrikeouts': 'SO', 'PitchingHomeRuns': 'HR', 'PitchingHits': 'H'}, inplace = True) 
data['H-HR'] = data.apply(lambda row: row['H'] - row['HR'], axis=1)

data.head()

Unnamed: 0,StatID,TeamID,PlayerID,SeasonType,Season,Name,Team,Position,PositionCategory,Started,InjuryStatus,GameID,OpponentID,Opponent,Day,DateTime,HomeOrAway,Games,FantasyPoints,AtBats,Runs,Hits,Singles,Doubles,Triples,HomeRuns,RunsBattedIn,BattingAverage,Outs,Strikeouts,Walks,HitByPitch,Sacrifices,SacrificeFlies,GroundIntoDoublePlay,StolenBases,CaughtStealing,OnBasePercentage,SluggingPercentage,OnBasePlusSlugging,W,Losses,Saves,InningsPitchedDecimal,TotalOutsPitched,InningsPitchedFull,InningsPitchedOuts,EarnedRunAverage,H,PitchingRuns,ER,BB,SO,HR,PitchesThrown,PitchesThrownStrikes,WalksHitsPerInningsPitched,PitchingBattingAverageAgainst,FantasyPointsFanDuel,FantasyPointsDraftKings,WeightedOnBasePercentage,PitchingCompleteGames,PitchingShutOuts,PitchingOnBasePercentage,PitchingSluggingPercentage,PitchingOnBasePlusSlugging,PitchingStrikeoutsPerNineInnings,PitchingWalksPerNineInnings,PitchingWeightedOnBasePercentage,H-HR
0,2587349,32,10000001,1,2019,Chase Anderson,MIL,SP,P,0,,54171,31,STL,2019-03-29,3/29/2019 8:10:00 PM,HOME,1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,6.0,2.0,0.0,4.5,2.0,1.0,1.0,2.0,3.0,0.0,34.0,23.0,2.0,0.25,12.0,6.1,0.0,0.0,0.0,0.4,0.375,0.775,13.5,9.0,0.3,2.0
1,2616324,32,10000001,1,2019,Chase Anderson,MIL,SP,P,0,,54374,1,LAD,2019-04-14,4/14/2019 4:10:00 PM,AWAY,1,1.66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.666667,8.0,2.0,2.0,3.375,4.0,1.0,1.0,0.0,3.0,1.0,38.0,24.0,1.5,0.333,14.0,7.6,0.0,0.0,0.0,0.333,0.667,1.0,10.12,0.0,0.417,3.0
2,2619174,32,10000001,1,2019,Chase Anderson,MIL,SP,P,0,,54406,31,STL,2019-04-16,4/16/2019 7:40:00 PM,HOME,1,1.33,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.333333,4.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,17.0,12.0,0.75,0.25,10.0,6.4,0.0,0.0,0.0,0.25,0.25,0.5,13.5,0.0,0.25,1.0
3,2625922,32,10000001,1,2019,Chase Anderson,MIL,P,P,1,,54461,1,LAD,2019-04-20,4/20/2019 7:10:00 PM,HOME,1,9.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,5.0,15.0,5.0,0.0,0.0,1.0,0.0,0.0,2.0,5.0,0.0,82.0,51.0,0.6,0.062,36.0,23.45,0.0,0.0,0.0,0.167,0.062,0.229,9.0,3.6,0.111,1.0
4,2635585,32,10000001,1,2019,Chase Anderson,MIL,P,P,1,,54537,18,NYM,2019-04-26,4/26/2019 7:10:00 PM,AWAY,1,2.66,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.666667,14.0,4.0,2.0,3.857,5.0,2.0,2.0,3.0,4.0,0.0,96.0,61.0,1.714,0.263,20.0,9.7,0.0,0.0,0.0,0.364,0.421,0.785,7.71,5.79,0.364,5.0


In [7]:
league_innings = data.InningsPitchedDecimal.sum()
league_ER = data.ER.sum()

In [8]:
## This next step is necessary for creating projections for players without in season data or a pre season projection
## Average all stats by position

average_stats_by_position = data[['Started', 'W', 'TotalOutsPitched', 'ER', 'BB', 'SO', 'HR', 'H', 'H-HR']].reset_index(drop=True).groupby(['Started']).sum()

average_stats_by_position

Unnamed: 0_level_0,W,TotalOutsPitched,ER,BB,SO,HR,H,H-HR
Started,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,425.0,24046.0,4006.0,3416.0,8251.0,1196.0,7554.0,6358.0
1,691.0,35621.0,5812.0,3970.0,11201.0,1835.0,11464.0,9629.0


In [9]:
## Get player stats from prior season to establish a prior league average
## Players with no current data get a league average projection for their position
## The prior seasons league average gets a bit of weight during the current season

os.chdir('..')
cwd = os.getcwd()
os.chdir(cwd + '/Fantasy.2018-2021')
prior_season_stats = pd.read_csv('PlayerSeason.2018.csv')

In [10]:
## Clean up prior season data

prior_season_stats = prior_season_stats.loc[prior_season_stats.SeasonType == 1].reset_index(drop=True)
prior_season_stats = prior_season_stats.loc[prior_season_stats.PositionCategory == 'P'].reset_index(drop=True)
prior_season_stats['isPrimaryStarter'] = prior_season_stats.apply(lambda row: 1 if row['Started'] > (row['Games'] - row['Started']) else 0, axis=1)
prior_season_stats.rename(columns = {'Wins': 'W', 'PitchingEarnedRuns': 'ER', 'PitchingWalks': 'BB', 'PitchingStrikeouts': 'SO', 'PitchingHomeRuns': 'HR', 'PitchingHits': 'H'}, inplace = True)
prior_season_stats['H-HR'] = prior_season_stats.apply(lambda row: row['H'] - row['HR'], axis=1)


In [11]:
## Find per out averages for each relevant position

prior_season_league_stats = prior_season_stats[['isPrimaryStarter', 'W', 'TotalOutsPitched', 'ER', 'BB', 'SO', 'HR', 'H', 'H-HR']].reset_index(drop=True).groupby(['isPrimaryStarter']).sum()
prior_season_league_stats = prior_season_league_stats.div(prior_season_league_stats.TotalOutsPitched, axis=0)
prior_season_league_stats


Unnamed: 0_level_0,W,TotalOutsPitched,ER,BB,SO,HR,H,H-HR
isPrimaryStarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.017069,1.0,0.155079,0.131278,0.327685,0.040607,0.315696,0.275089
1,0.019893,1.0,0.151899,0.111553,0.307177,0.044256,0.312845,0.268589


In [12]:
## Find a league average to use in the current season

positions = [0, 1]

for pos in positions:
    total = prior_season_league_stats.loc[pos] * 15000 + average_stats_by_position.loc[pos]
    new_row = total.divide(total.TotalOutsPitched)
    average_stats_by_position.loc[pos] = new_row
    

In [13]:
## Select only data for players on the current days slates
## Sum all this data to find current season totals for each player

data = data.loc[data['PlayerID'].isin(players)]
sum_data = data[['PlayerID', 'Started', 'Games', 'W', 'TotalOutsPitched', 'ER', 'BB', 'SO', 'H', 'HR', 'H-HR']].reset_index(drop=True).groupby(['PlayerID']).sum()
sum_data.head()

Unnamed: 0_level_0,Started,Games,W,TotalOutsPitched,ER,BB,SO,H,HR,H-HR
PlayerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10000001,8,14,3.0,140.0,21.0,19.0,49.0,46.0,8.0,38.0
10000004,11,14,2.0,173.0,46.0,20.0,46.0,61.0,16.0,45.0
10000005,0,37,0.0,73.0,9.0,11.0,29.0,25.0,2.0,23.0
10000007,15,15,6.0,277.0,40.0,32.0,102.0,79.0,13.0,66.0
10000012,8,9,2.0,117.0,27.0,20.0,30.0,47.0,9.0,38.0


In [14]:
## Get pre season projections file

os.chdir('..')
cwd = os.getcwd()
os.chdir(cwd + '/BaselineProjections')
marcels = pd.read_csv('marcel_pitchers_2019.csv')


In [15]:
## Add reliability metric to current season data

marcels['TotalOutsPitched'] = marcels.apply(lambda row: row['IP'] * 3, axis=1)
marcels['Name'] = marcels.apply(lambda row: row['First'] + ' ' + row['Last'], axis=1)
marcels.rename(columns = {'rel': 'Reliability'}, inplace = True)
df = marcels[['Name', 'Reliability']]
data = data.merge(df, how='left', on='Name')

In [16]:
## Create dictionary of reliability metric for ease of use

reliability_dict = {}

for index, row in data.iterrows():
    if math.isnan(row['Reliability']) == True:
        reliability_dict[row['PlayerID']] = 0
    else:
        reliability_dict[row['PlayerID']] = row['Reliability']

In [17]:
## Create dictionary of current season total stats and stabilization factors for each player
## Separate out HR from non HR

player_dict = {}
if sum_data.shape[0] == 0: pass # need to include something for the first day of the season
else:
    player_id_list = list(sum_data.index.values)
for player in player_id_list:
    player_dict[player] = {}
    for stat in ['TotalOutsPitched', 'ER', 'BB', 'SO', 'H', 'HR', 'H-HR']:
        value = sum_data.loc[player, stat]
        player_dict[player][stat] = value

    Outs = player_dict[player]['TotalOutsPitched']
    ER = player_dict[player]['ER']
    BB = player_dict[player]['BB']
    K = player_dict[player]['SO']
    H = player_dict[player]['H']
    HR = player_dict[player]['HR']
    H_HR = player_dict[player]['H-HR']
    
    PA_est = Outs + H + BB

    rel = reliability_dict[player]
    rel_fact = (2.2 ** rel) / 2

    player_dict[player]['SO_s'] = PA_est / (PA_est + (126 * rel_fact))
    player_dict[player]['BB_s'] = PA_est / (PA_est + (303 * rel_fact))
    player_dict[player]['H-HR_s'] = (PA_est - BB - HR - K) / ((PA_est - BB - HR - K) + (3729 * rel_fact))
    player_dict[player]['HR_s'] = (PA_est - BB - K) / (((PA_est - BB - K) + (1271 * rel_fact)))


In [18]:
## Convert full pre season projections to per out rates
## Had to delete duplicate names manually from marcels files

data_ID = data[['PlayerID', 'Name']].drop_duplicates()
marcels = marcels.merge(data_ID, how='left', on='Name')
marcels = marcels[marcels['PlayerID'].notna()]
marcels = marcels.set_index('PlayerID')
rel_columns = marcels.columns.to_list()[8:31]
marcels = marcels[rel_columns]
marcels = marcels.div(marcels.TotalOutsPitched, axis=0)
marcels['H-HR'] = marcels.apply(lambda row: row['H'] - row['HR'], axis=1)
marcel_players = marcels.index.to_list()
marcels_dict = marcels.to_dict('index')

In [19]:
## Find main position for each player

new = df_player_sal['OperatorPosition'].str.split('/', n = 1, expand = True)
df_player_sal['EffectivePosition'] = new[0]

In [20]:
## Create blended projections for the request date
## Different methods based on availability of pre season projections

player_projs_dict = {}

for player in players:
    
    if player in marcel_players:
    
        new_player_dict = {}
        stat_list = ['SO', 'BB', 'HR', 'H-HR']
        stab_list = ['SO_s', 'BB_s', 'HR_s', 'H-HR_s']
        i = 0

        try:
            player_proj = player_dict[player]
        except:
            
            for stat in stat_list:
                new_player_dict[stat] = marcels_dict[player][stat]
                
            player_projs_dict[player] = new_player_dict
            continue


        Outs = player_proj['TotalOutsPitched']
        
        if Outs == 0:
            
            for stat in stat_list:
                new_player_dict[stat] = marcels_dict[player][stat]
                
            player_projs_dict[player] = new_player_dict
            continue
            

        for stat in stat_list:

            stat_exp = marcels_dict[player][stat] * Outs
            stat_act = player_proj[stat]
            stat_blend = (stat_act * player_proj[stab_list[i]]) + (stat_exp * (1 - player_proj[stab_list[i]]))

            new_player_dict[stat_list[i]] = stat_blend / Outs

            i += 1

        player_projs_dict[player] = new_player_dict
    
    else:
        
        if player in player_dict: # Check if they've played games
            # No marcels, but games (2)
            # position average acts as default marcels projections
            player_proj = player_dict[player]
            
            starts = sum_data.loc[player, 'Started']
            games = sum_data.loc[player, 'Games']
            
            if starts > (games - starts):
                eff_pos = 1
            else:
                eff_pos = 0
            
            proj_by_position = average_stats_by_position.loc[eff_pos]
            proj_by_position = proj_by_position.divide(proj_by_position.TotalOutsPitched)
            proj_by_pos_dict = proj_by_position.to_dict()
            
            new_player_dict = {}
            stat_list = ['SO', 'BB', 'HR', 'H-HR']
            stab_list = ['SO_s', 'BB_s', 'HR_s', 'H-HR_s']
            i = 0
            
            Outs = player_proj['TotalOutsPitched']

            if Outs == 0:

                for stat in stat_list:
                    new_player_dict[stat] = proj_by_pos_dict[stat]

                player_projs_dict[player] = new_player_dict
                continue


            for stat in stat_list:

                stat_exp = proj_by_pos_dict[stat] * Outs
                stat_act = player_proj[stat]
                stat_blend = (stat_act * player_proj[stab_list[i]]) + (stat_exp * (1 - player_proj[stab_list[i]]))

                new_player_dict[stat_list[i]] = stat_blend / Outs

                i += 1

            player_projs_dict[player] = new_player_dict
            
        else:

            # Find effective position

            eff_pos = 0
            proj_by_position = average_stats_by_position.loc[eff_pos]
            proj_by_position = proj_by_position.divide(proj_by_position.TotalOutsPitched)
            proj_by_pos_dict = proj_by_position.to_dict()
            player_projs_dict[player] = proj_by_pos_dict       
        


In [21]:
## Add stat projections to salary data
## Need to make sure that marcels names match with names form sportsdata file 
## Or create master file of player id's

df_player_sal['pSO/Out'] = df_player_sal.apply(lambda row: round(player_projs_dict[row['PlayerID']]['SO'], 3) if row['PlayerID'] in player_projs_dict else np.NaN, axis=1)
df_player_sal['pBB/Out'] = df_player_sal.apply(lambda row: round(player_projs_dict[row['PlayerID']]['BB'], 3) if row['PlayerID'] in player_projs_dict else np.NaN, axis=1)
df_player_sal['pHR/Out'] = df_player_sal.apply(lambda row: round(player_projs_dict[row['PlayerID']]['HR'], 3) if row['PlayerID'] in player_projs_dict else np.NaN, axis=1)
df_player_sal['pH-HR/Out'] = df_player_sal.apply(lambda row: round(player_projs_dict[row['PlayerID']]['H-HR'], 3) if row['PlayerID'] in player_projs_dict else np.NaN, axis=1)
df_player_sal['pHBP/Out'] = df_player_sal.apply(lambda row: round(league_hbp / (league_innings * 3), 3), axis=1)

In [22]:
## What else do we need?

## Wins, QS, Total Outs, HBP, Earned Runs


In [23]:
## For now will use league average HBP rate for pitchers, but once files are combined can use matchup specific rates

league_ERA = (9 / league_innings) * league_ER
FIP_constant = league_ERA - (((13 * league_hr) + (3 * (league_bb + league_hbp)) - (2 * league_so)) / league_innings)
FIP_constant

3.0737090854247526

In [24]:
## Wins = Vegas Moneyline * Prob. of going at least 5 innings * Prob. of bullpen not blowing it
## QS = Prob of going at least 5 innings * Prob. of allowing 3 or less runs
## HBP = League average hitter HBP rate - check
## Earned Runs - ERA Estimators or just use ER in the blended projections
## Total Outs - distribution of potential outs - check

In [25]:
# Prior year average Outs/Start for the league - check
# Current year average Outs/Start for the league - check
# Current year average Outs/Start against the team in question - check
# Current year average Outs/Start by the pitcher - check
# Prior year average Outs/Start by the pitcher - check

In [26]:

os.chdir('..')
cwd = os.getcwd()
os.chdir(cwd + '/Fantasy.2018-2021')

game_stats_prior = pd.read_csv('PlayerGame.2018.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [27]:
# Select only regular season data (should only be needed on backtesting)
game_stats_prior = game_stats_prior.loc[game_stats_prior.SeasonType == 1].reset_index(drop=True)

# Select only pitcher data
game_stats_prior = game_stats_prior.loc[game_stats_prior.PositionCategory == 'P'].reset_index(drop=True)
# Select only starts
game_stats_prior = game_stats_prior.loc[game_stats_prior.Started == 1].reset_index(drop=True)
#game_stats_prior = game_stats_prior[['PlayerID', 'Started', 'TotalOutsPitched']]


In [28]:
game_stats_prior.head()

Unnamed: 0,StatID,TeamID,PlayerID,SeasonType,Season,Name,Team,Position,PositionCategory,Started,InjuryStatus,GameID,OpponentID,Opponent,Day,DateTime,HomeOrAway,Games,FantasyPoints,AtBats,Runs,Hits,Singles,Doubles,Triples,HomeRuns,RunsBattedIn,BattingAverage,Outs,Strikeouts,Walks,HitByPitch,Sacrifices,SacrificeFlies,GroundIntoDoublePlay,StolenBases,CaughtStealing,OnBasePercentage,SluggingPercentage,OnBasePlusSlugging,Wins,Losses,Saves,InningsPitchedDecimal,TotalOutsPitched,InningsPitchedFull,InningsPitchedOuts,EarnedRunAverage,PitchingHits,PitchingRuns,PitchingEarnedRuns,PitchingWalks,PitchingStrikeouts,PitchingHomeRuns,PitchesThrown,PitchesThrownStrikes,WalksHitsPerInningsPitched,PitchingBattingAverageAgainst,FantasyPointsFanDuel,FantasyPointsDraftKings,WeightedOnBasePercentage,PitchingCompleteGames,PitchingShutOuts,PitchingOnBasePercentage,PitchingSluggingPercentage,PitchingOnBasePlusSlugging,PitchingStrikeoutsPerNineInnings,PitchingWalksPerNineInnings,PitchingWeightedOnBasePercentage
0,2157292,19,10000095,1,2018,Dylan Bundy,BAL,SP,P,1,,50597,20,MIN,3/29/2018 12:00:00 AM,3/29/2018 3:05:00 PM,HOME,1,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,21.0,7.0,0.0,0.0,5.0,0.0,0.0,1.0,7.0,0.0,88.0,64.0,0.857,0.2,46.0,26.15,0.0,0.0,0.0,0.231,0.2,0.431,9.0,1.29,0.192
1,2157291,20,10001203,1,2018,Jake Odorizzi,MIN,SP,P,1,,50597,19,BAL,3/29/2018 12:00:00 AM,3/29/2018 3:05:00 PM,AWAY,1,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,18.0,6.0,0.0,0.0,2.0,0.0,0.0,2.0,7.0,0.0,93.0,55.0,0.667,0.1,43.0,25.1,0.0,0.0,0.0,0.182,0.15,0.332,10.5,3.0,0.182
2,2157444,16,10000519,1,2018,James Shields,CHW,SP,P,1,,50598,5,KC,3/29/2018 12:00:00 AM,3/29/2018 4:15:00 PM,AWAY,1,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,6.0,18.0,6.0,0.0,6.0,5.0,4.0,4.0,1.0,0.0,1.0,97.0,59.0,1.0,0.217,12.0,5.3,0.0,0.0,0.0,0.28,0.391,0.671,0.0,1.5,0.28
3,2157445,5,10000506,1,2018,Danny Duffy,KC,SP,P,1,,50598,16,CHW,3/29/2018 12:00:00 AM,3/29/2018 4:15:00 PM,HOME,1,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,4.0,12.0,4.0,0.0,11.25,7.0,5.0,5.0,2.0,5.0,3.0,78.0,46.0,2.25,0.389,12.0,3.6,0.0,0.0,0.0,0.45,1.056,1.506,11.25,4.5,0.6
4,2157340,30,10000432,1,2018,Justin Verlander,HOU,SP,P,1,,50599,28,TEX,3/29/2018 12:00:00 AM,3/29/2018 3:35:00 PM,AWAY,1,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,6.0,18.0,6.0,0.0,0.0,4.0,0.0,0.0,2.0,5.0,0.0,90.0,53.0,1.0,0.2,43.0,23.3,0.0,0.0,0.0,0.304,0.2,0.504,7.5,3.0,0.261


In [29]:
game_stats_prior['ER/out'] = game_stats_prior.apply(lambda row: row['PitchingEarnedRuns'] / row['TotalOutsPitched'] if row['TotalOutsPitched'] > 0 else 0, axis=1)
game_stats_prior['ER/out'].agg(['sum', 'mean', 'std'])


sum     977.878441
mean      0.201417
std       0.295607
Name: ER/out, dtype: float64

In [30]:
prior_year_ind_pitcher_dist = game_stats_prior.groupby('PlayerID').TotalOutsPitched.agg(['sum', 'mean', 'std']).fillna(0)
prior_year_league_innings_dist = game_stats_prior.TotalOutsPitched.agg(['sum', 'mean', 'std'])
current_year_starts = data.loc[data.Started == 1].reset_index(drop=True)
current_year_ind_pitcher_dist = current_year_starts.groupby('PlayerID').TotalOutsPitched.agg(['sum', 'mean', 'std']).fillna(0)
current_year_league_innings_dist = current_year_starts.TotalOutsPitched.agg(['sum', 'mean', 'std'])
current_year_outs = current_year_league_innings_dist['sum']
weighted_league_innings_dist_mean = ((current_year_league_innings_dist['mean'] * current_year_outs) + (prior_year_league_innings_dist['mean'] * 10000)) / (current_year_outs + 10000)
weighted_league_innings_dist_std = ((current_year_league_innings_dist['std'] * current_year_outs) + (prior_year_league_innings_dist['std'] * 10000)) / (current_year_outs + 10000)
current_year_starts_vs_team = data.loc[data.Started == 1].groupby('OpponentID').TotalOutsPitched.agg(['sum', 'mean', 'std']).fillna(0)



In [39]:
## We now have pitcher outs distributions, need to add Vegas data to find probable starters

today = "2019-MAY-21"
#today = date.today()
#today = today.strftime('%Y-%b-%d')

response = requests.get(f'https://api.sportsdata.io/api/mlb/fantasy/json/DfsSlatesByDate/{today}', headers={'Ocp-Apim-Subscription-Key': '6fcab751d8594ce9909283dcdc522d24'})
games = response.json()
df_games = pd.json_normalize(games, record_path =['DfsSlateGames'])
df_games = df_games[['SlateGameID', 'GameID', 'OperatorGameID', 'Game.Season', 'Game.Day', 'Game.AwayTeam', 'Game.HomeTeam', 'Game.AwayTeamID', 'Game.HomeTeamID', 'Game.StadiumID', 'Game.AwayTeamProbablePitcherID', 'Game.HomeTeamProbablePitcherID', 'Game.PointSpread', 'Game.OverUnder', 'Game.AwayTeamMoneyLine', 'Game.HomeTeamMoneyLine']]

result_df = df_player_sal.merge(df_games, how='left', on = ['SlateGameID'])

In [32]:
result_df['HomeOrAway'] = result_df.apply(lambda row: 'AWAY' if row['Game.AwayTeamID'] == row['TeamID'] else 'HOME', axis=1)
result_df['PlayerTeamMoneyLine'] = result_df.apply(lambda row: row['Game.AwayTeamMoneyLine'] if row['HomeOrAway'] == 'AWAY' else row['Game.HomeTeamMoneyLine'], axis=1)
result_df['PlayerTeamPointSpread'] = result_df.apply(lambda row: abs(row['Game.PointSpread']) * -1 if row['PlayerTeamMoneyLine'] < 0 else abs(row['Game.PointSpread']), axis=1)
result_df['PlayerTeamVegasWinProb'] = result_df.apply(lambda row: 100 / (100 + row['PlayerTeamMoneyLine']) if row['PlayerTeamMoneyLine'] > 0 else row['PlayerTeamMoneyLine'] / (row['PlayerTeamMoneyLine'] - 100), axis=1)
result_df['PlayerTeamTotal'] = result_df.apply(lambda row: round((row['Game.OverUnder'] / 2) - ((row['PlayerTeamPointSpread'] * (100 / (abs(row['PlayerTeamMoneyLine']) + 100))) / 2), 2), axis=1)


In [50]:
starting_pitchers = [x for x in starting_pitchers if str(x) != 'nan']

In [51]:
starting_pitchers

[10005249.0,
 10000066.0,
 10001154.0,
 10000004.0,
 10000328.0,
 10001161.0,
 10005708.0,
 10000845.0,
 10007053.0,
 10000247.0,
 10007377.0,
 10000276.0,
 10006871.0,
 10001305.0,
 10000155.0,
 10005787.0,
 10007391.0,
 10001187.0,
 10000613.0,
 10006182.0,
 10005351.0,
 10000807.0,
 10000618.0,
 10000880.0,
 10005808.0,
 10000752.0,
 10000432.0,
 10000886.0,
 10005303.0,
 10001337.0]

In [48]:
starting_pitchers = list(set(list(df_games['Game.AwayTeamProbablePitcherID'])).union(set(list(df_games['Game.HomeTeamProbablePitcherID']))))
len(starting_pitchers)

36

In [34]:
all_starters = {}

for starter in starting_pitchers:
    starter_team = result_df.loc[result_df.PlayerID == starter,'TeamID'].reset_index(drop=True)[0]
    home_team = result_df.loc[result_df.PlayerID == starter,'Game.HomeTeamID'].reset_index(drop=True)[0]
    away_team = result_df.loc[result_df.PlayerID == starter,'Game.AwayTeamID'].reset_index(drop=True)[0]
    if starter_team == home_team:
        opponent_id = away_team
    else:
        opponent_id = home_team
        
    starter_team_w_pct = result_df.loc[result_df.PlayerID == starter,'PlayerTeamVegasWinProb'].reset_index(drop=True)[0]
        
        
    try:    
        mean_vs_team = current_year_starts_vs_team.loc[opponent_id]['mean']
        total_outs_vs_team = current_year_starts_vs_team.loc[opponent_id]['sum']
        weighted_outs = total_outs_vs_team / 10
    except:
        mean_vs_team = weighted_league_innings_dist_mean
        weighted_outs = 0
        
    if starter in current_year_ind_pitcher_dist.index:
        current_year_pitcher_outs = current_year_ind_pitcher_dist.loc[starter]['sum']
        if starter in prior_year_ind_pitcher_dist.index:
            prior_year_pitcher_outs = prior_year_ind_pitcher_dist.loc[starter]['sum']
            total_outs = current_year_pitcher_outs + prior_year_pitcher_outs
            mean_of_starter = ((current_year_ind_pitcher_dist.loc[starter]['mean'] * current_year_pitcher_outs) + (prior_year_ind_pitcher_dist.loc[starter]['mean'] * prior_year_pitcher_outs)) / total_outs
        else:
            mean_of_starter = ((current_year_ind_pitcher_dist.loc[starter]['mean'] * current_year_pitcher_outs) + (weighted_league_innings_dist_mean * 100)) / (current_year_pitcher_outs + 100)
            total_outs = current_year_pitcher_outs
    else:
        if starter in prior_year_ind_pitcher_dist.index:
            prior_year_pitcher_outs = prior_year_ind_pitcher_dist.loc[starter]['sum']
            mean_of_starter = ((prior_year_ind_pitcher_dist.loc[starter]['mean'] * prior_year_pitcher_outs) + (weighted_league_innings_dist_mean * 100)) / (prior_year_pitcher_outs + 100)
            total_outs = prior_year_pitcher_outs
        else:
            mean_of_starter = weighted_league_innings_dist_mean
            total_outs = 0
        
    mean_of_league = weighted_league_innings_dist_mean
    
    combined_mean = (((mean_vs_team * weighted_outs) + (mean_of_starter * total_outs) + (mean_of_league * 100)) / (weighted_outs + total_outs + 100))

    try:
        var_vs_team = current_year_starts_vs_team.loc[opponent_id]['std'] ** 2
        total_outs_vs_team = current_year_starts_vs_team.loc[opponent_id]['sum']
        weighted_outs = total_outs_vs_team / 10
    except:
        var_vs_team = weighted_league_innings_dist_std ** 2
        weighed_outs = 0
      
    if starter in current_year_ind_pitcher_dist.index:
        current_year_pitcher_outs = current_year_ind_pitcher_dist.loc[starter]['sum']
        if starter in prior_year_ind_pitcher_dist.index:
            prior_year_pitcher_outs = prior_year_ind_pitcher_dist.loc[starter]['sum']
            total_outs = current_year_pitcher_outs + prior_year_pitcher_outs
            var_of_starter = (((current_year_ind_pitcher_dist.loc[starter]['std'] ** 2) * current_year_pitcher_outs) + ((prior_year_ind_pitcher_dist.loc[starter]['std'] ** 2) * prior_year_pitcher_outs)) / total_outs
        else:
            var_of_starter = (((current_year_ind_pitcher_dist.loc[starter]['std'] ** 2) * current_year_pitcher_outs) + ((weighted_league_innings_dist_std ** 2) * 100)) / (current_year_pitcher_outs + 100)
            total_outs = current_year_pitcher_outs 
    else:
        if starter in prior_year_ind_pitcher_dist.index:
            prior_year_pitcher_outs = prior_year_ind_pitcher_dist.loc[starter]['sum']
            var_of_starter = (((prior_year_ind_pitcher_dist.loc[starter]['std'] ** 2) * prior_year_pitcher_outs) + ((weighted_league_innings_dist_std ** 2) * 100)) / (current_year_pitcher_outs + 100)
            total_outs = prior_year_pitcher_outs
        else:
            var_of_starter = weighted_league_innings_dist_std ** 2
            total_outs = 0

    var_of_league = weighted_league_innings_dist_std ** 2
    
    
    total_var_outs = weighted_outs + total_outs + 100
    combined_var = ((((weighted_outs / total_var_outs) ** 2) * var_vs_team) + (((total_outs / total_var_outs) ** 2) * var_of_starter) + (((100 / total_var_outs) ** 2) * var_of_league))
    combined_std = np.sqrt(combined_var)
    
    s = np.random.normal(combined_mean, combined_std, 1000)
    
    k_per_out = df_player_sal.loc[df_player_sal.PlayerID == starter, 'pSO/Out'].reset_index(drop=True)[0]
    bb_per_out = df_player_sal.loc[df_player_sal.PlayerID == starter, 'pBB/Out'].reset_index(drop=True)[0]
    hr_per_out = df_player_sal.loc[df_player_sal.PlayerID == starter, 'pHR/Out'].reset_index(drop=True)[0]
    h_hr_per_out = df_player_sal.loc[df_player_sal.PlayerID == starter, 'pH-HR/Out'].reset_index(drop=True)[0]
    hbp_per_out = df_player_sal.loc[df_player_sal.PlayerID == starter, 'pHBP/Out'].reset_index(drop=True)[0]

    ks = []
    bbs = []
    hrs = []
    h_min_hr = []
    hbps = []
    ers = []
    qs = []
    over_5 = []
    ips = []

    for i in range(len(s)):
        sim_ks = s[i] * k_per_out
        sim_bbs = s[i] * bb_per_out
        sim_hrs = s[i] * hr_per_out
        sim_hits = s[i] * h_hr_per_out
        sim_hbp = s[i] * hbp_per_out
        sim_ip = s[i] / 3
        
        ips.append(sim_ip)

        sim_fip = (((13 * sim_hrs) + (3 * (sim_bbs + sim_hbp)) - (2 * sim_ks)) / sim_ip) + FIP_constant
        fip_total_er = (sim_fip / 9) * sim_ip
        sim_er_total = np.random.normal(fip_total_er, 1) 
        
        if (sim_ip >= 6) & (sim_er_total <= 3):
            qs.append(1)
        else:
            qs.append(0)

        if sim_ip >= 5:
            over_5.append(1)
        else:
            over_5.append(0)

        ks.append(sim_ks)
        bbs.append(sim_bbs)
        hrs.append(sim_hrs)
        h_min_hr.append(sim_hits)
        hbps.append(sim_hbp)
        ers.append(sim_er_total)
       

    starter_dict = {}
    
    starter_dict['pIP'] = round(mean(ips), 2)
    starter_dict['pK'] = round(mean(ks), 2)
    starter_dict['pBB'] = round(mean(bbs), 2)
    starter_dict['pHR'] = round(mean(hrs), 2)
    starter_dict['pH'] = round(mean(hrs) + mean(h_min_hr), 2)
    starter_dict['pHBP'] = round(mean(hbps), 2)
    starter_dict['pQS'] = round(mean(qs), 2)
    starter_dict['pER'] = round(mean(ers), 2)
    starter_dict['pW'] = round(mean(over_5) * starter_team_w_pct, 2)
    
    all_starters[starter] = starter_dict
    

In [35]:
result_df['pIP'] = result_df.apply(lambda row: all_starters[row['PlayerID']]['pIP'] if row['PlayerID'] in all_starters else 1, axis=1)
result_df['pW'] = result_df.apply(lambda row: all_starters[row['PlayerID']]['pW'] if row['PlayerID'] in all_starters else 0, axis=1)
result_df['pQS'] = result_df.apply(lambda row: all_starters[row['PlayerID']]['pQS'] if row['PlayerID'] in all_starters else 0, axis=1)
result_df['pK'] = result_df.apply(lambda row: all_starters[row['PlayerID']]['pK'] if row['PlayerID'] in all_starters else row['pSO/Out'] * 3, axis=1)
result_df['pBB'] = result_df.apply(lambda row: all_starters[row['PlayerID']]['pBB'] if row['PlayerID'] in all_starters else row['pBB/Out'] * 3, axis=1)
result_df['pHR'] = result_df.apply(lambda row: all_starters[row['PlayerID']]['pHR'] if row['PlayerID'] in all_starters else row['pHR/Out'] * 3, axis=1)
result_df['pH'] = result_df.apply(lambda row: all_starters[row['PlayerID']]['pH'] if row['PlayerID'] in all_starters else (row['pH-HR/Out'] + row['pHR/Out']) * 3, axis=1)
result_df['pHBP'] = result_df.apply(lambda row: all_starters[row['PlayerID']]['pHBP'] if row['PlayerID'] in all_starters else row['pHBP/Out'] * 3, axis=1)
result_df['pER'] = result_df.apply(lambda row: all_starters[row['PlayerID']]['pER'] if row['PlayerID'] in all_starters else round(((((13 * row['pHR']) + (3 * (row['pBB'] + row['pHBP'])) - (2 * row['pK'])) / 1) + FIP_constant) / 9, 2), axis=1)

result_df['DraftKingsPoints'] = result_df.apply(lambda row: round(row['pIP'] * 2.25 + row['pK'] * 2 + row['pW'] * 4 + row['pER'] * -2 + row['pH'] * -0.6 + row['pBB'] * -0.6 + row['pHBP'] * -0.6, 2), axis=1)
result_df['FanDuelPoints'] = result_df.apply(lambda row: round(row['pW'] * 6 + row['pQS'] * 4 + row['pER'] * -3 + row['pK'] * 3 + row['pIP'] * 3, 2), axis=1)



In [38]:
result_df[result_df['OperatorPlayerName'] == 'Chris Sale']

Unnamed: 0,SlatePlayerID,SlateID,SlateGameID,PlayerID,PlayerGameProjectionStatID,OperatorPlayerID,OperatorSlatePlayerID,OperatorPlayerName,OperatorPosition,OperatorSalary,RemovedByOperator,Team,TeamID,OperatorRosterSlots,Operator,OperatorSlateID,OperatorName,NumberOfGames,OperatorGameType,SalaryCap,EffectivePosition,pSO/Out,pBB/Out,pHR/Out,pH-HR/Out,pHBP/Out,GameID,OperatorGameID,Game.Season,Game.Day,Game.AwayTeam,Game.HomeTeam,Game.AwayTeamID,Game.HomeTeamID,Game.StadiumID,Game.AwayTeamProbablePitcherID,Game.HomeTeamProbablePitcherID,Game.PointSpread,Game.OverUnder,Game.AwayTeamMoneyLine,Game.HomeTeamMoneyLine,HomeOrAway,PlayerTeamMoneyLine,PlayerTeamPointSpread,PlayerTeamVegasWinProb,PlayerTeamTotal,pIP,pW,pQS,pK,pBB,pHR,pH,pHBP,pER,DraftKingsPoints,FanDuelPoints
144,4251572,9439,57824.0,10000249,2712956.0,392121,12844325,Chris Sale,SP,12000,False,BOS,25.0,[P],DraftKings,27782,Main,14,Classic,50000.0,SP,0.468,0.078,0.034,0.223,0.015,55288.0,5582949.0,2019.0,2019-06-21T00:00:00,TOR,BOS,3.0,25.0,50.0,10007605.0,10000249.0,-1.5,8.5,263.0,-291.0,HOME,-291.0,-1.5,0.744246,4.44,5.76,0.54,0.36,8.08,1.35,0.59,4.44,0.26,1.56,24.53,41.52
924,4254082,9507,58225.0,10000249,2712956.0,392121,12846295,Chris Sale,SP,12000,False,BOS,25.0,[P],DraftKings,27875,Turbo,6,Classic,50000.0,SP,0.468,0.078,0.034,0.223,0.015,55288.0,5582949.0,2019.0,2019-06-21T00:00:00,TOR,BOS,3.0,25.0,50.0,10007605.0,10000249.0,-1.5,8.5,263.0,-291.0,HOME,-291.0,-1.5,0.744246,4.44,5.76,0.54,0.36,8.08,1.35,0.59,4.44,0.26,1.56,24.53,41.52
1879,4256993,9515,58267.0,10000249,2712956.0,12477,36381-12477,Chris Sale,P,12000,False,BOS,25.0,[P],FanDuel,36381,All Day,15,Classic,35000.0,P,0.468,0.078,0.034,0.223,0.015,55288.0,142103.0,2019.0,2019-06-21T00:00:00,TOR,BOS,3.0,25.0,50.0,10007605.0,10000249.0,-1.5,8.5,263.0,-291.0,HOME,-291.0,-1.5,0.744246,4.44,5.76,0.54,0.36,8.08,1.35,0.59,4.44,0.26,1.56,24.53,41.52
2574,4255804,9513,58251.0,10000249,2712956.0,12477,36380-12477,Chris Sale,P,12000,False,BOS,25.0,[P],FanDuel,36380,Main,14,Classic,35000.0,P,0.468,0.078,0.034,0.223,0.015,55288.0,142103.0,2019.0,2019-06-21T00:00:00,TOR,BOS,3.0,25.0,50.0,10007605.0,10000249.0,-1.5,8.5,263.0,-291.0,HOME,-291.0,-1.5,0.744246,4.44,5.76,0.54,0.36,8.08,1.35,0.59,4.44,0.26,1.56,24.53,41.52


In [36]:
projection_df = result_df[['PlayerID','SlateID', 'Operator', 'OperatorPlayerID', 'TeamID', 'Team', 'OperatorSalary','OperatorGameType', 'SalaryCap', 'OperatorPlayerName', 'OperatorPosition', 'OperatorRosterSlots', 'Team', 'pIP', 'pW', 'pQS', 'pK', 'pBB', 'pHR', 'pH', 'pHBP', 'pER', 'DraftKingsPoints', 'FanDuelPoints']].reset_index(drop=True)
projection_df

Unnamed: 0,PlayerID,SlateID,Operator,OperatorPlayerID,TeamID,Team,OperatorSalary,OperatorGameType,SalaryCap,OperatorPlayerName,OperatorPosition,OperatorRosterSlots,Team.1,pIP,pW,pQS,pK,pBB,pHR,pH,pHBP,pER,DraftKingsPoints,FanDuelPoints
0,10000787,9508,DraftKings,548447,18.0,NYM,11000,Showdown,50000.0,Jacob deGrom,SP,[UTIL],NYM,1.0,0.0,0.0,1.209,0.243,0.096,0.831,0.045,0.31,3.38,5.70
1,10000787,9508,DraftKings,548447,18.0,NYM,16500,Showdown,50000.0,Jacob deGrom,SP,[CPT],NYM,1.0,0.0,0.0,1.209,0.243,0.096,0.831,0.045,0.31,3.38,5.70
2,10000931,9508,DraftKings,202810,9.0,CHC,11000,Showdown,50000.0,Cole Hamels,SP,[UTIL],CHC,1.0,0.0,0.0,0.972,0.354,0.123,0.909,0.045,0.44,2.53,4.60
3,10000931,9508,DraftKings,202810,9.0,CHC,16500,Showdown,50000.0,Cole Hamels,SP,[CPT],CHC,1.0,0.0,0.0,0.972,0.354,0.123,0.909,0.045,0.44,2.53,4.60
4,10003212,9508,DraftKings,577809,9.0,CHC,11000,Showdown,50000.0,Kyle Hendricks,SP,[UTIL],CHC,1.0,0.0,0.0,0.855,0.219,0.108,0.897,0.045,0.40,2.46,4.37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3530,10006217,9528,FanDuel,80849,11.0,TB,5500,Classic,35000.0,Adam Kolarek,P,[P],TB,1.0,0.0,0.0,0.792,0.333,0.096,0.984,0.045,0.43,2.16,4.09
3531,10000924,9528,FanDuel,52177,13.0,SEA,5500,Classic,35000.0,Jesse Biddle,P,[P],SEA,1.0,0.0,0.0,0.903,0.507,0.108,0.885,0.045,0.48,2.23,4.27
3532,10008620,9528,FanDuel,84452,1.0,LAD,5500,Classic,35000.0,Caleb Ferguson,P,[P],LAD,1.0,0.0,0.0,1.128,0.402,0.138,0.894,0.045,0.44,2.82,5.06
3533,10002006,9528,FanDuel,52733,1.0,LAD,5500,Classic,35000.0,Scott Alexander,P,[P],LAD,1.0,0.0,0.0,0.735,0.387,0.093,0.921,0.045,0.46,1.99,3.83


In [37]:
projection_df.sort_values(by=['DraftKingsPoints'], ascending=False)

Unnamed: 0,PlayerID,SlateID,Operator,OperatorPlayerID,TeamID,Team,OperatorSalary,OperatorGameType,SalaryCap,OperatorPlayerName,OperatorPosition,OperatorRosterSlots,Team.1,pIP,pW,pQS,pK,pBB,pHR,pH,pHBP,pER,DraftKingsPoints,FanDuelPoints
2574,10000249,9513,FanDuel,12477,25.0,BOS,12000,Classic,35000.0,Chris Sale,P,[P],BOS,5.76,0.54,0.36,8.080,1.350,0.590,4.440,0.260,1.56,24.53,41.52
1879,10000249,9515,FanDuel,12477,25.0,BOS,12000,Classic,35000.0,Chris Sale,P,[P],BOS,5.76,0.54,0.36,8.080,1.350,0.590,4.440,0.260,1.56,24.53,41.52
924,10000249,9507,DraftKings,392121,25.0,BOS,12000,Classic,50000.0,Chris Sale,SP,[P],BOS,5.76,0.54,0.36,8.080,1.350,0.590,4.440,0.260,1.56,24.53,41.52
144,10000249,9439,DraftKings,392121,25.0,BOS,12000,Classic,50000.0,Chris Sale,SP,[P],BOS,5.76,0.54,0.36,8.080,1.350,0.590,4.440,0.260,1.56,24.53,41.52
1898,10001350,9515,FanDuel,10978,35.0,WSH,9800,Classic,35000.0,Stephen Strasburg,P,[P],WSH,5.94,0.49,0.36,7.030,1.620,0.680,5.060,0.270,2.05,21.12,37.14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1662,10005322,9502,DraftKings,503235,14.0,ARI,6000,Showdown,50000.0,Matt Koch,SP,[UTIL],ARI,1.00,0.00,0.00,0.603,0.279,0.183,0.984,0.045,0.58,1.51,3.07
3188,10005414,9513,FanDuel,81664,17.0,DET,5500,Classic,35000.0,Zac Reininger,P,[P],DET,1.00,0.00,0.00,0.606,0.402,0.180,1.035,0.045,0.62,1.33,2.96
614,10005414,9439,DraftKings,737896,17.0,DET,4000,Classic,50000.0,Zac Reininger,RP,[P],DET,1.00,0.00,0.00,0.606,0.402,0.180,1.035,0.045,0.62,1.33,2.96
1130,10005414,9507,DraftKings,737896,17.0,DET,4000,Classic,50000.0,Zac Reininger,RP,[P],DET,1.00,0.00,0.00,0.606,0.402,0.180,1.035,0.045,0.62,1.33,2.96
