In [339]:
import pandas as pd
import os
import numpy as np
import math
from scipy.stats import pearsonr
import requests
from datetime import date
from datetime import datetime
from statistics import mean
from pulp import *
from collections import Counter
import warnings
import requests, zipfile
from io import BytesIO
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

from projection_file_functions import *

In [340]:
today = date.today()
d = today.strftime("%Y-%b-%d")


In [126]:
#Defining the zip file URL
## This should only happen once a day
url = ' https://sportsdata.io/members/download-file?product=de918b91-6b99-4425-aa3b-de8835d70665'

filename = 'Fantasy.2019-2022.zip'

# Downloading the file by sending the request to the URL
req = requests.get(url)

# extracting the zip file contents
zipfile = zipfile.ZipFile(BytesIO(req.content))
cwd = os.getcwd()
zipfile.extractall(cwd + '/daily-downloads/Fantasy.2019-2022' + d)

TypeError: can only concatenate str (not "type") to str

In [516]:
response = requests.get(f'https://api.sportsdata.io/v3/mlb/projections/json/StartingLineupsByDate/{date}', headers={'Ocp-Apim-Subscription-Key': '6fcab751d8594ce9909283dcdc522d24'})
games = response.json()

In [517]:
games

{'statusCode': 401,
 'message': 'Access denied due to invalid subscription key. Make sure to provide a valid key for an active subscription.'}

In [341]:
def get_batter_salaries(date):
    response = requests.get(f'https://api.sportsdata.io/api/mlb/fantasy/json/DfsSlatesByDate/{date}', headers={'Ocp-Apim-Subscription-Key': '6fcab751d8594ce9909283dcdc522d24'})
    games = response.json()
    df_slates = pd.json_normalize(games)
    df_slates = df_slates[['SlateID', 'Operator', 'OperatorSlateID', 'OperatorName', 'NumberOfGames', 'OperatorGameType', 'SalaryCap']]
    df_player_sal = pd.json_normalize(games, record_path =['DfsSlatePlayers'])
    df_player_sal = df_player_sal[(df_player_sal['OperatorPosition'] != 'SP') & (df_player_sal['OperatorPosition'] != 'RP') & (df_player_sal['OperatorPosition'] != 'P')].reset_index(drop=True)
    df_player_sal = df_player_sal.merge(df_slates, how='left', on='SlateID')

    players = list(df_player_sal.PlayerID.unique())

    return df_player_sal, players



In [342]:
sals, players = get_batter_salaries(d)

In [343]:
def get_current_season_game_logs_batters(date):
    
    season = date[:4]
    cwd = os.getcwd()
    os.chdir(cwd + '/daily-downloads/Fantasy.2019-2022' + date)

    ## Currently using all past games we have access to for park_adjusted_sals_with_vegas_linesing
    ## In practice you'd just need the current season data file
    game_stats = pd.read_csv(f'PlayerGame.{season}.csv')

    # Select only regular season data (should only be needed on backpark_adjusted_sals_with_vegas_linesing)
    game_stats = game_stats.loc[game_stats.SeasonType == 1].reset_index(drop=True)

    # Select only batter data
    game_stats = game_stats.loc[game_stats.PositionCategory != 'P'].reset_index(drop=True)

    # Calculate plate appearances by game
    game_stats['PlateAppearances'] = game_stats.apply(lambda row: row['AtBats'] + row['Walks'] + row['HitByPitch'] + row['Sacrifices'], axis=1)

    # Just changes name of dataframe
    #data = game_stats.sort_values(['PlayerID', 'Day'], ascending=True).reset_index(drop=True)

    # For park_adjusted_sals_with_vegas_linesing only, need to select only games from prior to the request date
    #data['Day'] = data['Day'].astype('datetime64[ns]')
    #data['Day'] = data['Day'].dt.date
    #date_object = datetime.strptime(date, '%Y-%b-%d').date()

    #data = data[data['Day'] < date_object].reset_index(drop=True)

    game_stats.rename(columns = {'Runs': 'R', 'Singles': 'S', 'Doubles': 'D', 'Triples': 'T', 'HomeRuns': 'HR', 'AtBats': 'AB', 'Walks':'BB', 'RunsBattedIn': 'RBI', 'PlateAppearances': 'PA', 'Hits': 'H', 'HitByPitch': 'HP', 'StolenBases': 'SB', 'CaughtStealing': 'CS', 'Strikeouts': 'SO'},  
            inplace = True)

    return game_stats


In [344]:
game_logs = get_current_season_game_logs_batters(d)

In [346]:
# Retrieves already calculated "marcels" projections
# which are full season projections calculated prior to the 
# start of the specified season
# These act as our baseline assumption of talent level for each player 
def get_marcels_batters(date):

    season = date[:4]

    os.chdir('../..')
    cwd = os.getcwd()
    os.chdir(cwd + '/BaselineProjections')
    marcels = pd.read_csv(f'marcel_batters_{season}.csv')
    marcels.rename(columns = {'rel': 'Reliability'}, inplace = True)

    return marcels

In [347]:
marcels = get_marcels_batters(d)

In [348]:
df = marcels[['Name', 'Reliability']]
game_logs = game_logs.merge(df, how='left', on='Name')
game_logs = game_logs.loc[game_logs['PlayerID'].isin(players)]
sum_data = game_logs[['PlayerID', 'PA', 'S', 'D', 'T', 'HR', 'BB', 'HP', 'SB', 'CS', 'SO']].reset_index(drop=True).groupby(['PlayerID']).sum()

reliability_dict = {}



In [349]:
for index, row in game_logs.iterrows():
    if math.isnan(row['Reliability']) == True:
        reliability_dict[row['PlayerID']] = 0
    else:
        reliability_dict[row['PlayerID']] = row['Reliability']

In [350]:
def create_stabilization_dict_hitters(dataframe, rel_dict):

    player_dict = {}
    if dataframe.shape[0] == 0: pass # need to include something for the first day of the season
    else:
        player_id_list = list(dataframe.index.values)
    for player in player_id_list:
        player_dict[player] = {}
        for stat in ['PA', 'S', 'D', 'T', 'HR', 'BB', 'HP', 'SB', 'CS', 'SO']:
            value = dataframe.loc[player, stat]
            player_dict[player][stat] = value

        PA = player_dict[player]['PA']
        BB = player_dict[player]['BB']
        HBP = player_dict[player]['HP']
        HR = player_dict[player]['HR']
        S = player_dict[player]['S']
        D = player_dict[player]['D']
        T = player_dict[player]['T']
        CS = player_dict[player]['CS']
        SO = player_dict[player]['SO']

        rel = rel_dict[player]
        rel_fact = (2.2 ** rel) / 2

        # These values represent how much influence a players performance in the current season
        # will have on adjusting our prior estimates of a players per PA talent level
        player_dict[player]['BB_s'] = PA / (PA + (120 * rel_fact))
        player_dict[player]['HBP_s'] = PA / (PA + (240 * rel_fact))
        player_dict[player]['S_s'] = PA / (PA + (290 * rel_fact))
        player_dict[player]['D_s'] = (D + T) / ((D + T) + (48 * rel_fact))
        player_dict[player]['T_s'] = (D + T) / ((D + T) + (48 * rel_fact))
        player_dict[player]['HR_s'] = PA / (PA + (170 * rel_fact))
        player_dict[player]['SBA_s'] = (S + BB + HBP) / ((S + BB + HBP) + (39 * rel_fact))
        player_dict[player]['SO_s'] = PA / (PA + (60 * rel_fact))

    return player_dict

In [351]:
player_stabilization_dict = create_stabilization_dict_hitters(sum_data, reliability_dict)

In [356]:
def create_per_pa_marcels_rates_hitters(game_logs_file, marcels_file):
    data_ID = game_logs_file[['PlayerID', 'Name']].drop_duplicates()
    marcels = marcels_file.drop_duplicates(subset=['Name'], keep='first')
    marcels = marcels.merge(data_ID, how='left', on='Name')
    marcels = marcels[marcels['PlayerID'].notna()]
    marcels = marcels.set_index('PlayerID')
    rel_columns = marcels.columns.to_list()[12:30]
    marcels = marcels[rel_columns]
    marcels = marcels.div(marcels.PA, axis=0)
    marcels['S'] = marcels.apply(lambda row: row['H'] - row['D'] - row['T'] - row['HR'], axis=1)
    marcel_players = marcels.index.to_list()
    marcels_dict = marcels.to_dict('index')

    return marcels_dict, marcel_players

In [357]:
marcels_dict, marcel_players = create_per_pa_marcels_rates_hitters(game_logs, marcels)

In [359]:
new = sals['OperatorPosition'].str.split('/', n = 1, expand = True)
sals['EffectivePosition'] = new[0]

In [360]:
game_logs.head()

Unnamed: 0,StatID,TeamID,PlayerID,SeasonType,Season,Name,Team,Position,PositionCategory,Started,InjuryStatus,GameID,OpponentID,Opponent,Day,DateTime,HomeOrAway,Games,FantasyPoints,AB,R,H,S,D,T,HR,RBI,BattingAverage,Outs,SO,BB,HP,Sacrifices,SacrificeFlies,GroundIntoDoublePlay,SB,CS,OnBasePercentage,SluggingPercentage,OnBasePlusSlugging,Wins,Losses,Saves,InningsPitchedDecimal,TotalOutsPitched,InningsPitchedFull,InningsPitchedOuts,EarnedRunAverage,PitchingHits,PitchingRuns,PitchingEarnedRuns,PitchingWalks,PitchingStrikeouts,PitchingHomeRuns,PitchesThrown,PitchesThrownStrikes,WalksHitsPerInningsPitched,PitchingBattingAverageAgainst,FantasyPointsFanDuel,FantasyPointsDraftKings,WeightedOnBasePercentage,PitchingCompleteGames,PitchingShutOuts,PitchingOnBasePercentage,PitchingSluggingPercentage,PitchingOnBasePlusSlugging,PitchingStrikeoutsPerNineInnings,PitchingWalksPerNineInnings,PitchingWeightedOnBasePercentage,PA,Reliability
0,3602597,26,10000780,1,2022,Eddie Rosario,ATL,RF,OF,1,,64342,2,CIN,4/7/2022 12:00:00 AM,4/7/2022 8:00:00 PM,HOME,1,2.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.2,4.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.84
1,3602598,26,10001961,1,2022,Matt Olson,ATL,1B,IF,1,,64342,2,CIN,4/7/2022 12:00:00 AM,4/7/2022 8:00:00 PM,HOME,1,3.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.2,6.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.86
2,3602599,26,10006622,1,2022,Austin Riley,ATL,3B,IF,1,,64342,2,CIN,4/7/2022 12:00:00 AM,4/7/2022 8:00:00 PM,HOME,1,9.0,4.0,1.0,3.0,2.0,0.0,0.0,1.0,2.0,0.75,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.75,1.5,2.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.2,22.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.84
3,3602600,26,10000689,1,2022,Marcell Ozuna,ATL,LF,OF,1,,64342,2,CIN,4/7/2022 12:00:00 AM,4/7/2022 8:00:00 PM,HOME,1,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,2.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.82
4,3602601,26,10005445,1,2022,Ozzie Albies,ATL,2B,IF,1,,64342,2,CIN,4/7/2022 12:00:00 AM,4/7/2022 8:00:00 PM,HOME,1,1.0,4.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.25,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.85


In [361]:
def get_average_stats_by_position(date, game_logs, pos_group):

    if pos_group == 'hitters':

        ## Average all stats by position

        average_stats_by_position = game_logs[['Position', 'PA', 'S', 'D', 'T', 'HR', 'BB', 'HP', 'SB', 'CS', 'SO']].reset_index(drop=True).groupby(['Position']).sum()

        ## DH and PH stats get lumped into 1B position
        average_stats_by_position.loc['1B'] = average_stats_by_position.loc[['1B', 'DH', 'PH']].sum()

        ## DFS sites only use the OF position, so LF, RF, and CF get lumped in together
        average_stats_by_position.loc['OF'] = average_stats_by_position.loc[['LF', 'RF', 'CF']].sum()
        average_stats_by_position = average_stats_by_position.drop(['DH', 'PH', 'PR', 'LF', 'RF', 'CF'])

        prior_season_stats = get_prior_season_stats(date)
        prior_season_stats = prior_season_stats.loc[prior_season_stats.SeasonType == 1].reset_index(drop=True)
        prior_season_stats = prior_season_stats.loc[prior_season_stats.PositionCategory != 'P'].reset_index(drop=True)
        prior_season_stats['PlateAppearances'] = prior_season_stats.apply(lambda row: row['AtBats'] + row['Walks'] + row['HitByPitch'] + row['Sacrifices'], axis=1)
        prior_season_stats.rename(columns = {'Runs': 'R', 'Singles': 'S', 'Doubles': 'D', 'Triples': 'T', 'HomeRuns': 'HR', 'AtBats': 'AB', 'Walks':'BB', 'RunsBattedIn': 'RBI', 'PlateAppearances': 'PA', 'Hits': 'H', 'HitByPitch': 'HP', 'StolenBases': 'SB', 'CaughtStealing': 'CS', 'Strikeouts': 'SO'},  
                inplace = True)
        
        prior_season_league_stats = prior_season_stats[['Position', 'PA', 'S', 'D', 'T', 'HR', 'BB', 'HP', 'SB', 'CS', 'SO']].reset_index(drop=True).groupby(['Position']).sum()
        prior_season_league_stats.loc['1B'] = prior_season_league_stats.loc[['1B', 'DH']].sum()
        prior_season_league_stats.loc['OF'] = prior_season_league_stats.loc[['LF', 'RF', 'CF']].sum()
        prior_season_league_stats = prior_season_league_stats.drop(['DH', 'LF', 'RF', 'CF'])
        prior_season_league_stats = prior_season_league_stats.div(prior_season_league_stats.PA, axis=0)

        positions = ['1B', '2B', '3B', 'SS', 'C', 'OF']

        for pos in positions:
            total = prior_season_league_stats.loc[pos] * 2000 + average_stats_by_position.loc[pos]
            new_row = total.divide(total.PA)
            average_stats_by_position.loc[pos] = new_row

        return average_stats_by_position

    elif pos_group == 'pitchers':

        average_stats_by_position = game_logs[['Started', 'W', 'TotalOutsPitched', 'ER', 'BB', 'SO', 'HR', 'H', 'H-HR']].reset_index(drop=True).groupby(['Started']).sum()

        prior_season_stats = get_prior_season_stats(date)
        prior_season_stats = prior_season_stats.loc[prior_season_stats.SeasonType == 1].reset_index(drop=True)
        prior_season_stats = prior_season_stats.loc[prior_season_stats.PositionCategory == 'P'].reset_index(drop=True)
        prior_season_stats['isPrimaryStarter'] = prior_season_stats.apply(lambda row: 1 if row['Started'] > (row['Games'] - row['Started']) else 0, axis=1)
        prior_season_stats.rename(columns = {'Wins': 'W', 'PitchingEarnedRuns': 'ER', 'PitchingWalks': 'BB', 'PitchingStrikeouts': 'SO', 'PitchingHomeRuns': 'HR', 'PitchingHits': 'H'}, inplace = True)
        prior_season_stats['H-HR'] = prior_season_stats.apply(lambda row: row['H'] - row['HR'], axis=1)

        prior_season_league_stats = prior_season_stats[['isPrimaryStarter', 'W', 'TotalOutsPitched', 'ER', 'BB', 'SO', 'HR', 'H', 'H-HR']].reset_index(drop=True).groupby(['isPrimaryStarter']).sum()
        prior_season_league_stats = prior_season_league_stats.div(prior_season_league_stats.TotalOutsPitched, axis=0)

        positions = [0, 1]

        for pos in positions:
            total = prior_season_league_stats.loc[pos] * 15000 + average_stats_by_position.loc[pos]
            new_row = total.divide(total.TotalOutsPitched)
            average_stats_by_position.loc[pos] = new_row

        return average_stats_by_position

    else:

        return


In [362]:
average_stats_by_position = get_average_stats_by_position(d, game_logs, 'hitters')

In [363]:
average_stats_by_position

Unnamed: 0_level_0,PA,S,D,T,HR,BB,HP,SB,CS,SO
Position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1B,1.0,0.13551,0.043465,0.002189,0.034364,0.097084,0.01195,0.005422,0.003467,0.232056
2B,1.0,0.14076,0.041392,0.005767,0.020453,0.079526,0.012879,0.014057,0.005093,0.206173
3B,1.0,0.14382,0.047249,0.002204,0.030387,0.093339,0.008362,0.009962,0.003824,0.218196
C,1.0,0.128954,0.037796,0.001399,0.027606,0.086992,0.012854,0.003185,0.000831,0.246374
SS,1.0,0.150124,0.045063,0.005186,0.023348,0.074744,0.010882,0.020611,0.005419,0.214761
OF,1.0,0.129947,0.043384,0.004246,0.027793,0.090622,0.01148,0.015145,0.005642,0.237926


In [376]:
player_stabilization_dict

{10000029: {'PA': 12.0,
  'S': 1.0,
  'D': 1.0,
  'T': 0.0,
  'HR': 1.0,
  'BB': 0.0,
  'HP': 0.0,
  'SB': 0.0,
  'CS': 0.0,
  'SO': 3.0,
  'BB_s': 0.09349046508409889,
  'HBP_s': 0.04903750197515948,
  'S_s': 0.040928802711129905,
  'D_s': 0.021033973714052974,
  'T_s': 0.021033973714052974,
  'HR_s': 0.06785920737033176,
  'SBA_s': 0.025762914616792813,
  'SO_s': 0.17099456843806804},
 10000030: {'PA': 42.0,
  'S': 4.0,
  'D': 2.0,
  'T': 0.0,
  'HR': 3.0,
  'BB': 3.0,
  'HP': 0.0,
  'SB': 0.0,
  'CS': 1.0,
  'SO': 10.0,
  'BB_s': 0.29542657328739846,
  'HBP_s': 0.17331407885264932,
  'S_s': 0.1478503670101921,
  'D_s': 0.04754329265841355,
  'T_s': 0.04754329265841355,
  'HR_s': 0.22838041767320472,
  'SBA_s': 0.1769715766235988,
  'SO_s': 0.45610701429058337},
 10000031: {'PA': 75.0,
  'S': 14.0,
  'D': 4.0,
  'T': 0.0,
  'HR': 0.0,
  'BB': 8.0,
  'HP': 0.0,
  'SB': 2.0,
  'CS': 0.0,
  'SO': 14.0,
  'BB_s': 0.386318394014877,
  'HBP_s': 0.23940186997362267,
  'S_s': 0.2066556418936

In [385]:
def create_blended_projections_hitters(players, marcel_players, player_dict, marcels_dict, df_player_sal, average_stats_by_position):
    ## Different methods based on availability of pre season projections

    player_projs_dict = {}

    for player in players:
        
        if player in marcel_players: # if they have a marcels projection
        
            new_player_dict = {}
            stat_list = ['S', 'D', 'T', 'HR', 'BB', 'HP', 'SO', 'SB']
            stab_list = ['S_s', 'D_s', 'T_s', 'HR_s', 'BB_s', 'HBP_s', 'SO_s', 'SBA_s']
            i = 0

            if player in player_dict:
                
                player_proj = player_dict[player]
                
            else: # If they do not have games (3)
                
                for stat in stat_list:
                    new_player_dict[stat] = marcels_dict[player][stat]
                    
                player_projs_dict[player] = new_player_dict
                
            # both games and marcel (1)

            PA = player_proj['PA']
            
            if PA == 0:
                
                for stat in stat_list:
                    new_player_dict[stat] = marcels_dict[player][stat]
                    
                player_projs_dict[player] = new_player_dict                

            for stat in stat_list:

                if stat == 'SB':

                    opps = player_proj['S'] + player_proj['HP'] + player_proj['BB']
                    sba_exp = (marcels_dict[player][stat] + marcels_dict[player]['CS']) / (marcels_dict[player]['S'] + marcels_dict[player]['BB'] + marcels_dict[player]['HP'])
                    if opps == 0:
                        sba_act = 0
                    else:
                        sba_act = (player_proj[stat] + player_proj['CS']) / opps
                    sba_blend = (sba_act * player_proj['SBA_s']) + (sba_exp * (1 - player_proj['SBA_s']))
                    succ_rate_proj = marcels_dict[player][stat] / (marcels_dict[player][stat] + marcels_dict[player]['CS'])

                else:

                    stat_exp = marcels_dict[player][stat] * PA
                    stat_act = player_proj[stat]
                    stat_blend = (stat_act * player_proj[stab_list[i]]) + (stat_exp * (1 - player_proj[stab_list[i]]))

                    new_player_dict[stat_list[i]] = stat_blend / PA

                    i += 1

            new_player_dict['SB'] = sba_blend * succ_rate_proj * (new_player_dict['S'] + new_player_dict['BB'] - new_player_dict['HP'])

            player_projs_dict[player] = new_player_dict
        
        else: 
            
            if player in player_dict: # Check if they've played games
                # No marcels, but games (2)
                # position average acts as default marcels projections
                player_proj = player_dict[player]
                
                eff_pos = df_player_sal.loc[df_player_sal['PlayerID'] == player, 'EffectivePosition'].iloc[0]
                if eff_pos == 'DH':
                    eff_pos = '1B'
                proj_by_position = average_stats_by_position.loc[eff_pos]
                proj_by_pos_dict = proj_by_position.to_dict()
                
                new_player_dict = {}
                stat_list = ['S', 'D', 'T', 'HR', 'BB', 'HP', 'SO', 'SB']
                stab_list = ['S_s', 'D_s', 'T_s', 'HR_s', 'BB_s', 'HBP_s', 'SO_s', 'SBA_s']
                i = 0
                
                PA = player_proj['PA']
            
                if PA == 0:

                    for stat in stat_list:
                        new_player_dict[stat] = proj_by_pos_dict[stat]

                    player_projs_dict[player] = new_player_dict

                for stat in stat_list:

                    if stat == 'SB':

                        opps = player_proj['S'] + player_proj['HP'] + player_proj['BB']
                        sba_exp = (proj_by_pos_dict[stat] + proj_by_pos_dict['CS']) / (proj_by_pos_dict['S'] + proj_by_pos_dict['BB'] + proj_by_pos_dict['HP'])
                        if opps == 0:
                            sba_act = 0
                        else:
                            sba_act = (player_proj[stat] + player_proj['CS']) / opps
                        sba_blend = (sba_act * player_proj['SBA_s']) + (sba_exp * (1 - player_proj['SBA_s']))
                        succ_rate_proj = proj_by_pos_dict[stat] / (proj_by_pos_dict[stat] + proj_by_pos_dict['CS'])

                    else:

                        stat_exp = proj_by_pos_dict[stat] * PA
                        stat_act = player_proj[stat]
                        stat_blend = (stat_act * player_proj[stab_list[i]]) + (stat_exp * (1 - player_proj[stab_list[i]]))

                        new_player_dict[stat_list[i]] = stat_blend / PA

                        i += 1

                new_player_dict['SB'] = sba_blend * succ_rate_proj * (new_player_dict['S'] + new_player_dict['BB'] - new_player_dict['HP'])

                player_projs_dict[player] = new_player_dict
                
            else: # Neither games nor marcels (4)
            
                # Find effective position

                eff_pos = df_player_sal.loc[df_player_sal['PlayerID'] == player, 'EffectivePosition'].iloc[0]
                proj_by_position = average_stats_by_position.loc[eff_pos]
                proj_by_pos_dict = proj_by_position.to_dict()
                player_projs_dict[player] = proj_by_pos_dict

    return player_projs_dict

In [386]:
blended_projections_dict = create_blended_projections_hitters(players, marcel_players, player_stabilization_dict, marcels_dict, sals, average_stats_by_position)



In [389]:
sals['pS/PA'] = sals.apply(lambda row: round(blended_projections_dict[row['PlayerID']]['S'], 3) if row['PlayerID'] in blended_projections_dict else np.NaN, axis=1)
sals['pD/PA'] = sals.apply(lambda row: round(blended_projections_dict[row['PlayerID']]['D'], 3) if row['PlayerID'] in blended_projections_dict else np.NaN, axis=1)
sals['pT/PA'] = sals.apply(lambda row: round(blended_projections_dict[row['PlayerID']]['T'], 3) if row['PlayerID'] in blended_projections_dict else np.NaN, axis=1)
sals['pHR/PA'] = sals.apply(lambda row: round(blended_projections_dict[row['PlayerID']]['HR'], 3) if row['PlayerID'] in blended_projections_dict else np.NaN, axis=1)
sals['pBB/PA'] = sals.apply(lambda row: round(blended_projections_dict[row['PlayerID']]['BB'], 3) if row['PlayerID'] in blended_projections_dict else np.NaN, axis=1)
sals['pHP/PA'] = sals.apply(lambda row: round(blended_projections_dict[row['PlayerID']]['HP'], 3) if row['PlayerID'] in blended_projections_dict else np.NaN, axis=1)
sals['pSB/PA'] = sals.apply(lambda row: round(blended_projections_dict[row['PlayerID']]['SB'], 3) if row['PlayerID'] in blended_projections_dict else np.NaN, axis=1)
sals['pSO/PA'] = sals.apply(lambda row: round(blended_projections_dict[row['PlayerID']]['SO'], 3) if row['PlayerID'] in blended_projections_dict else np.NaN, axis=1)


In [390]:
def get_vegas_lines(date, player_salaries_df):

    response = requests.get(f'https://api.sportsdata.io/api/mlb/fantasy/json/DfsSlatesByDate/{date}', headers={'Ocp-Apim-Subscription-Key': '6fcab751d8594ce9909283dcdc522d24'})
    games = response.json()
    df_games = pd.json_normalize(games, record_path =['DfsSlateGames'])
    df_games = df_games[['SlateGameID', 'GameID', 'OperatorGameID', 'Game.Season', 'Game.Day', 'Game.AwayTeam', 'Game.HomeTeam', 'Game.AwayTeamID', 'Game.HomeTeamID', 'Game.StadiumID', 'Game.AwayTeamProbablePitcherID', 'Game.HomeTeamProbablePitcherID', 'Game.PointSpread', 'Game.OverUnder', 'Game.AwayTeamMoneyLine', 'Game.HomeTeamMoneyLine']]
    result_df = player_salaries_df.merge(df_games, how='left', on = ['SlateGameID'])

    starting_pitchers = list(set(list(df_games['Game.AwayTeamProbablePitcherID'])).union(set(list(df_games['Game.HomeTeamProbablePitcherID']))))
    starting_pitchers = [x for x in starting_pitchers if str(x) != 'nan']

    result_df['HomeOrAway'] = result_df.apply(lambda row: 'AWAY' if row['Game.AwayTeamID'] == row['TeamID'] else 'HOME', axis=1)
    result_df['PlayerTeamMoneyLine'] = result_df.apply(lambda row: row['Game.AwayTeamMoneyLine'] if row['HomeOrAway'] == 'AWAY' else row['Game.HomeTeamMoneyLine'], axis=1)
    result_df['PlayerTeamPointSpread'] = result_df.apply(lambda row: abs(row['Game.PointSpread']) * -1 if row['PlayerTeamMoneyLine'] < 0 else abs(row['Game.PointSpread']), axis=1)
    result_df['PlayerTeamVegasWinProb'] = result_df.apply(lambda row: 100 / (100 + row['PlayerTeamMoneyLine']) if row['PlayerTeamMoneyLine'] > 0 else row['PlayerTeamMoneyLine'] / (row['PlayerTeamMoneyLine'] - 100), axis=1)
    result_df['PlayerTeamTotal'] = result_df.apply(lambda row: round((row['Game.OverUnder'] / 2) - ((row['PlayerTeamPointSpread'] * (100 / (abs(row['PlayerTeamMoneyLine']) + 100))) / 2), 2), axis=1)

    return result_df, starting_pitchers

In [391]:
sals_with_vegas_lines, starting_pitchers = get_vegas_lines(d, sals)

In [392]:
def adjust_for_park_factors(sals_with_vegas_lines):
    os.chdir('..')
    park_factors = pd.read_csv('ParkFactors.csv')
    park_adjusted_sals_with_vegas_lines = sals_with_vegas_lines.merge(park_factors, how = 'left', on = ['Game.StadiumID'])
    park_adjusted_sals_with_vegas_lines['pS/PA'] = park_adjusted_sals_with_vegas_lines['pS/PA'] * park_adjusted_sals_with_vegas_lines['1B'] / 100
    park_adjusted_sals_with_vegas_lines['pD/PA'] = park_adjusted_sals_with_vegas_lines['pD/PA'] * park_adjusted_sals_with_vegas_lines['2B'] / 100
    park_adjusted_sals_with_vegas_lines['pT/PA'] = park_adjusted_sals_with_vegas_lines['pT/PA'] * park_adjusted_sals_with_vegas_lines['3B'] / 100
    park_adjusted_sals_with_vegas_lines['pHR/PA'] = park_adjusted_sals_with_vegas_lines['pHR/PA'] * park_adjusted_sals_with_vegas_lines['HR'] / 100
    park_adjusted_sals_with_vegas_lines['pBB/PA'] = park_adjusted_sals_with_vegas_lines['pBB/PA'] * park_adjusted_sals_with_vegas_lines['BB'] / 100
    park_adjusted_sals_with_vegas_lines['pSO/PA'] = park_adjusted_sals_with_vegas_lines['pSO/PA'] * park_adjusted_sals_with_vegas_lines['SO'] / 100

    return park_adjusted_sals_with_vegas_lines

In [393]:
pa_sals_with_vegas_lines = adjust_for_park_factors(sals_with_vegas_lines)

In [395]:
## NEED TO FIGURE THE BATTING ORDER PART OUT
#batting_order_file = get_batting_orders_file()

pa_sals_with_vegas_lines['battingorderposition'] = 2

In [396]:
def apply_starters_obp(salaries_df):

    starters_obp_data = salaries_df[salaries_df['battingorderposition'].notna()]
    starters_obp_data = starters_obp_data[['PlayerID', 'TeamID', 'pS/PA', 'pD/PA', 'pT/PA', 'pHR/PA', 'pBB/PA', 'pHP/PA']].drop_duplicates(['PlayerID'])
    starters_obp_data = starters_obp_data[['TeamID', 'pS/PA', 'pD/PA', 'pT/PA', 'pHR/PA', 'pBB/PA', 'pHP/PA']].reset_index(drop=True).groupby(['TeamID']).mean()
    starters_obp_data['pOBP'] = starters_obp_data.apply(lambda row: row['pS/PA'] + row['pD/PA'] + row['pT/PA'] + row['pHR/PA'] + row['pBB/PA'] + row['pHP/PA'], axis=1)
    salaries_df['startersOBP'] = salaries_df.apply(lambda row: starters_obp_data.loc[row['TeamID'], 'pOBP'] if row['TeamID'] in list(starters_obp_data.index) else 0, axis=1)

    return salaries_df

In [397]:
sals_with_batting_order = apply_starters_obp(pa_sals_with_vegas_lines)

In [398]:
sals_with_batting_order['pPA'] = sals_with_batting_order.apply(lambda row: 1 if pd.isnull(row['battingorderposition']) else round(3.3 + (-0.12 * row['battingorderposition']) + (.036 * row['PlayerTeamTotal']) + (3.92 * row['startersOBP']), 2), axis=1)
sals_with_batting_order['OBP-HR'] = sals_with_batting_order.apply(lambda row: row['pS/PA'] + row['pD/PA'] + row['pT/PA'] + row['pBB/PA'] + row['pHP/PA'], axis=1)
sals_with_batting_order['pAB/PA'] = sals_with_batting_order.apply(lambda row: 1 - row['pBB/PA'] - row['pHP/PA'], axis=1)



In [399]:
def find_lead_hitters_obp(salaries_df):

    temp_df = salaries_df[['PlayerID', 'TeamID', 'battingorderposition', 'pS/PA', 'pD/PA', 'pT/PA', 'pBB/PA', 'pHP/PA']].drop_duplicates(['PlayerID']).dropna(subset=['battingorderposition']).reset_index(drop=True)
    leadOBP_dict = {}

    for index, row in temp_df.iterrows():
        
        teamid = row['TeamID']
        playerid = row['PlayerID']
        b_order = row['battingorderposition']
        lead_hitters = []
        
        if b_order > 3:
            lead_hitters = [b_order - 1, b_order - 2, b_order - 3]
        elif b_order == 3: lead_hitters = [9, 2, 1]
        elif b_order == 2: lead_hitters = [9, 8, 1]
        else:
            lead_hitters = [7, 8, 9]
        
        
        sub_df = temp_df[temp_df['TeamID'] == teamid].reset_index(drop=True)
        boolean_series = sub_df.battingorderposition.isin(lead_hitters)
        filtered_df = sub_df[boolean_series].reset_index(drop=True)
        OB_events = (filtered_df['pS/PA'].sum() + filtered_df['pD/PA'].sum() + filtered_df['pT/PA'].sum() + filtered_df['pBB/PA'].sum() + filtered_df['pHP/PA'].sum()) / filtered_df.shape[0]
        
        #SLG_events = filtered_df.S.sum() + (2 * filtered_df.D.sum()) + (3 * filtered_df['T'].sum()) + (4 * filtered_df.HR.sum())


        leadOBP_dict[playerid] = OB_events

    return leadOBP_dict

def find_trail_hitters_ops(salaries_df):
    ## Could figure out how to handle pitchers in the 9 spot

    temp_df = salaries_df[['PlayerID', 'TeamID', 'battingorderposition', 'pS/PA', 'pD/PA', 'pT/PA', 'pHR/PA', 'pBB/PA', 'pHP/PA', 'pAB/PA']].drop_duplicates(['PlayerID']).dropna(subset=['battingorderposition']).reset_index(drop=True)
    trailOPS_dict = {}

    for index, row in temp_df.iterrows():
        
        teamid = row['TeamID']
        playerid = row['PlayerID']
        b_order = row['battingorderposition']
        lead_hitters = []
        
        if b_order < 7:
            lead_hitters = [b_order + 1, b_order + 2, b_order + 3]
        elif b_order == 7: lead_hitters = [9, 8, 1]
        elif b_order == 8: lead_hitters = [9, 2, 1]
        else:
            lead_hitters = [1, 2, 3]
        
        
        sub_df = temp_df[temp_df['TeamID'] == teamid].reset_index(drop=True)
        boolean_series = sub_df.battingorderposition.isin(lead_hitters)
        filtered_df = sub_df[boolean_series].reset_index(drop=True)
        OB_events = (filtered_df['pS/PA'].sum() + filtered_df['pD/PA'].sum() + filtered_df['pT/PA'].sum() + filtered_df['pHR/PA'].sum() + filtered_df['pBB/PA'].sum() + filtered_df['pHP/PA'].sum()) / filtered_df.shape[0]
        SLG_events = (filtered_df['pS/PA'].sum() + (2 * filtered_df['pD/PA'].sum()) + (3 * filtered_df['pT/PA'].sum()) + (4 * filtered_df['pHR/PA'].sum()) / filtered_df['pAB/PA'].mean()) / filtered_df.shape[0]
        
        OPS = OB_events + SLG_events


        trailOPS_dict[playerid] = OPS

    return trailOPS_dict

In [400]:
#lead_hitters_obp_dict = find_lead_hitters_obp(sals_with_batting_order)
#trail_hitters_ops_dict = find_trail_hitters_ops(sals_with_batting_order)

In [404]:
def generate_projection_df_hitters(sals_with_batting_order):

    sals_with_batting_order['pBA'] = sals_with_batting_order.apply(lambda row: (row['pS/PA'] + row['pD/PA'] + row['pT/PA'] + row['pHR/PA']) / row['pAB/PA'], axis=1)
    sals_with_batting_order['pRBI/PA'] = sals_with_batting_order.apply(lambda row: -0.065 + (1.3 * row['pHR/PA']) + (0.2 * row['pBA']), axis=1)
    sals_with_batting_order['pR-HR/PA'] = sals_with_batting_order.apply(lambda row: -0.055 + (.245 * row['OBP-HR']), axis=1)
    sals_with_batting_order['pR/PA'] = sals_with_batting_order.apply(lambda row: row['pR-HR/PA'] + row['pHR/PA'], axis=1)
    sals_with_batting_order['pS'] = sals_with_batting_order.apply(lambda row: round(row['pS/PA'] * row['pPA'], 2), axis=1)
    sals_with_batting_order['pD'] = sals_with_batting_order.apply(lambda row: round(row['pD/PA'] * row['pPA'], 2), axis=1)
    sals_with_batting_order['pT'] = sals_with_batting_order.apply(lambda row: round(row['pT/PA'] * row['pPA'], 2), axis=1)
    sals_with_batting_order['pHR'] = sals_with_batting_order.apply(lambda row: round(row['pHR/PA'] * row['pPA'], 2), axis=1)
    sals_with_batting_order['pBB'] = sals_with_batting_order.apply(lambda row: round(row['pBB/PA'] * row['pPA'], 2), axis=1)
    sals_with_batting_order['pHP'] = sals_with_batting_order.apply(lambda row: round(row['pHP/PA'] * row['pPA'], 2), axis=1)
    sals_with_batting_order['pR'] = sals_with_batting_order.apply(lambda row: round(row['pR/PA'] * row['pPA'], 2), axis=1)
    sals_with_batting_order['pRBI'] = sals_with_batting_order.apply(lambda row: round(row['pRBI/PA'] * row['pPA'], 2), axis=1)
    sals_with_batting_order['pSB'] = sals_with_batting_order.apply(lambda row: round(row['pSB/PA'] * row['pPA'], 2), axis=1)
    sals_with_batting_order['pSO'] = sals_with_batting_order.apply(lambda row: round(row['pSO/PA'] * row['pPA'], 2), axis=1)
    sals_with_batting_order['DraftKingsPoints'] = sals_with_batting_order.apply(lambda row: round((3 * row['pS']) + (5 * row['pD']) + (8 * row['pT']) + (10 * row['pHR']) + (2 * row['pRBI']) + (2 * row['pR']) + (2 * row['pBB']) + (2 * row['pHP']) + (5 * row['pSB']), 2), axis=1)
    sals_with_batting_order['FanDuelPoints'] = sals_with_batting_order.apply(lambda row: round((3 * row['pS']) + (6 * row['pD']) + (9 * row['pT']) + (12 * row['pHR']) + (3.5 * row['pRBI']) + (3.2 * row['pR']) + (3 * row['pBB']) + (3 * row['pHP']) + (6 * row['pSB']), 2), axis=1)
    sals_with_batting_order['H_A'] = sals_with_batting_order.apply(lambda row: 'H' if row['Game.HomeTeamID'] == row['TeamID'] else 'A', axis=1)
    sals_with_batting_order['Opponent_ID'] = sals_with_batting_order.apply(lambda row: row['Game.HomeTeamID'] if row['H_A'] == 'A' else row['Game.AwayTeamID'], axis=1)
    projection_df = sals_with_batting_order[['PlayerID','SlateID', 'Operator', 'OperatorPlayerID', 'TeamID', 'OperatorSalary','OperatorGameType', 'SalaryCap', 'OperatorPlayerName', 'OperatorPosition', 'OperatorRosterSlots', 'pPA', 'pR', 'pS', 'pD', 'pT', 'pHR', 'pRBI', 'pBB', 'pHP', 'pSB', 'pSO', 'DraftKingsPoints', 'FanDuelPoints', 'H_A', 'Opponent_ID']].reset_index(drop=True)

    return projection_df

In [432]:
projection_df = generate_projection_df_hitters(sals_with_batting_order)

In [435]:
projection_df

Unnamed: 0,PlayerID,SlateID,Operator,OperatorPlayerID,TeamID,OperatorSalary,OperatorGameType,SalaryCap,OperatorPlayerName,OperatorPosition,OperatorRosterSlots,pPA,pR,pS,pD,pT,pHR,pRBI,pBB,pHP,pSB,pSO,DraftKingsPoints,FanDuelPoints,H_A,Opponent_ID
0,10009914,21109,DraftKings,826668,11.0,2000,Classic,50000.0,René Pinto,C,[C],4.42,0.19,0.59,0.16,0.00,0.15,0.11,0.35,0.05,0.01,1.30,5.52,6.78,H,13.0
1,10001934,21109,DraftKings,657851,20.0,6200,Classic,50000.0,Byron Buxton,OF,[OF],4.44,0.35,0.50,0.29,0.01,0.33,0.38,0.23,0.08,0.08,1.20,8.81,11.15,H,17.0
2,10000217,21109,DraftKings,390610,29.0,5500,Classic,50000.0,Anthony Rizzo,1B,[1B],4.43,0.37,0.54,0.15,0.01,0.28,0.31,0.48,0.18,0.04,0.64,8.13,10.46,H,19.0
3,10006284,21109,DraftKings,876140,13.0,5600,Classic,50000.0,Ty France,1B,[1B],4.41,0.29,0.77,0.21,0.01,0.16,0.19,0.37,0.14,0.00,0.65,7.02,8.70,A,11.0
4,10007299,21109,DraftKings,919327,22.0,5400,Classic,50000.0,Jazz Chisholm Jr.,2B,[2B],4.47,0.25,0.61,0.20,0.04,0.19,0.18,0.36,0.04,0.14,1.10,7.41,9.14,A,35.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5866,10010048,21120,FanDuel,163706,31.0,0,Snake Draft,0.0,Brendan Donovan,2B,[UTIL],4.41,0.20,0.59,0.18,0.01,0.14,0.09,0.39,0.09,0.02,1.00,5.79,7.14,H,14.0
5867,10000590,21120,FanDuel,5177,31.0,0,Snake Draft,0.0,Albert Pujols,1B,[UTIL],4.41,0.21,0.60,0.14,0.00,0.18,0.15,0.28,0.05,0.04,0.77,5.88,7.23,H,14.0
5868,10000406,21120,FanDuel,38074,31.0,0,Snake Draft,0.0,Corey Dickerson,OF,[UTIL],4.41,0.17,0.72,0.20,0.03,0.09,0.06,0.33,0.02,0.03,0.71,5.61,6.69,H,14.0
5869,10009865,21120,FanDuel,163704,14.0,0,Snake Draft,0.0,Cooper Hummel,OF,[UTIL],4.39,0.25,0.55,0.19,0.02,0.14,0.11,0.62,0.04,0.06,0.93,6.50,8.18,A,31.0


In [427]:
p_proj = pd.read_csv('pitchers-apr28.csv')
p_proj

Unnamed: 0,PlayerID,TeamID,OperatorPlayerName,pIP,pK,pBB,pHR,pH,pHBP,pBF
0,10000432,30.0,Justin Verlander,5.03,5.48,1.39,0.77,3.97,0.24,20.69
1,10009500,19.0,Bruce Zimmermann,4.68,4.71,1.63,0.79,4.64,0.22,20.53
2,10010729,17.0,Tarik Skubal,5.02,5.35,1.43,0.96,4.57,0.24,21.3
3,10005575,16.0,Michael Kopech,4.47,5.27,1.58,0.56,3.74,0.21,18.94
4,10005970,5.0,Brad Keller,5.16,4.33,1.97,0.56,4.97,0.25,22.67
5,10010317,20.0,Bailey Ober,4.81,4.51,1.3,0.81,4.59,0.23,20.55
6,10000986,29.0,Jameson Taillon,4.95,4.65,1.37,0.8,4.54,0.24,21.0
7,10007369,13.0,Chris Flexen,5.61,4.14,1.55,0.62,5.57,0.27,24.22
8,10005998,23.0,Austin Gomber,4.91,4.73,1.93,0.69,4.28,0.24,21.18
9,10001253,28.0,Martín Pérez,4.68,4.03,1.77,0.65,4.85,0.22,20.88


In [436]:
b = projection_df[['PlayerID', 'TeamID', 'OperatorPlayerName', 'pPA', 'pR', 'pS', 'pD', 'pT', 'pHR', 'pRBI', 'pBB', 'pHP', 'pSO', 'Opponent_ID']]
b.drop_duplicates(subset=None, keep='first', inplace=True)
b = b.reset_index(drop=True)
b

Unnamed: 0,PlayerID,TeamID,OperatorPlayerName,pPA,pR,pS,pD,pT,pHR,pRBI,pBB,pHP,pSO,Opponent_ID
0,10009914,11.0,René Pinto,4.42,0.19,0.59,0.16,0.00,0.15,0.11,0.35,0.05,1.30,13.0
1,10001934,20.0,Byron Buxton,4.44,0.35,0.50,0.29,0.01,0.33,0.38,0.23,0.08,1.20,17.0
2,10000217,29.0,Anthony Rizzo,4.43,0.37,0.54,0.15,0.01,0.28,0.31,0.48,0.18,0.64,19.0
3,10006284,13.0,Ty France,4.41,0.29,0.77,0.21,0.01,0.16,0.19,0.37,0.14,0.65,11.0
4,10007299,22.0,Jazz Chisholm Jr.,4.47,0.25,0.61,0.20,0.04,0.19,0.18,0.36,0.04,1.10,35.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
906,10003343,14.0,Wilmer Difo,4.39,0.14,0.63,0.17,0.02,0.08,0.02,0.34,0.06,0.88,31.0
907,10005557,14.0,Braden Bishop,4.39,0.17,0.58,0.18,0.02,0.11,0.06,0.39,0.05,1.01,31.0
908,10008979,,Nick Heath,,,,,,,,,,,
909,10003340,,Alex Dickerson,3.22,0.17,0.42,0.15,0.02,0.12,0.11,0.26,0.04,0.74,26.0


In [437]:
p = b.merge(p_proj, left_on='Opponent_ID', right_on='TeamID')
p.head()

Unnamed: 0,PlayerID_x,TeamID_x,OperatorPlayerName_x,pPA,pR,pS,pD,pT,pHR_x,pRBI,pBB_x,pHP,pSO,Opponent_ID,PlayerID_y,TeamID_y,OperatorPlayerName_y,pIP,pK,pBB_y,pHR_y,pH,pHBP,pBF
0,10009914,11.0,René Pinto,4.42,0.19,0.59,0.16,0.0,0.15,0.11,0.35,0.05,1.3,13.0,10007369,13.0,Chris Flexen,5.61,4.14,1.55,0.62,5.57,0.27,24.22
1,10008199,11.0,Wander Franco,4.42,0.21,0.65,0.25,0.05,0.15,0.15,0.26,0.03,0.58,13.0,10007369,13.0,Chris Flexen,5.61,4.14,1.55,0.62,5.57,0.27,24.22
2,10001086,11.0,Ji-Man Choi,4.42,0.26,0.54,0.23,0.01,0.14,0.11,0.67,0.03,1.27,13.0,10007369,13.0,Chris Flexen,5.61,4.14,1.55,0.62,5.57,0.27,24.22
3,10006431,11.0,Francisco Mejía,4.42,0.19,0.63,0.2,0.03,0.14,0.11,0.24,0.08,1.05,13.0,10007369,13.0,Chris Flexen,5.61,4.14,1.55,0.62,5.57,0.27,24.22
4,10006272,11.0,Brandon Lowe,4.42,0.26,0.51,0.2,0.01,0.21,0.19,0.46,0.05,1.15,13.0,10007369,13.0,Chris Flexen,5.61,4.14,1.55,0.62,5.57,0.27,24.22


In [444]:
lg_a = game_logs[['PA', 'S', 'D', 'T', 'HR', 'BB', 'HP', 'SO']].reset_index(drop=True).mean().tolist()
a_pa = lg_a[0]
lg_p = [item / a_pa for item in lg_a]
lg_a_h = lg_p[1] + lg_p[2] + lg_p[3] + lg_p[4]
lg_p.append(lg_a_h)
lg_p


[1.0,
 0.13469809048683984,
 0.04266299673146396,
 0.0034979069900797064,
 0.024083949767761913,
 0.08882390045300763,
 0.011181833820746603,
 0.23114857503297206,
 0.2049429439761454]

In [446]:
or_stats = ['HR', 'BB', 'HBP', 'SO', 'H']
lg_a_or_dict = {}
for i in range(len(or_stats)):
    lg_a_or_dict[or_stats[i]] = lg_p[i + 4] / (1 - lg_p[i + 4])
lg_a_or_dict

{'HR': 0.024678300722721664,
 'BB': 0.0974826935179358,
 'HBP': 0.011308281141266528,
 'SO': 0.3006414081145585,
 'H': 0.25777136675081136}

In [476]:
p[p.OperatorPlayerName_y == 'Jeffrey Springs']

Unnamed: 0,PlayerID_x,TeamID_x,OperatorPlayerName_x,pPA,pR,pS,pD,pT,pHR_x,pRBI,pBB_x,pHP,pSO,Opponent_ID,PlayerID_y,TeamID_y,OperatorPlayerName_y,pIP,pK,pBB_y,pHR_y,pH,pHBP,pBF
103,10006284,13.0,Ty France,4.41,0.29,0.77,0.21,0.01,0.16,0.19,0.37,0.14,0.65,11.0,10008737,11.0,Jeffrey Springs,4.91,5.84,1.9,0.84,4.81,0.24,21.68
104,10005680,13.0,J.P. Crawford,4.41,0.22,0.72,0.2,0.03,0.09,0.07,0.48,0.07,0.52,11.0,10008737,11.0,Jeffrey Springs,4.91,5.84,1.9,0.84,4.81,0.24,21.68
105,10000311,13.0,Eugenio Suárez,4.41,0.25,0.43,0.18,0.0,0.21,0.17,0.52,0.04,1.21,11.0,10008737,11.0,Jeffrey Springs,4.91,5.84,1.9,0.84,4.81,0.24,21.68
106,10001939,13.0,Tom Murphy,4.41,0.25,0.6,0.16,0.01,0.16,0.13,0.54,0.03,1.19,11.0,10008737,11.0,Jeffrey Springs,4.91,5.84,1.9,0.84,4.81,0.24,21.68
107,10006044,13.0,Mitch Haniger,4.41,0.25,0.51,0.16,0.01,0.23,0.22,0.31,0.05,1.12,11.0,10008737,11.0,Jeffrey Springs,4.91,5.84,1.9,0.84,4.81,0.24,21.68
108,10005834,13.0,Adam Frazier,4.41,0.14,0.71,0.21,0.03,0.05,-0.0,0.35,0.06,0.65,11.0,10008737,11.0,Jeffrey Springs,4.91,5.84,1.9,0.84,4.81,0.24,21.68
109,10008513,13.0,Julio Rodríguez,4.41,0.13,0.59,0.2,0.02,0.07,0.0,0.39,0.04,1.5,11.0,10008737,11.0,Jeffrey Springs,4.91,5.84,1.9,0.84,4.81,0.24,21.68
110,10009303,13.0,Jarred Kelenic,4.41,0.16,0.44,0.16,0.02,0.14,0.07,0.41,0.04,1.49,11.0,10008737,11.0,Jeffrey Springs,4.91,5.84,1.9,0.84,4.81,0.24,21.68
111,10001947,13.0,Jesse Winker,4.41,0.25,0.56,0.21,0.01,0.13,0.1,0.64,0.07,0.7,11.0,10008737,11.0,Jeffrey Springs,4.91,5.84,1.9,0.84,4.81,0.24,21.68
112,10008490,13.0,Abraham Toro,4.41,0.16,0.51,0.15,0.01,0.13,0.06,0.36,0.09,0.8,11.0,10008737,11.0,Jeffrey Springs,4.91,5.84,1.9,0.84,4.81,0.24,21.68


In [508]:
team_totals = {}
OR_pitchers = {}
pitcher_index = {}

for r in p.index:
    row = p.loc[r,:].tolist()
    op = row[16]
    pid = row[14]
    team = row[15]
    ip = row[17]
    if op in pitcher_index: pass
    else:
        pitcher_index[op] = [pid, team, ip]

# OR for pitchers
for r in p.index:
    row = p.loc[r, :].tolist()
    n = row[16]
    
    bf = row[23]
    
    k_p = row[18] / bf
    hr_p = row[20] / bf
    bb_p = row[19] / bf
    h_p = row[21] / bf
    hbp_p = row[22] / bf
    
    p_or_k = k_p / (1 - k_p)
    p_or_hr = hr_p / (1 - hr_p)
    p_or_h = h_p / (1 - h_p)
    p_or_bb = bb_p / (1 - bb_p)
    p_or_hbp = hbp_p / (1 - hbp_p)    
    
    if n in OR_pitchers: pass
    else:
        OR_pitchers[n] = [p_or_k, p_or_hr, p_or_h, p_or_bb, p_or_hbp, bf]
        
for r in p.index:
    row = p.loc[r, :].tolist()
    row = p.loc[r,:].tolist()
    batter_team = row[1]
    if np.isnan(batter_team): continue
    else:
        # get total team projected outs
        pa = row[3]
        hits = row[5] + row[6] + row[7] + row[8]
        outs = pa - hits - row[10] - row[11]


        if batter_team in team_totals: 
            team_totals[batter_team][0] += pa
            team_totals[batter_team][1] += outs
        else: team_totals[batter_team] = [pa, outs]

stats = ['SO', 'HR', 'H', 'BB', 'HBP']
r_lst = []

adj_p_dict = {}

for r in p.index:
    row = p.loc[r,:].tolist()
    pa = row[3]
    t = row[1]
    if np.isnan(t) or np.isnan(row[4]): continue
    else:
        hits = row[5] + row[6] + row[7] + row[8]

        # find odds ratio for K, HR, H, HBP, BB

        k_p = row[12] / pa
        hr_p = row[8] / pa
        bb_p = row[10] / pa
        h_p = hits / pa
        hbp_p = row[11] / pa

        b_or_k = k_p / (1 - k_p)
        b_or_hr = hr_p / (1 - hr_p)
        b_or_h = h_p / (1 - h_p)
        b_or_bb = bb_p / (1 - bb_p)
        b_or_hbp = hbp_p / (1 - hbp_p)

        b_or = [b_or_k, b_or_hr, b_or_h, b_or_bb, b_or_hbp]

        for i in range(5):
            op = row[16]
            if op in adj_p_dict: pass
            else:
                adj_p_dict[op] = [0, 0, 0, 0, 0]

            adj_b_or = (b_or[i] * OR_pitchers[op][i]) / lg_a_or_dict[stats[i]]
            adj_b_p = adj_b_or / (adj_b_or + 1)
            ns = adj_b_p * pa
            
            ps = round(ns / (team_totals[t][0] / OR_pitchers[op][5]), 2)

            pavspp = OR_pitchers[op][5] / (team_totals[t][0] / (team_totals[t][1] / 27))
            ns_sp = ns * pavspp
            ns_rp = (b_or[i] / (b_or[i] + 1)) * pa * (1 - pavspp)
            
            adj_p_dict[op][i] += ps

            ns_t = round(ns_rp + ns_sp, 2)

            row.append(ns_t)

        r_lst.append(row)
        
        
        
    
    

In [509]:
for key in pitcher_index:
    adj_p_dict[key] += pitcher_index[key]


Chris Flexen
Tarik Skubal
Bruce Zimmermann
Jeffrey Springs
Patrick Corbin
Tyler Mahle
Zack Wheeler
Justin Verlander
Austin Gomber
Bailey Ober
José Quintana
Brad Keller
Freddy Peralta
Michael Kopech
Trevor Rogers
Jameson Taillon
Martín Pérez
Nick Martinez
Garrett Whitlock
Cal Quantrill
Reid Detmers
Alek Manoah
Humberto Castellanos
Kyle Wright
Drew Smyly
Dakota Hudson


In [510]:
#####
## Just need to get all the data lines up in csvs with all pertinent information
## Re run just to make sure, move batter and pitchers to one file
## Migrate to .py file
## SCHEDULE a chat

In [513]:
v = pd.DataFrame.from_dict(adj_p_dict, orient='index')
v = v.reset_index()
v.columns = ['Name', 'K', 'HR', 'H', 'BB', 'HBP', 'PlayerID', 'TeamID', 'IP']

In [515]:
v = v[['PlayerID', 'TeamID', 'Name', 'IP', 'H', 'HR', 'K', 'BB', 'HBP']]
#v['DraftKingsPoints'] = v.apply(lambda row: round(row['IP'] * 2.25 + row['K'] * 2 + row['pW'] * 4 + row['pER'] * -2 + row['pH'] * -0.6 + row['pBB'] * -0.6 + row['pHBP'] * -0.6, 2), axis=1)
#v['FanDuelPoints'] = v.apply(lambda row: round(row['pW'] * 6 + row['pQS'] * 4 + row['pER'] * -3 + row['pK'] * 3 + row['pIP'] * 3, 2), axis=1)

v

Unnamed: 0,PlayerID,TeamID,Name,IP,H,HR,K,BB,HBP
0,10007369,13.0,Chris Flexen,5.61,5.57,0.69,4.28,1.47,0.32
1,10010729,17.0,Tarik Skubal,5.02,4.77,1.2,5.42,1.46,0.31
2,10009500,19.0,Bruce Zimmermann,4.68,4.75,1.07,4.69,1.67,0.3
3,10008737,11.0,Jeffrey Springs,4.91,4.62,0.91,5.69,1.85,0.29
4,10000007,35.0,Patrick Corbin,5.29,6.15,1.06,5.21,2.37,0.33
5,10006353,2.0,Tyler Mahle,5.36,5.06,0.95,6.19,2.17,0.34
6,10000807,12.0,Zack Wheeler,6.33,5.96,0.79,6.14,1.62,0.33
7,10000432,30.0,Justin Verlander,5.03,4.07,0.85,5.36,1.36,0.3
8,10005998,23.0,Austin Gomber,4.91,4.5,0.81,4.8,1.92,0.32
9,10010317,20.0,Bailey Ober,4.81,4.85,0.97,4.59,1.29,0.29
