In [1]:
import pandas as pd
import numpy as np
import os
import pickle

In [2]:
import sys

# Make the project-local `config` package importable.
# PYTHONPATH may be unset (os.environ.get would return None, which must not be
# appended to sys.path) or may hold several entries separated by os.pathsep,
# so add each non-empty entry individually and skip ones already present.
for _pathEntry in os.environ.get('PYTHONPATH', '').split(os.pathsep):
    if _pathEntry and _pathEntry not in sys.path:
        sys.path.append(_pathEntry)

from config import config

In [3]:
# Read the three input tables from <PROJECT_ROOT_DIR>/data:
# projected pitching lines (daily/) plus the players and teams lookups (historical/).
dataRoot = os.path.join(config.PROJECT_ROOT_DIR, 'data')
historicalDir = os.path.join(dataRoot, 'historical')

pitching = pd.read_csv(
    os.path.join(dataRoot, 'daily', 'pitchingProjected.csv'),
    encoding='latin',
)
players = pd.read_csv(os.path.join(historicalDir, 'players.csv'), encoding='latin')
teams = pd.read_csv(os.path.join(historicalDir, 'teams.csv'), encoding='latin')

In [4]:
# Load the previously trained classifier from the working directory.
# NOTE(security): pickle.load can execute arbitrary code while deserializing,
# so only load model files that come from a trusted source.
# The `with` block closes the file on exit; no explicit close() is needed.
with open('cyYoung.model', 'rb') as f:
    lr = pickle.load(f)



In [5]:
def nineInningNormalize(df, statColumn, inningsPitchedColumn = 'ip'):
    """Return the per-nine-innings rate of a counting stat.

    The innings column is read as whole innings plus a tenths digit that counts
    thirds of an inning (e.g. 6.2 is treated as 6 + 2/3 innings), which is what
    the ``% 1 * 10 / 3`` conversion below implements.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the stat column and the innings column.
    statColumn : str
        Name of the counting-stat column to normalize (e.g. hits, walks).
    inningsPitchedColumn : str, default 'ip'
        Name of the innings-pitched column.

    Returns
    -------
    pandas.Series
        ``stat * 9 / true_innings``, aligned to ``df``'s index.
    """
    innings = df[inningsPitchedColumn]
    # Use only the frame passed in: the previous version read the whole-inning
    # part from a global `merged`, which fails (or silently misaligns) whenever
    # `df` is any other frame.
    # floor (not round) keeps the whole-inning count independent of the
    # fractional digit.
    wholeInnings = np.floor(innings)
    partialInnings = innings % 1 * 10 / 3
    return df[statColumn] * 9 / (partialInnings + wholeInnings)

In [6]:
# Build the working frame in one chain:
#   1. left-join the players table onto the pitching rows by player_id,
#   2. stamp every row with the configured current season,
#   3. left-join the teams table on (team_id, season).
merged = (
    pitching
    .merge(players, how='left', on='player_id')
    .assign(season=config.CURRENT_SEASON)
    .merge(teams, how='left', on=['team_id', 'season'])
)

In [7]:
# Only pitchers with more than this many innings pitched are scored.
minimumInningsPitched = 100

# Take an explicit copy of the filtered rows: the column assignments below would
# otherwise write into a slice of the previous frame and trigger pandas'
# SettingWithCopyWarning.
merged = merged[merged['ip'] > minimumInningsPitched].copy()

# Ratio features. A zero denominator (e.g. no save opportunities) yields NaN
# here; those NaNs are replaced by 0 when modelData is built below.
merged['sv_pct'] = merged['sv']/merged['svo']
merged['win_pct'] = merged['w']/(merged['w'] + merged['l'])

# Per-nine-innings rates for hits, home runs, walks, strikeouts and earned runs.
merged['hits_9'] = nineInningNormalize(merged, 'h')
merged['hrs_9'] = nineInningNormalize(merged, 'hr')
merged['bbs_9'] = nineInningNormalize(merged, 'bb')
merged['ks_9'] = nineInningNormalize(merged, 'so')
merged['ers_9'] = nineInningNormalize(merged, 'er')

# Numeric features followed by three descriptive columns kept for display;
# missing values are filled with 0.
modelData = merged[['hits_9', 'hrs_9', 'bbs_9', 'ks_9', 'ers_9', 'sv_pct', 'win_pct', 'era', 'whip', 'name_display_first_last', 'team_name', 'division_full']].fillna(0)

In [8]:
# Columns that must not be fed to the model: the three descriptive columns and
# the 'prediction' column this cell itself adds. Listing 'prediction' (with
# errors='ignore' for the first run, when it does not exist yet) keeps the cell
# re-runnable; otherwise a second run would pass an extra feature to the model.
nonFeatureColumns = ['name_display_first_last', 'team_name', 'division_full', 'prediction']
predictData = modelData.drop(columns=nonFeatureColumns, errors='ignore')

# Column 1 of predict_proba is used as the score
# (presumably the positive class - confirm against the trained model's classes_).
modelData['prediction'] = lr.predict_proba(predictData)[:,1]
modelData.sort_values(by = ['prediction'], ascending=False)

Unnamed: 0,hits_9,hrs_9,bbs_9,ks_9,ers_9,sv_pct,win_pct,era,whip,name_display_first_last,team_name,division_full,prediction
393,7.570093,1.110280,1.564486,9.487850,2.927103,0.0,0.666667,2.93,1.01,Hyun-Jin Ryu,Los Angeles Dodgers,National League West,0.867305
119,6.684466,1.354369,2.315534,11.228155,3.189320,0.0,0.708333,3.19,1.00,Justin Verlander,Houston Astros,American League West,0.771635
390,8.058935,1.180608,1.745247,8.880228,3.336502,0.0,0.684211,3.34,1.09,Clayton Kershaw,Los Angeles Dodgers,National League West,0.746973
561,7.451163,1.004651,2.051163,12.055814,3.223256,0.0,0.565217,3.22,1.06,Max Scherzer,Washington Nationals,National League East,0.723630
243,7.028351,0.835052,3.131443,10.786082,2.992268,0.0,0.687500,2.99,1.13,Tyler Glasnow,Tampa Bay Rays,American League East,0.706964
502,6.824834,1.017738,2.274945,10.057650,2.993348,0.0,0.555556,2.99,1.01,Chris Paddack,San Diego Padres,National League West,0.666133
40,7.065858,1.123050,2.012132,12.166378,3.369151,0.0,0.545455,3.37,1.01,Chris Sale,Boston Red Sox,American League East,0.664327
437,7.141638,0.967577,2.441980,11.795222,2.994881,0.0,0.541667,2.99,1.06,Jacob deGrom,New York Mets,National League East,0.664115
71,8.109712,1.214029,1.893885,10.877698,3.544964,0.0,0.608696,3.54,1.11,Carlos Carrasco,Cleveland Indians,American League Central,0.621003
193,7.394904,1.031847,2.808917,11.464968,3.267516,0.0,0.631579,3.27,1.13,James Paxton,New York Yankees,American League East,0.593130


In [9]:
# Only a cell's last expression is rendered, so the innings-pitched listing was
# previously computed and thrown away. Show it explicitly with display(), then
# leave the full frame as the cell's output as before.
display(merged[['name_display_first_last', 'player_id', 'ip']].sort_values(by=['ip']))
merged

Unnamed: 0,hr,player,wpct,era,bsv,outs,sho,sv,whip,qs,...,division_id,spring_league_id,venue_short,sv_pct,win_pct,hits_9,hrs_9,bbs_9,ks_9,ers_9
4,37,"Bundy, Dylan",.333,5.08,0,505,0,0,1.34,11,...,201.0,115.0,Oriole Park,,0.333333,8.821782,1.978218,3.261386,8.821782,5.079208
5,29,"Cashner, Andrew",.500,5.10,0,503,0,0,1.49,10,...,201.0,115.0,Oriole Park,,0.500000,9.823062,1.556660,3.542744,6.656064,5.099404
7,25,"Cobb, Alex",.357,5.60,0,323,0,0,1.46,6,...,201.0,115.0,Oriole Park,,0.357143,10.699690,2.089783,2.424149,6.520124,5.600619
15,23,"Means, John",.474,4.32,0,381,0,0,1.28,7,...,201.0,115.0,Oriole Park,,0.473684,8.929134,1.629921,2.622047,7.015748,4.322835
20,35,"Straily, Dan",.294,6.60,0,405,0,0,1.60,4,...,201.0,115.0,Oriole Park,,0.294118,10.666667,2.333333,3.733333,6.466667,6.600000
35,28,"Porcello, Rick",.545,4.55,0,540,0,0,1.32,14,...,201.0,115.0,Fenway Park,,0.545455,9.150000,1.400000,2.700000,8.400000,4.550000
37,20,"Price, David",.533,3.93,0,426,0,0,1.20,12,...,201.0,115.0,Fenway Park,,0.533333,8.239437,1.267606,2.598592,9.633803,3.929577
39,21,"Rodriguez, Eduardo",.619,4.24,0,497,0,0,1.32,14,...,201.0,115.0,Fenway Park,,0.619048,8.637827,1.140845,3.205231,9.452716,4.237425
40,24,"Sale, Chris",.545,3.37,0,577,0,0,1.01,20,...,201.0,115.0,Fenway Park,,0.545455,7.065858,1.123050,2.012132,12.166378,3.369151
43,15,"Velazquez, Hector",.462,4.40,0,331,0,0,1.35,3,...,201.0,115.0,Fenway Park,,0.461538,8.972810,1.223565,3.181269,7.096677,4.404834
