In [8]:
import pandas as pd
import numpy as np
import os
import pickle

In [9]:
import sys
# PYTHONPATH may be unset (os.environ.get would return None) or may hold several
# directories joined by os.pathsep, so add each non-empty entry on its own
# instead of appending the raw value as a single sys.path element.
for _pathEntry in os.environ.get('PYTHONPATH', '').split(os.pathsep):
    if _pathEntry and _pathEntry not in sys.path:
        sys.path.append(_pathEntry)
from config import config

In [10]:
# Read the three input tables from <PROJECT_ROOT_DIR>/data/<subdirectory>/<file>,
# in the order pitching, players, teams. Every file is decoded with the 'latin' codec.
pitching, players, teams = (
    pd.read_csv(os.path.join(config.PROJECT_ROOT_DIR, 'data', subdirectory, fileName), encoding = 'latin')
    for subdirectory, fileName in (
        ('projected', 'pitchingProjected.csv'),
        ('historical', 'players.csv'),
        ('historical', 'teams.csv'),
    )
)

In [11]:
# Load the pickled classifier used for the predictions below.
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# only load 'cyYoung.model' when the file comes from a trusted source.
# The path is relative and is therefore resolved against the kernel's current
# working directory.
with open('cyYoung.model', 'rb') as f:
    # The with-statement closes the file on exit; no explicit close() is needed.
    model = pickle.load(f)



In [12]:
def nineInningNormalize(df, statColumn, inningsPitchedColumn = 'ip'):
    """Scale a counting stat to a per-nine-innings rate.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the stat column and the innings-pitched column.
    statColumn : str
        Name of the column with the counting stat to normalize.
    inningsPitchedColumn : str, default 'ip'
        Name of the innings-pitched column. Its fractional part is multiplied
        by 10/3 below, so a value ending in .1 counts as one third of an
        inning and .2 as two thirds.

    Returns
    -------
    pandas.Series
        ``df[statColumn] * 9 / innings`` computed row by row. Rows with zero
        innings are not special-cased and follow pandas division semantics.
    """
    # Read innings from the frame passed in, not from a notebook-level global,
    # so the function works on any frame and on a fresh kernel.
    inningsPitched = df[inningsPitchedColumn]
    # Convert the decimal digit into thirds of an inning (.1 -> 1/3, .2 -> 2/3).
    partialInnings = inningsPitched % 1 * 10 / 3
    # round() yields the whole innings as long as the fractional part is below .5,
    # which holds for the .0/.1/.2 values this conversion expects.
    wholeInnings = inningsPitched.round()
    return df[statColumn] * 9 / (partialInnings + wholeInnings)

In [13]:
# Left-join player details onto each projected pitching row, stamp every row
# with the configured current season, then left-join team details on the
# (team_id, season) pair.
merged = (
    pitching
    .merge(players, how = 'left', on = ['player_id'])
    .assign(season = config.CURRENT_SEASON)
    .merge(teams, how = 'left', on = ['team_id', 'season'])
)

In [14]:
# Only rows whose 'ip' value is strictly greater than this threshold are kept.
minimumInningsPitched = 100

hasEnoughInnings = merged['ip'] > minimumInningsPitched
merged = merged.loc[hasEnoughInnings]

# Disabled derived features, kept for reference (the frame below uses the raw columns):
# merged['sv_pct'] = merged['sv']/merged['svo']
# merged['win_pct'] = merged['w']/(merged['w'] + merged['l'])
# merged['hits_9'] = nineInningNormalize(merged, 'h')
# merged['hrs_9'] = nineInningNormalize(merged, 'hr')
# merged['bbs_9'] = nineInningNormalize(merged, 'bb')
# merged['ks_9'] = nineInningNormalize(merged, 'so')
# merged['ers_9'] = nineInningNormalize(merged, 'er')

# Twelve stat columns followed by three descriptive columns; missing values become 0.
modelData = merged.loc[:, [
    'h', 'hr', 'bb', 'so', 'er', 'sv', 'svo', 'w', 'l', 'era', 'whip', 'ip',
    'name_display_first_last', 'team_name', 'division_full',
]].fillna(0)

In [15]:
# Columns of modelData that must not be passed to the model: the three
# descriptive columns, plus 'prediction', which this cell adds itself.
# Excluding 'prediction' keeps the cell safe to re-run; otherwise a second run
# would feed the previous predictions back in as an extra feature.
nonFeatureColumns = ['name_display_first_last', 'team_name', 'division_full', 'prediction']
featureColumns = [column for column in modelData.columns if column not in nonFeatureColumns]
predictData = modelData.loc[:, featureColumns]

# Keep the second column of predict_proba's output as the score
# (presumably the positive-class probability — confirm against the trained model).
modelData['prediction'] = model.predict_proba(predictData)[:, 1]
# Last expression of the cell: rows ranked from highest to lowest score.
modelData.sort_values(by = ['prediction'], ascending=False)

Unnamed: 0,h,hr,bb,so,er,sv,svo,w,l,era,whip,ip,name_display_first_last,team_name,division_full,prediction
122,150,31,53,259,71,0,0,18,7,3.09,0.98,207.0,Justin Verlander,Houston Astros,American League West,7.340382e-01
403,152,21,31,187,57,0,0,15,7,2.83,1.01,181.1,Hyun-Jin Ryu,Los Angeles Dodgers,National League West,5.678836e-01
109,154,25,58,275,74,0,0,15,9,3.36,1.07,198.1,Gerrit Cole,Houston Astros,American League West,4.368536e-01
253,145,17,73,207,64,0,0,12,6,3.25,1.23,177.0,Charlie Morton,Tampa Bay Rays,American League East,3.609648e-01
578,178,24,52,285,78,0,0,13,11,3.30,1.08,213.0,Max Scherzer,Washington Nationals,National League East,3.008624e-01
359,140,18,65,211,62,0,0,13,8,3.06,1.12,182.1,Luis Castillo,Cincinnati Reds,National League Central,2.915077e-01
580,165,23,54,239,76,0,0,13,9,3.43,1.10,199.2,Stephen Strasburg,Washington Nationals,National League East,2.617634e-01
566,167,23,63,222,74,0,0,13,8,3.42,1.18,194.2,Patrick Corbin,Washington Nationals,National League East,2.370189e-01
392,145,19,47,183,66,0,0,13,7,3.52,1.14,168.2,Walker Buehler,Los Angeles Dodgers,National League West,2.072701e-01
251,99,12,44,152,42,0,0,11,5,2.98,1.13,126.2,Tyler Glasnow,Tampa Bay Rays,American League East,2.037299e-01


In [16]:
# NOTE(review): only the last expression of a cell is rendered, so this sorted
# three-column view is computed and then discarded; the output shown is `merged`.
merged.loc[:, ['name_display_first_last', 'player_id', 'ip']].sort_values(by='ip')
merged

Unnamed: 0,hr,player,wpct,era,bsv,outs,sho,sv,whip,qs,...,address_state,division_full,time_zone_num,spring_league_full,address,name_display_brief,file_code_y,division_id,spring_league_id,venue_short
4,35,"Bundy, Dylan",.364,4.91,0,500,0,0,1.33,10,...,MD,American League East,-4.0,Grapefruit League,"333 West Camden Street&#xa;Baltimore, MD&#x9;2...",Orioles,bal,201.0,115.0,Oriole Park
5,29,"Cashner, Andrew",.450,4.90,0,518,0,0,1.45,12,...,MD,American League East,-4.0,Grapefruit League,"333 West Camden Street&#xa;Baltimore, MD&#x9;2...",Orioles,bal,201.0,115.0,Oriole Park
7,24,"Cobb, Alex",.333,5.65,0,306,0,0,1.46,6,...,MD,American League East,-4.0,Grapefruit League,"333 West Camden Street&#xa;Baltimore, MD&#x9;2...",Orioles,bal,201.0,115.0,Oriole Park
15,23,"Means, John",.474,4.33,0,380,0,0,1.29,7,...,MD,American League East,-4.0,Grapefruit League,"333 West Camden Street&#xa;Baltimore, MD&#x9;2...",Orioles,bal,201.0,115.0,Oriole Park
20,35,"Straily, Dan",.294,6.78,0,394,0,0,1.63,4,...,MD,American League East,-4.0,Grapefruit League,"333 West Camden Street&#xa;Baltimore, MD&#x9;2...",Orioles,bal,201.0,115.0,Oriole Park
35,29,"Porcello, Rick",.522,4.56,0,545,0,0,1.31,15,...,MA,American League East,-4.0,Grapefruit League,"4 Yawkey Way&#xa;Boston, MA&#x9;2215",Red Sox,bos,201.0,115.0,Fenway Park
37,22,"Price, David",.588,3.76,0,467,0,0,1.19,13,...,MA,American League East,-4.0,Grapefruit League,"4 Yawkey Way&#xa;Boston, MA&#x9;2215",Red Sox,bos,201.0,115.0,Fenway Park
39,21,"Rodriguez, Eduardo",.600,4.32,0,500,0,0,1.33,14,...,MA,American League East,-4.0,Grapefruit League,"4 Yawkey Way&#xa;Boston, MA&#x9;2215",Red Sox,bos,201.0,115.0,Fenway Park
40,25,"Sale, Chris",.545,3.39,0,590,0,0,1.03,20,...,MA,American League East,-4.0,Grapefruit League,"4 Yawkey Way&#xa;Boston, MA&#x9;2215",Red Sox,bos,201.0,115.0,Fenway Park
43,13,"Velazquez, Hector",.417,4.80,0,304,0,0,1.38,2,...,MA,American League East,-4.0,Grapefruit League,"4 Yawkey Way&#xa;Boston, MA&#x9;2215",Red Sox,bos,201.0,115.0,Fenway Park
