In [3]:
import pandas as pd
import numpy as np
import os
import pickle

In [4]:
import sys
sys.path.append(os.environ.get('PYTHONPATH'))
from config import config

In [41]:
pitching = pd.read_csv(os.path.join(config.PROJECT_ROOT_DIR, 'data', 'daily', 'pitchingProjected.csv'))
players = pd.read_csv(os.path.join(config.PROJECT_ROOT_DIR, 'data', 'historical', 'players.csv'))
teams = pd.read_csv(os.path.join(config.PROJECT_ROOT_DIR, 'data', 'historical', 'teams.csv'))

In [29]:
with open('logisticModel.model', 'rb') as f:
    lr = pickle.load(f)
f.close()



In [30]:
def nineInningNormalize(df, statColumn, inningsPitchedColumn = 'ip'):
    return df[statColumn] * 9/(df[inningsPitchedColumn] % 1 * 10 / 3 + merged[inningsPitchedColumn].round())

In [55]:
merged = pitching.merge(players, how = 'left', left_on=['player_id'], right_on=['player_id'])
merged['season'] = config.CURRENT_SEASON
merged = merged.merge(teams, how = 'left', left_on=['team_id', 'season'], right_on=['team_id', 'season'])

In [35]:
minimumInningsPitched = 100

merged = merged[merged['ip'] > minimumInningsPitched]

merged['sv_pct'] = merged['sv']/merged['svo']
merged['win_pct'] = merged['w']/(merged['w'] + merged['l'])
merged['hits_9'] = nineInningNormalize(merged, 'h')
merged['hrs_9'] = nineInningNormalize(merged, 'hr')
merged['bbs_9'] = nineInningNormalize(merged, 'bb')
merged['ks_9'] = nineInningNormalize(merged, 'so')
merged['ers_9'] = nineInningNormalize(merged, 'er')

modelData = merged[['hits_9', 'hrs_9', 'bbs_9', 'ks_9', 'ers_9', 'sv_pct', 'win_pct', 'era', 'whip', 'name_display_first_last']].fillna(0)

In [36]:
modelData['prediction'] = lr.predict_proba(modelData.loc[:, modelData.columns != 'name_display_first_last'])[:,1]
modelData.sort_values(by = ['prediction'], ascending=False)

Unnamed: 0,hits_9,hrs_9,bbs_9,ks_9,ers_9,sv_pct,win_pct,era,whip,name_display_first_last,prediction
393,7.570093,1.110280,1.564486,9.487850,2.927103,0.0,0.666667,2.93,1.01,Hyun-Jin Ryu,0.857293
119,6.684466,1.354369,2.315534,11.228155,3.189320,0.0,0.708333,3.19,1.00,Justin Verlander,0.717650
390,8.058935,1.180608,1.745247,8.880228,3.336502,0.0,0.684211,3.34,1.09,Clayton Kershaw,0.713575
561,7.451163,1.004651,2.051163,12.055814,3.223256,0.0,0.565217,3.22,1.06,Max Scherzer,0.639224
502,6.824834,1.017738,2.274945,10.057650,2.993348,0.0,0.555556,2.99,1.01,0,0.623631
243,7.028351,0.835052,3.131443,10.786082,2.992268,0.0,0.687500,2.99,1.13,Tyler Glasnow,0.620850
40,7.065858,1.123050,2.012132,12.166378,3.369151,0.0,0.545455,3.37,1.01,Chris Sale,0.579862
437,7.141638,0.967577,2.441980,11.795222,2.994881,0.0,0.541667,2.99,1.06,Jacob deGrom,0.572808
71,8.109712,1.214029,1.893885,10.877698,3.544964,0.0,0.608696,3.54,1.11,Carlos Carrasco,0.526134
382,7.614000,0.972000,2.646000,9.774000,3.618000,0.0,0.684211,3.62,1.14,Walker Buehler,0.489831


In [40]:
merged[['name_display_first_last', 'player_id', 'ip']].sort_values(by=['ip'])
merged.columns

Index(['hr', 'player', 'wpct', 'era', 'bsv', 'outs', 'sho', 'sv', 'whip', 'qs',
       'bb', 'g', 'hld', 'so', 'l', 'hb', 'svo', 'h', 'ip', 'w', 'r', 'pa',
       'player_id', 'cg', 'gs', 'ibb', 'er', 'birth_country', 'name_prefix',
       'name_display_first_last', 'college', 'height_inches', 'death_country',
       'age', 'name_display_first_last_html', 'gender', 'height_feet',
       'pro_debut_date', 'death_date', 'primary_position', 'birth_date',
       'team_abbrev', 'status', 'name_display_last_first_html', 'throws',
       'death_city', 'primary_position_txt', 'high_school',
       'name_display_roster_html', 'name_use', 'status_date',
       'primary_stat_type', 'team_id', 'active_sw', 'primary_sport_code',
       'birth_state', 'weight', 'name_middle', 'name_display_roster',
       'end_date', 'jersey_number', 'death_state', 'name_first', 'bats',
       'team_code', 'birth_city', 'name_nick', 'status_code',
       'name_matrilineal', 'team_name', 'name_display_last_first',
  

In [54]:
teams.season

0       1960
1       1960
2       1960
3       1960
4       1960
5       1960
6       1960
7       1960
8       1960
9       1960
10      1960
11      1960
12      1960
13      1960
14      1960
15      1960
16      1961
17      1961
18      1961
19      1961
20      1961
21      1961
22      1961
23      1961
24      1961
25      1961
26      1961
27      1961
28      1961
29      1961
        ... 
1494    2017
1495    2017
1496    2017
1497    2017
1498    2017
1499    2017
1500    2017
1501    2017
1502    2017
1503    2017
1504    2017
1505    2017
1506    2017
1507    2017
1508    2017
1509    2017
1510    2017
1511    2017
1512    2017
1513    2017
1514    2017
1515    2017
1516    2017
1517    2017
1518    2017
1519    2017
1520    2017
1521    2017
1522    2017
1523    2017
Name: season, Length: 1524, dtype: int64