In [39]:
import pandas as pd
import numpy as np
import os
import pickle

In [40]:
import sys

# Make the project-local `config` package importable from the notebook kernel.
# PYTHONPATH may be unset (os.environ.get would return None) or may hold several
# os.pathsep-separated directories, so each non-empty entry is added on its own
# instead of appending the raw value as a single sys.path element.
for _pathEntry in os.environ.get('PYTHONPATH', '').split(os.pathsep):
    if _pathEntry and _pathEntry not in sys.path:
        sys.path.append(_pathEntry)

from config import config

In [41]:
# Input tables live under <PROJECT_ROOT_DIR>/data: the projected stat files in
# 'daily', the player/team lookup tables in 'historical'.
_dataDir = os.path.join(config.PROJECT_ROOT_DIR, 'data')
_dailyDir = os.path.join(_dataDir, 'daily')
_historicalDir = os.path.join(_dataDir, 'historical')

# Every file is decoded with the 'latin' codec.
pitching = pd.read_csv(os.path.join(_dailyDir, 'pitchingProjected.csv'), encoding='latin')
hitting = pd.read_csv(os.path.join(_dailyDir, 'hittingProjected.csv'), encoding='latin')
players = pd.read_csv(os.path.join(_historicalDir, 'players.csv'), encoding='latin')
teams = pd.read_csv(os.path.join(_historicalDir, 'teams.csv'), encoding='latin')

In [42]:
# Load the pickled model object (used further down through `lr.predict_proba`).
# The path is relative, so the file is resolved against the kernel's current
# working directory.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# 'mvp.model' must only ever come from a trusted source.
with open('mvp.model', 'rb') as f:
    # The `with` block closes the file on exit; no explicit close() is needed.
    lr = pickle.load(f)

In [43]:
def nineInningNormalize(df, statColumn, inningsPitchedColumn = 'ip'):
    """Scale a counting stat to a per-nine-innings rate.

    The innings column is converted before dividing: its fractional part is
    multiplied by 10/3 and added to the rounded whole part, so a value such as
    100.2 is treated as 100 + 2/3 innings (presumably the usual box-score
    notation where the decimal digit counts outs -- confirm against the data).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the stat and the innings-pitched columns.
    statColumn : str
        Name of the counting-stat column to normalize.
    inningsPitchedColumn : str, default 'ip'
        Name of the innings-pitched column.

    Returns
    -------
    pandas.Series
        ``df[statColumn] * 9 / innings``, aligned to ``df``'s index. Rows with
        zero innings produce inf/NaN; no special handling is applied here.
    """
    # Read innings from the frame that was passed in, not from a notebook
    # global, so the helper works for any DataFrame.
    inningsPitched = df[inningsPitchedColumn]
    trueInnings = inningsPitched % 1 * 10 / 3 + inningsPitched.round()
    return df[statColumn] * 9 / trueInnings

In [44]:
def atBatNormalize(df, statColumn, atBatColumn):
    """Return ``df[statColumn]`` divided element-wise by ``df[atBatColumn]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both columns.
    statColumn : str
        Name of the numerator column.
    atBatColumn : str
        Name of the denominator column.

    Returns
    -------
    pandas.Series
        The per-row ratio, aligned to ``df``'s index. Zero denominators are
        not special-cased.
    """
    statValues = df[statColumn]
    atBats = df[atBatColumn]
    return statValues / atBats

In [45]:
# Outer join keeps players that appear in only one of the two projection files;
# overlapping stat columns get pandas' default _x (pitching) / _y (hitting) suffixes.
stats = pitching.merge(hitting, how='outer', on='player_id')

# Attach player attributes, stamp every row with the configured season, then
# attach the team row matching (team_id, season).
merged = (
    stats
    .merge(players, how='left', on='player_id')
    .assign(season=config.CURRENT_SEASON)
    .merge(teams, how='left', on=['team_id', 'season'])
)

In [46]:
minimumInningsPitched = 100
minimumAtBats = 300

# Keep only players above a playing-time threshold: rows whose primary_position
# is '1' are judged on innings pitched, every other row on at-bats.
isPitcher = merged['primary_position'] == '1'
qualifiedPitcher = isPitcher & (merged['ip'] > minimumInningsPitched)
qualifiedHitter = ~isPitcher & (merged['ab'] > minimumAtBats)

# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the pre-filter frame (SettingWithCopyWarning).
merged = merged[qualifiedPitcher | qualifiedHitter].copy()

# Pitching rates (the _x columns come from the pitching side of the merge).
merged['sv_pct'] = merged['sv'] / merged['svo']
merged['win_pct'] = merged['w'] / (merged['w'] + merged['l'])
for rateColumn, countColumn in [('hits_9', 'h_x'), ('hrs_9', 'hr_x'), ('bbs_9', 'bb_x'),
                                ('ks_9', 'so_x'), ('ers_9', 'er')]:
    merged[rateColumn] = nineInningNormalize(merged, countColumn)

# Hitting rates (the _y columns come from the hitting side of the merge).
for rateColumn, countColumn in [('hit_ab', 'h_y'), ('hr_ab', 'hr_y'), ('rbi_ab', 'rbi'),
                                ('bb_ab', 'bb_y'), ('k_ab', 'so_y')]:
    merged[rateColumn] = atBatNormalize(merged, countColumn, 'ab')

# Model inputs first (in this exact order), followed by columns that are only
# carried along for display/filtering and are removed again before prediction.
modelColumns = ['hits_9', 'hrs_9', 'bbs_9', 'ks_9', 'ers_9', 'sv_pct', 'win_pct', 'era', 'whip', 'hit_ab', 'hr_ab',
                'rbi_ab', 'bb_ab', 'k_ab', 'slg', 'obp', 'name_display_first_last', 'team_name', 'division_full',
                'primary_position', 'ab', 'ip']
# Again an explicit copy, so the .loc writes below modify modelData itself.
modelData = merged[modelColumns].copy()

# Strikeout rate is zeroed for rows with at most 5 innings pitched.
modelData.loc[modelData['ip'] <= 5, 'ks_9'] = 0.0

# These literal strings appear in the rate columns in place of a number
# (presumably "undefined" markers from the data feed); treat them as 0.0.
placeholdersByColumn = {
    'era': ['-.--', '*.**'],
    'whip': ['-.--', '*.**'],
    'slg': ['.---'],
    'obp': ['.---'],
}
for columnName, placeholders in placeholdersByColumn.items():
    modelData.loc[modelData[columnName].isin(placeholders), columnName] = 0.0

# Division by zero above yields +/-inf, and one-sided players have NaN for the
# other side's stats after the outer merge; both become 0.
modelData = modelData.replace([np.inf, -np.inf], np.nan).fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
  result = method(y)


In [47]:
# Columns that must not be passed to the model. 'prediction' is included so
# that re-running this cell does not feed the previous run's output back in as
# an extra feature (it is simply absent on the first run).
nonFeatureColumns = ['name_display_first_last', 'team_name', 'division_full',
                     'primary_position', 'ab', 'ip', 'prediction']

# Boolean column mask keeps the remaining feature columns in their original order.
predictData = modelData.loc[:, ~modelData.columns.isin(nonFeatureColumns)]

# Column 1 of predict_proba -- presumably the positive-class probability;
# confirm against lr.classes_.
modelData['prediction'] = lr.predict_proba(predictData)[:, 1]
modelData = modelData.sort_values(by=['prediction'], ascending=False)

modelData

Unnamed: 0,hits_9,hrs_9,bbs_9,ks_9,ers_9,sv_pct,win_pct,era,whip,hit_ab,...,k_ab,slg,obp,name_display_first_last,team_name,division_full,primary_position,ab,ip,prediction
870,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.00,0.314815,...,0.225926,0.611,0.401,Cody Bellinger,Los Angeles Dodgers,National League West,3,540.0,0.0,0.917
683,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.00,0.297571,...,0.238866,0.589,0.452,Mike Trout,Los Angeles Angels,American League West,8,494.0,0.0,0.557
588,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.00,0.304505,...,0.236036,0.557,0.382,J.D. Martinez,Boston Red Sox,American League East,7,555.0,0.0,0.553
911,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.00,0.307407,...,0.231481,0.587,0.410,Christian Yelich,Milwaukee Brewers,National League Central,7,540.0,0.0,0.506
851,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.00,0.296173,...,0.166389,0.574,0.361,Nolan Arenado,Colorado Rockies,National League West,5,601.0,0.0,0.495
943,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.00,0.286972,...,0.230634,0.526,0.373,Josh Bell,Pittsburgh Pirates,National League Central,3,568.0,0.0,0.352
831,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.00,0.273214,...,0.171429,0.527,0.385,Anthony Rizzo,Chicago Cubs,National League Central,3,560.0,0.0,0.147
645,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.00,0.273874,...,0.174775,0.519,0.374,Alex Bregman,Houston Astros,American League West,5,555.0,0.0,0.138
654,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.00,0.289199,...,0.242160,0.549,0.378,George Springer,Houston Astros,American League West,8,574.0,0.0,0.031
580,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00,0.00,0.290657,...,0.169550,0.512,0.390,Mookie Betts,Boston Red Sox,American League East,9,578.0,0.0,0.020
