In [4]:
import pandas as pd
import numpy as np
import pickle

In [5]:
## read in our pickle file and instantiate the trained model
# Opening the file in a `with` block guarantees the handle is closed as soon
# as the model is loaded -- even if unpickling raises -- so no separate
# close() cell is needed (and none can be forgotten or run out of order).
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load model files that come from a source you trust.
with open('final_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [16]:
# The unpickled model exposes its fitted parameters as attributes:
# show the coefficient array first, then the intercept.
for fitted_param in (model.coef_, model.intercept_):
    print(fitted_param)

[ 0.01734693  0.4079972   0.8934535  -0.26518965  0.26239371  1.13351088
  1.23206515 -0.56042056 -1.80515547]
-4.321258350810403


In [37]:
# what is the equation for this model?
# Feature names, listed in the order the model's coefficients are assumed to
# follow (TODO confirm against the notebook that trained the model).
xcolumns = ['AtBat', 'Hits', 'HmRun', 'RBI', 'Walks', 'AE', 'AW', 'NE', 'NW']
coefs = list(model.coef_)

# Guard against a silent mismatch: zip() would quietly drop trailing entries
# if the two lists ever had different lengths.
assert len(coefs) == len(xcolumns), "expected one coefficient per feature name"

# Pair each coefficient with its feature name by position: (coefficient, name).
xcoefs = list(zip(coefs, xcolumns))
xcoefs

[(0.01734692713038667, 'AtBat'),
 (0.4079972014242659, 'Hits'),
 (0.8934535020025788, 'HmRun'),
 (-0.2651896522206619, 'RBI'),
 (0.262393705142494, 'Walks'),
 (1.1335108810885197, 'AE'),
 (1.2320651503896902, 'AW'),
 (-0.5604205619301984, 'NE'),
 (-1.8051554695480112, 'NW')]

In [38]:
# Render the fitted model as a readable equation: the intercept followed by
# one "coefficient*feature" term per (coefficient, name) pair in xcoefs,
# every number rounded to 2 decimals. Building the terms from xcoefs keeps
# the equation in step with the model however many features it has.
terms = [f'{round(coef, 2)}*{name}' for coef, name in xcoefs]
output = ' + '.join([f'y = {round(model.intercept_, 2)}'] + terms)
print("Regression Equation: ", output)


Regression Equation:  y = -4.32 + 0.02*AtBat + 0.41*Hits + 0.89*HmRun + -0.27*RBI + 0.26*Walks + 1.13*AE + 1.23*AW  + -0.56*NE + -1.81*NW 


### generate some fake data
Let's pretend it's 1956

#### Mickey Mantle

In [23]:
# Mickey Mantle's actual stats for the season.
# Runs is the real total that the model's prediction is compared against.
Runs = 132

# Batting inputs fed to the model.
AtBat, Hits, HmRun, RBI, Walks = 533, 188, 52, 130, 112

# 0/1 indicator inputs; only AE is switched on for Mantle
# (presumably league/division dummy variables -- TODO confirm with the training data).
AE, AW, NE, NW = 1, 0, 0, 0

In [24]:
# Collect the stats in a one-column dataframe ('stats') whose index,
# named 'features', holds the feature name for each value.
features = [
    'AtBat', 'Hits', 'HmRun', 'RBI', 'Walks',
    'AE', 'AW', 'NE', 'NW',
]
stats = [
    AtBat, Hits, HmRun, RBI, Walks,
    AE, AW, NE, NW,
]
mickey = pd.DataFrame({'stats': stats}, index=pd.Index(features, name='features'))
mickey

Unnamed: 0_level_0,stats
features,Unnamed: 1_level_1
AtBat,533
Hits,188
HmRun,52
RBI,130
Walks,112
AE,1
AW,0
NE,0
NW,0


In [25]:
# remember you can reshape your data:
# copy the 'stats' column into a 1-D array, then add a leading axis so the
# values form a single row with one column per stat.
stat_values = np.array(mickey['stats'])
reshaped_stats = stat_values[np.newaxis, :]

In [26]:
# How many runs does the loaded model predict for Mickey?
# predict() returns an array holding one value for our single row of stats.
predicted_runs = model.predict(reshaped_stats)
predicted_runs

array([124.13466085])

In [39]:
# Plugging the stats into the printed equation by hand gives approximately the
# same answer as model.predict. The small gap comes from the equation using
# coefficients rounded to 2 decimals instead of the model's full-precision values.
rounded_terms = [
    (0.02, AtBat),
    (0.41, Hits),
    (0.89, HmRun),
    (-0.27, RBI),
    (0.26, Walks),
    (1.13, AE),
    (1.23, AW),
    (-0.56, NE),
    (-1.81, NW),
]

# Start from the rounded intercept and add each term left to right.
y = -4.32
for coefficient, value in rounded_terms:
    y += coefficient * value
print(y)

124.85


In 1956, Mantle won the Major League Triple Crown when he batted .353, hit 52 home runs, and batted in 130 runs. He also scored 132 runs that season, which is the figure our model is trying to predict.
* http://mickeymantle.com/1956-statistics/
* https://www.baseball-reference.com/players/m/mantlmi01.shtml

In [29]:
# what's our error?
# Absolute gap between the predicted runs and Mantle's actual total, taken
# from Runs (set in the stats cell) rather than repeating the number here.
error = abs(model.predict(reshaped_stats) - Runs)

# error is a one-element array; round() with no digits argument reports it as
# a whole number, which matches the "about N runs" wording of the message.
print("For Mickey Mantle, our model is off by about {} runs".format(round(error[0])))

For Mickey Mantle, our model is off by about 8 runs


![Mickey Mantle 1956 Topps baseball card](http://mickeymantle.com/wp-content/uploads/2014/10/56toppsB.jpg)