# Physical characteristics
* Game and goal level analyses

In [117]:
import os
import sys
import pandas
import numpy
import matplotlib
import matplotlib.pyplot as plt
%matplotlib notebook

import statsmodels.api as sm

pandas.set_option('display.notebook_repr_html', True)
pandas.set_option('display.max_columns', 40)
pandas.set_option('display.max_rows', 25)
pandas.set_option('precision', 4)

from IPython.display import Markdown

# Import data

## Games

In [118]:
dg = pandas.read_csv('nhl_game_2010.csv')
dl = pandas.read_csv('nhl_goal_2010.csv')
DG = dg.copy()
DL = dl.copy()

In [119]:
dg = DG.copy()
dg = dg.drop(['awayGoaliue', 'homeGoalie'], axis=1)
dg = dg.sort_values('gamenumber')
dg = dg.rename(columns={'awayTeam': 'vteamcode', 'awaySalary': 'vsal', 'awayAge': 'vage', 'awayHeight': 'vheight', 'awayWeight': 'vweight', 'awayGoals': 'agoals'})
dg = dg.rename(columns={'homeTeam': 'hteamcode', 'homeSalary': 'hsal', 'homeAge': 'hage', 'homeHeight': 'hheight', 'homeWeight': 'hweight', 'homeGoals': 'agoals'})
dg.head()

Unnamed: 0,gamenumber,vteamcode,vsal,vage,vheight,vweight,agoals,hteamcode,hsal,hage,hheight,hweight,agoals.1,winteamcode
0,20001,MTL,38.3833,501,1313,3695,2,TOR,44.613,471,1312,3715,3,TOR
1,20002,PHI,50.365,498,1317,3669,3,PIT,49.1924,497,1308,3611,2,PHI
2,20003,CAR,35.825,471,1315,3574,4,MIN,40.18,503,1321,3734,3,CAR
3,20004,CHI,50.56,462,1326,3700,3,COL,33.75,469,1306,3627,4,COL
4,20005,CGY,39.765,505,1331,3726,0,EDM,41.4,458,1324,3745,4,EDM


## Goals

In [120]:
dl = DL.copy()
dl = dl.drop(['Unnamed: 0', 'eventtimefromzero', 'eventtype', 'advantagetype'], axis=1)
dl = dl.rename(columns={'iseventteamhome': 'ishgoal'})
dl = dl.sort_values(['season', 'gamenumber', 'eventnumber'])
dl.head()

Unnamed: 0,season,gamenumber,period,eventnumber,eventteamcode,ishgoal,hteamcode,vteamcode,hgoals,vgoals,hheight,hweight,hage,vheight,vweight,vage,hsal,vsal
5150,2010,20001,1,35,TOR,1,TOR,MTL,0,0,365,1042,134,369,1059,126,12700.0,11125.0
5092,2010,20001,1,49,TOR,1,TOR,MTL,1,0,365,1018,124,370,1061,154,16088.0,5550.0
4992,2010,20001,1,68,MTL,0,TOR,MTL,2,0,364,1029,127,363,1025,141,14563.0,6758.2998
5015,2010,20001,3,223,TOR,1,TOR,MTL,2,1,366,1038,128,361,1026,133,12580.0,13833.3
5142,2010,20001,3,232,MTL,0,TOR,MTL,3,1,363,1035,130,372,1045,144,19008.0,6400.0


In [121]:
dg['lndsal'] = numpy.log(dg.hsal/dg.vsal)
dg['lndage'] = numpy.log(dg.hage/dg.vage)
dg['lndhei'] = numpy.log(dg.hheight/dg.vheight)
dg['lndwei'] = numpy.log(dg.hweight/dg.vweight)
dg['y'] = numpy.where(dg.winteamcode==dg.hteamcode, 1, 0)
dg.head()



Unnamed: 0,gamenumber,vteamcode,vsal,vage,vheight,vweight,agoals,hteamcode,hsal,hage,hheight,hweight,agoals.1,winteamcode,lndsal,lndage,lndhei,lndwei,y
0,20001,MTL,38.3833,501,1313,3695,2,TOR,44.613,471,1312,3715,3,TOR,0.1504,-0.0617,-0.0008,0.0054,1
1,20002,PHI,50.365,498,1317,3669,3,PIT,49.1924,497,1308,3611,2,PHI,-0.0236,-0.002,-0.0069,-0.0159,0
2,20003,CAR,35.825,471,1315,3574,4,MIN,40.18,503,1321,3734,3,CAR,0.1147,0.0657,0.0046,0.0438,0
3,20004,CHI,50.56,462,1326,3700,3,COL,33.75,469,1306,3627,4,COL,-0.4042,0.015,-0.0152,-0.0199,1
4,20005,CGY,39.765,505,1331,3726,0,EDM,41.4,458,1324,3745,4,EDM,0.0403,-0.0977,-0.0053,0.0051,1


In [122]:
dl['lndsal'] = numpy.log(dl.hsal/dl.vsal)
dl['lndage'] = numpy.log(dl.hage/dl.vage)
dl['lndhei'] = numpy.log(dl.hheight/dl.vheight)
dl['lndwei'] = numpy.log(dl.hweight/dl.vweight)
dl['y'] = numpy.where(dl.eventteamcode==dl.hteamcode, 1, 0)
dl.head()


Unnamed: 0,season,gamenumber,period,eventnumber,eventteamcode,ishgoal,hteamcode,vteamcode,hgoals,vgoals,hheight,hweight,hage,vheight,vweight,vage,hsal,vsal,lndsal,lndage,lndhei,lndwei,y
5150,2010,20001,1,35,TOR,1,TOR,MTL,0,0,365,1042,134,369,1059,126,12700.0,11125.0,0.1324,0.0616,-0.0109,-0.0162,1
5092,2010,20001,1,49,TOR,1,TOR,MTL,1,0,365,1018,124,370,1061,154,16088.0,5550.0,1.0643,-0.2167,-0.0136,-0.0414,1
4992,2010,20001,1,68,MTL,0,TOR,MTL,2,0,364,1029,127,363,1025,141,14563.0,6758.2998,0.7677,-0.1046,0.0028,0.0039,0
5015,2010,20001,3,223,TOR,1,TOR,MTL,2,1,366,1038,128,361,1026,133,12580.0,13833.3,-0.095,-0.0383,0.0138,0.0116,1
5142,2010,20001,3,232,MTL,0,TOR,MTL,3,1,363,1035,130,372,1045,144,19008.0,6400.0,1.0886,-0.1023,-0.0245,-0.0096,0


# Regression analysis

## Games

In [123]:
_X = dg[['lndage', 'lndhei', 'lndwei', 'lndsal']]
X = sm.add_constant(_X)
Y = dg['y']

mg= sm.OLS(Y, X).fit()
mg.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.031
Model:,OLS,Adj. R-squared:,0.027
Method:,Least Squares,F-statistic:,9.638
Date:,"Mon, 26 Mar 2018",Prob (F-statistic):,1.13e-07
Time:,19:45:29,Log-Likelihood:,-872.73
No. Observations:,1230,AIC:,1755.0
Df Residuals:,1225,BIC:,1781.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.5179,0.014,36.796,0.000,0.490,0.546
lndage,0.1518,0.242,0.626,0.531,-0.324,0.627
lndhei,1.1054,1.285,0.860,0.390,-1.415,3.626
lndwei,-0.9445,0.877,-1.077,0.282,-2.665,0.776
lndsal,0.2171,0.047,4.598,0.000,0.124,0.310

0,1,2,3
Omnibus:,1.001,Durbin-Watson:,1.951
Prob(Omnibus):,0.606,Jarque-Bera (JB):,180.995
Skew:,-0.07,Prob(JB):,4.98e-40
Kurtosis:,1.126,Cond. No.,107.0


## Goals

In [124]:
_X = dl[['lndage', 'lndhei', 'lndwei', 'lndsal']]
X = sm.add_constant(_X)
Y = dl['y']

ml = sm.OLS(Y, X).fit()
ml.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.163
Model:,OLS,Adj. R-squared:,0.162
Method:,Least Squares,F-statistic:,327.1
Date:,"Mon, 26 Mar 2018",Prob (F-statistic):,1.5100000000000001e-257
Time:,19:45:29,Log-Likelihood:,-4276.5
No. Observations:,6725,AIC:,8563.0
Df Residuals:,6720,BIC:,8597.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.5139,0.006,92.081,0.000,0.503,0.525
lndage,-0.1231,0.048,-2.541,0.011,-0.218,-0.028
lndhei,2.1778,0.167,13.053,0.000,1.851,2.505
lndwei,-0.8533,0.158,-5.393,0.000,-1.164,-0.543
lndsal,0.0902,0.009,9.547,0.000,0.072,0.109

0,1,2,3
Omnibus:,3.881,Durbin-Watson:,1.952
Prob(Omnibus):,0.144,Jarque-Bera (JB):,697.068
Skew:,-0.059,Prob(JB):,4.3e-152
Kurtosis:,1.427,Cond. No.,40.6


## Games

Salary increases the likelihood of wins. 
Specifically, each percentage increase in the difference in roster salary of competing teams increase the likelihood of a win by 0.22 points. 
The physical characteristic variables are not statistically significant. However, the signs of the coefficients indicate that increases in roster age and height increase the probability of a team win, increases in roster weight decreases the probability of a team win.

## Goals
Salary increases the likelihood of goals. Specifically, each percentage increase in the difference in players on the ice of competing teams increases the likelihood of a goal by 0.09 points. The physical characteristic variables are statistically significant. 
Each percentage increase in the difference in line age decreases the likelihood of a team goal by 0.12 points.
Each percentage increase in the difference in line height increases the likelihood of a team goal by 2.18 points.
Each percentage increase in the difference in line weight decreases the likelihood of a team goal by 0.85 points, holding all else constant.

## Conclusion

It appears height has a positive impact on production. Age and weight decrease production. Additional game observations would be beneficial to analyze and determine if the results are similar to the goal results.