# Simple xGoals Model
The purpose of this notebook is to show how expected goals models are created. Nowadays, xGoals models take much more data than we will be using for our predictions. For our model well be using the x-coordinate and y-coordinate of the shooter and the shot type, from this we will attempt to predict the chance of the shot being a goal using data from the recent 18-19 NHL regular season. For the data we'll be using shots from 5x5 in regular time and well only be using shots on target, misses and goals because for blocks the NHL stores the co-ordinates as that of the blocking player not the shooter.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from sklearn.neural_network import MLPClassifier
import pickle
from mpl_toolkits import mplot3d

In [2]:
shots = pd.read_csv('shot_dataRAW18.csv')
twenty17 = pd.read_csv('shot_dataRAW17.csv')
shots = shots.append(twenty17)
shots.head()

Unnamed: 0.1,Unnamed: 0,Game_Id,Date,Period,Event,Description,Time_Elapsed,Seconds_Elapsed,Strength,Ev_Zone,...,Away_Score,Home_Score,Away_Goalie,Away_Goalie_Id,Home_Goalie,Home_Goalie_Id,xC,yC,Home_Coach,Away_Coach
0,0,20001,2018-10-03,1,PSTR,Period Start- Local time: 7:16 EDT,0:00,0.0,5x5,,...,0,0,CAREY PRICE,8471679.0,FREDERIK ANDERSEN,8475883.0,,,MIKE BABCOCK,CLAUDE JULIEN
1,1,20001,2018-10-03,1,FAC,MTL won Neu. Zone - MTL #13 DOMI vs TOR #34 MA...,0:00,0.0,5x5,Neu,...,0,0,CAREY PRICE,8471679.0,FREDERIK ANDERSEN,8475883.0,0.0,0.0,MIKE BABCOCK,CLAUDE JULIEN
2,2,20001,2018-10-03,1,SHOT,"MTL ONGOAL - #62 LEHKONEN, Backhand, Off. Zone...",0:29,29.0,5x5,Off,...,0,0,CAREY PRICE,8471679.0,FREDERIK ANDERSEN,8475883.0,78.0,-19.0,MIKE BABCOCK,CLAUDE JULIEN
3,3,20001,2018-10-03,1,SHOT,"TOR ONGOAL - #44 RIELLY, Snap, Off. Zone, 52 ft.",0:49,49.0,5x5,Off,...,0,0,CAREY PRICE,8471679.0,FREDERIK ANDERSEN,8475883.0,-37.0,-10.0,MIKE BABCOCK,CLAUDE JULIEN
4,4,20001,2018-10-03,1,SHOT,"MTL ONGOAL - #11 GALLAGHER, Wrist, Off. Zone, ...",1:00,60.0,5x5,Off,...,0,0,CAREY PRICE,8471679.0,FREDERIK ANDERSEN,8475883.0,47.0,-23.0,MIKE BABCOCK,CLAUDE JULIEN


In [3]:
shots = shots[((shots['Event'] == 'MISS') | (shots['Event'] == 'SHOT') | (shots['Event'] == 'GOAL')) & ((shots['Strength'] == '5x5') |(shots['Strength'] == '4x4' )) & (shots['Period'] <= 3)]
shots.head()

Unnamed: 0.1,Unnamed: 0,Game_Id,Date,Period,Event,Description,Time_Elapsed,Seconds_Elapsed,Strength,Ev_Zone,...,Away_Score,Home_Score,Away_Goalie,Away_Goalie_Id,Home_Goalie,Home_Goalie_Id,xC,yC,Home_Coach,Away_Coach
2,2,20001,2018-10-03,1,SHOT,"MTL ONGOAL - #62 LEHKONEN, Backhand, Off. Zone...",0:29,29.0,5x5,Off,...,0,0,CAREY PRICE,8471679.0,FREDERIK ANDERSEN,8475883.0,78.0,-19.0,MIKE BABCOCK,CLAUDE JULIEN
3,3,20001,2018-10-03,1,SHOT,"TOR ONGOAL - #44 RIELLY, Snap, Off. Zone, 52 ft.",0:49,49.0,5x5,Off,...,0,0,CAREY PRICE,8471679.0,FREDERIK ANDERSEN,8475883.0,-37.0,-10.0,MIKE BABCOCK,CLAUDE JULIEN
4,4,20001,2018-10-03,1,SHOT,"MTL ONGOAL - #11 GALLAGHER, Wrist, Off. Zone, ...",1:00,60.0,5x5,Off,...,0,0,CAREY PRICE,8471679.0,FREDERIK ANDERSEN,8475883.0,47.0,-23.0,MIKE BABCOCK,CLAUDE JULIEN
13,13,20001,2018-10-03,1,MISS,"MTL #28 REILLY, Snap, Wide of Net, Off. Zone, ...",3:49,229.0,5x5,Off,...,0,0,CAREY PRICE,8471679.0,FREDERIK ANDERSEN,8475883.0,73.0,22.0,MIKE BABCOCK,CLAUDE JULIEN
14,14,20001,2018-10-03,1,SHOT,"MTL ONGOAL - #11 GALLAGHER, Snap, Off. Zone, 3...",3:54,234.0,5x5,Off,...,0,0,CAREY PRICE,8471679.0,FREDERIK ANDERSEN,8475883.0,53.0,14.0,MIKE BABCOCK,CLAUDE JULIEN


In [4]:
#reform the dataframe to contain only what we need to find values
shots = shots[['Period','Ev_Team','Home_Zone','Home_Team','xC','yC','Event']]
shots.head()

Unnamed: 0,Period,Ev_Team,Home_Zone,Home_Team,xC,yC,Event
2,1,MTL,Def,TOR,78.0,-19.0,SHOT
3,1,TOR,Off,TOR,-37.0,-10.0,SHOT
4,1,MTL,Def,TOR,47.0,-23.0,SHOT
13,1,MTL,Def,TOR,73.0,22.0,MISS
14,1,MTL,Def,TOR,53.0,14.0,SHOT


In [5]:
'''
here we will change how the co-ordinates are set up. currently they are set up so that 0,0 id the center dot and then 
the away net is -89, 0 and the home net is 89,0. We will change the cooridnates so that they are no longer rooted
at the center dot but so that 0,0 is actually the net and thus 5,6 would be 5 feet up from the net and 6 feet to the right
think cartesian plane
'''
shots = shots.transpose().to_dict()
for shot in shots:
    if shots[shot]['Period'] == 1 or shots[shot]['Period'] == 3:
        if shots[shot]['Ev_Team'] == shots[shot]['Home_Team']:
            #a hockey rink is 200 feet wide and in the API the center dot is 0,0 so each end is 100ft away
            #the away side is negative x and the home side(at the first and third period) is positive x
            #this will give us the distance from the away net if the shooting team is shooting in the 1/3 period
            #because -84 + 89 = 5 and thats the same distance from the away net of the player
            shots[shot]['xC'] = shots[shot]['xC'] + 89
            #multiply by -1 to flip the sides because we want the right side from the net to be positive from 
            shots[shot]['yC'] = shots[shot]['yC'] * -1

        else:
            shots[shot]['xC'] = 89 - shots[shot]['xC']
           

    else:
        #now we switch the transformations because the teams are on the other side of the ice
        if shots[shot]['Ev_Team'] == shots[shot]['Home_Team']:

            shots[shot]['xC'] = 89 -shots[shot]['xC']
        else:
            shots[shot]['xC'] = shots[shot]['xC'] + 89
            shots[shot]['yC'] = shots[shot]['yC'] * -1
    if shots[shot]['Event'] == 'GOAL':
        shots[shot]['Event'] = 1
    else:
        shots[shot]['Event'] = 0

  import sys


In [6]:
shots = pd.DataFrame(shots).transpose()
shots[shots['xC'] > 64].head()

Unnamed: 0,Ev_Team,Event,Home_Team,Home_Zone,Period,xC,yC
296,WPG,0,WPG,Neu,3,79,27
302,MTL,0,TOR,Neu,3,69,7
330,PIT,0,PIT,Neu,1,98,18
338,PIT,0,PIT,Off,1,129,0
348,PIT,0,PIT,Off,1,124,-29


In [7]:
points = np.where(np.isnan(shots['xC'].values.tolist()))
points = points[0].tolist()
shots = shots.transpose().to_dict()
i = 0
n = 0
j = []
for c in shots:
    if i == points[n]:
        n = n+1
        j.append(c)
    i = i+1
    if n == len(points):
        break
points = j

In [8]:
for vals in points:
    shots.pop(vals)


In [9]:
#change xC to represent the distance(pythagorean theorem) and yC to represent angle from right post
#to left post(directly in front of net will be 90 degrees, 180 will be at the left post on the goal line) 
for shot in shots:
    dist = np.sqrt(shots[shot]['xC'] **2 + shots[shot]['yC'] **2)
    angle = round(math.atan2(shots[shot]['xC'], shots[shot]['yC']) * 180 / math.pi)
    
    if angle == 90:
        angle =0
        side = 0
    elif angle < 90:
        angle = 90 - angle
        side =1
    elif angle > 90 :
        angle = angle - 90
        side = 2

    
    shots[shot]['dist'] = round(dist,1)
    shots[shot]['angle'] = round(angle,1)
shots = pd.DataFrame(shots).transpose()
len(shots)

162675

In [10]:
model = MLPClassifier(activation = 'tanh', learning_rate_init = 0.005, tol= 0.00000001 )

In [11]:
x_train = shots[['dist','angle']].values.reshape(-1,2)
y_train = shots[['Event']].values.reshape(-1,)
y_train=y_train.astype('int')


In [12]:
model = model.fit(x_train,y_train)

In [13]:
filename = 'xGoals_model.sav'
pickle.dump(model, open(filename,'wb'))

In [14]:
mock = np.array([5,5]).reshape(1,2)
model.predict_proba(mock)

array([[0.80774356, 0.19225644]])