Custom Model

Import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn.metrics as metrics

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.calibration import calibration_curve, CalibrationDisplay


import matplotlib.ticker as mtick
import seaborn as sns

import sklearn
import imblearn


from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler

sns.set()

In [33]:
# Import comet_ml at the top of your file
from comet_ml import Experiment

# Create an experiment with your api key
experiment = Experiment(
    api_key="UGYDiy3HENiE7Y3dqoMAVIgG2",
    project_name="custom-models",
    workspace="ift6758a-a22-g3-projet",
)

# Report multiple hyperparameters using a dictionary:
hyper_params = {
    "learning_rate": 0.01,
    "batch_size": 50,
    "num_epochs": 10,
}
experiment.log_parameters(hyper_params)


COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/ift6758a-a22-g3-projet/custom-models/460be0d968d74336b75244e1bed91c95
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     loss [13100] : (0.6475610136985779, 0.8076536655426025)
COMET INFO:   Parameters:
COMET INFO:     batch_size    : 50
COMET INFO:     learning_rate : 0.5
COMET INFO:     num_epochs    : 10
COMET INFO:   Uploads:
COMET INFO:     conda-environment-definition : 1
COMET INFO:     conda-info                   : 1
COMET INFO:     conda-specification          : 1
COMET INFO:     environment details          : 1
COMET INFO:     filename                     : 1
COMET INFO:     git metadata                 : 1
COMET INFO:     git-patch (uncompressed)     : 1 (6.33 KB)
COMET INFO:     installed packages           : 1
COMET INFO:    

In [3]:
df = pd.read_csv('../ift6758/data/extracted/shot_goal_20151007_20210707.csv')

df_dropped = df[(df['season'].isin([20152016, 20162017, 20172018, 20182019])) & (df['gameType'] == 'R') & (
            df['periodType'] != 'SHOOTOUT')]

df_filtered = df_dropped[['speedOfChangeOfAngle', 'speed', 'changeOfAngleFromPrev', 'rebound', 'distanceFromPrev'
                          , 'secondsSincePrev', 'prevAngleWithGoal', 'prevY', 'prevX', 'prevEvent', 'prevSecondsSinceStart',
                          'angleWithGoal', 'distanceToGoal', 'x', 'y', 'emptyNet', 'strength', 'secondsSinceStart', 'shotType', 'isGoal']]

columns_count = len(df_filtered.columns) - 1

df_filtered['emptyNet'] = df_filtered['emptyNet'].fillna(0)
df_filtered['strength'] = df_filtered['strength'].fillna('Even')
df_filtered = df_filtered.dropna()

df_filtered.head(1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['emptyNet'] = df_filtered['emptyNet'].fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['strength'] = df_filtered['strength'].fillna('Even')


Unnamed: 0,speedOfChangeOfAngle,speed,changeOfAngleFromPrev,rebound,distanceFromPrev,secondsSincePrev,prevAngleWithGoal,prevY,prevX,prevEvent,prevSecondsSinceStart,angleWithGoal,distanceToGoal,x,y,emptyNet,strength,secondsSinceStart,shotType,isGoal
0,0.0,14.025066,0.0,False,154.275727,11.0,98.365886,-34.0,94.0,Hit,40.0,10.00798,34.525353,-55.0,6.0,0.0,Even,51,Wrist Shot,0.0


In [4]:
df_filtered['strength'].unique()

array(['Even', 'Power Play', 'Short Handed'], dtype=object)

In [5]:
df_filtered.isna().sum()

speedOfChangeOfAngle     0
speed                    0
changeOfAngleFromPrev    0
rebound                  0
distanceFromPrev         0
secondsSincePrev         0
prevAngleWithGoal        0
prevY                    0
prevX                    0
prevEvent                0
prevSecondsSinceStart    0
angleWithGoal            0
distanceToGoal           0
x                        0
y                        0
emptyNet                 0
strength                 0
secondsSinceStart        0
shotType                 0
isGoal                   0
dtype: int64

In [6]:
dummy_object = pd.get_dummies(df_filtered[['strength', 'shotType', 'prevEvent']])
df_filtered = df_filtered.merge(dummy_object, left_index=True, right_index=True)
df_filtered = df_filtered.drop(labels = ['strength', 'shotType', 'prevEvent'], axis = 1)

In [42]:
columns_count = len(df_filtered.columns)

Split Train & Validation Dataset

In [7]:
train, test = train_test_split(df_filtered, test_size=0.33, random_state=42)
print(train.shape)
print(test.shape)

(206814, 36)
(101864, 36)


In [8]:
x_train = train[['speedOfChangeOfAngle', 'speed', 'changeOfAngleFromPrev', 'rebound',
       'distanceFromPrev', 'secondsSincePrev', 'prevAngleWithGoal', 'prevY',
       'prevX', 'prevSecondsSinceStart', 'angleWithGoal', 'distanceToGoal',
       'x', 'y', 'emptyNet', 'secondsSinceStart', 'isGoal', 'strength_Even',
       'strength_Power Play', 'strength_Short Handed', 'shotType_Backhand',
       'shotType_Deflected', 'shotType_Slap Shot', 'shotType_Snap Shot',
       'shotType_Tip-In', 'shotType_Wrap-around', 'shotType_Wrist Shot',
       'prevEvent_Blocked Shot', 'prevEvent_Faceoff', 'prevEvent_Giveaway',
       'prevEvent_Goal', 'prevEvent_Hit', 'prevEvent_Missed Shot',
       'prevEvent_Penalty', 'prevEvent_Shot', 'prevEvent_Takeaway']]#.to_numpy().reshape(-1, columns_count)

y_train = train['isGoal']#.to_numpy()

x_train, y_train = RandomOverSampler().fit_resample(x_train, y_train)

x_test = test[['speedOfChangeOfAngle', 'speed', 'changeOfAngleFromPrev', 'rebound',
       'distanceFromPrev', 'secondsSincePrev', 'prevAngleWithGoal', 'prevY',
       'prevX', 'prevSecondsSinceStart', 'angleWithGoal', 'distanceToGoal',
       'x', 'y', 'emptyNet', 'secondsSinceStart', 'isGoal', 'strength_Even',
       'strength_Power Play', 'strength_Short Handed', 'shotType_Backhand',
       'shotType_Deflected', 'shotType_Slap Shot', 'shotType_Snap Shot',
       'shotType_Tip-In', 'shotType_Wrap-around', 'shotType_Wrist Shot',
       'prevEvent_Blocked Shot', 'prevEvent_Faceoff', 'prevEvent_Giveaway',
       'prevEvent_Goal', 'prevEvent_Hit', 'prevEvent_Missed Shot',
       'prevEvent_Penalty', 'prevEvent_Shot', 'prevEvent_Takeaway']]#.to_numpy().reshape(-1, columns_count)

y_test = test['isGoal']#.to_numpy()

In [9]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(375514, 36)
(375514,)
(101864, 36)
(101864,)


In [38]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(36, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 1)
        
    def forward(self, x):
        x = self.fc1(x)
        x = F.dropout(x, p=0.1)
        x = F.relu(x)

        x = self.fc2(x)
        x = F.dropout(x, p=0.1)
        x = F.relu(x)

        x = self.fc3(x)
        x = F.dropout(x, p=0.1)
        x = F.relu(x)

        x = self.fc4(x)
        x = F.sigmoid(x)
        
        return x
    
net = Net()

In [39]:
batch_size = hyper_params["batch_size"]
num_epochs = hyper_params["num_epochs"]
learning_rate = hyper_params["learning_rate"]
batch_no = len(x_train) // batch_size

#criterion = nn.CrossEntropyLoss()
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [86]:
x_train_t = torch.tensor(x_train.to_numpy(dtype=np.float32), dtype = torch.float32)
y_train_t = torch.tensor(y_train.values, dtype = torch.float32)

x_test_t = torch.tensor(x_test.to_numpy(dtype=np.float32), dtype = torch.float32)
y_test_t = torch.tensor(y_test.values, dtype = torch.float32)

In [87]:
from sklearn.utils import shuffle
from torch.autograd import Variable

for epoch in range(num_epochs):
    if epoch % 5 == 0:
        print('Epoch {}'.format(epoch+1))
    x_train_t, y_train_t = shuffle(x_train_t, y_train_t)
    # Mini batch learning
    loss_sum = 0
    for i in range(batch_no):
        start = i * batch_size
        end = start + batch_size
        x_var = Variable(torch.FloatTensor(x_train_t[start:end]))
        y_var = Variable(torch.FloatTensor(y_train_t[start:end]))
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        ypred_var = net(x_var)
        loss =criterion(ypred_var, y_var[:,None])
        loss.backward()
        loss_sum = loss_sum + loss
        print(loss)
        optimizer.step()
    #print(loss_sum/batch_no)

Epoch 1
tensor(27.3984, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(42., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(42., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(48., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(48., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(38., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(44., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(46., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(50., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(44., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(70., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56., grad_fn



tensor(56., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(48., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(48., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(28., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(42., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(40., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(44., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(50., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(46., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(48., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(46., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(46., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(42., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(48., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(42., grad_fn=<BinaryCros

KeyboardInterrupt: 

In [66]:
ypred_var[0:10]

tensor([[1.0000],
        [1.0000],
        [0.9170],
        [1.0000],
        [0.9908],
        [1.0000],
        [1.0000],
        [1.0000],
        [1.0000],
        [1.0000]], grad_fn=<SliceBackward0>)

In [67]:
torch.round(ypred_var).to(torch.int)

tensor([[1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1],
        [1]], dtype=torch.int32)

In [17]:
# Evaluate the model
test_var = Variable(torch.FloatTensor(x_test_t), requires_grad=True)
with torch.no_grad():
    result = net(test_var)
values = torch.round(result[:, 0])

num_right = np.sum(values.data.numpy().astype(int) == y_test)
print('Num Right', num_right)
print('Accuracy {:.2f}'.format(num_right / len(y_test_t)))

Num Right 92654
Accuracy 0.91


In [119]:
y_test_t

tensor([0, 0, 1,  ..., 1, 0, 0])

In [125]:
values.data.numpy().astype(int)

array([0, 0, 0, ..., 0, 0, 0])

In [120]:

torch.round(result[:, 0])

tensor([0., 0., 0.,  ..., 0., 0., 0.])

In [167]:
print(np.sum(y_test))
print(torch.sum(result[:, 1]).item())

9210.0
101864.0


In [18]:
from torchmetrics import ConfusionMatrix
from torchmetrics.classification import BinaryF1Score

target_m = torch.tensor(y_test.to_numpy()).to(torch.int)
pred_m = torch.tensor(result[:, 1]).to(torch.int)

confmat = ConfusionMatrix(num_classes=2)
confmat(target_m, pred_m)

  pred_m = torch.tensor(result[:, 1]).to(torch.int)


tensor([[92654,  9210],
        [    0,     0]])

In [19]:
metric = BinaryF1Score()
print('F1')
print(metric(pred_m, target_m).item())

F1
0.0
