In [1]:
%matplotlib inline

In [2]:
import re
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from keras import backend as K
import random
import numpy as np

random.seed(1)
np.random.seed(2)
tf.random.set_seed(3)
tf.keras.backend.set_floatx('float32')

Using TensorFlow backend.


In [3]:
data = pd.read_csv('netdata.csv', dtype='float32', converters={'PassResult': lambda x: 'R' if pd.isna(x) else x})
data = data.drop(data[~data["playId"].isin(data["playId"].unique()[:10])].index)
data.head()

  """Entry point for launching an IPython kernel.


Unnamed: 0,X,gameId,playId,frame.id,OL_C_x,OL_C_y,OL_LG_x,OL_LG_y,OL_LT_x,OL_LT_y,...,Def_9_x,Def_9_y,Def_10_x,Def_10_y,Def_11_x,Def_11_y,PlayResult,sack.ind,PassResult,num_vec
0,1.0,2017091000.0,68.0,1.0,26.59,23.433332,26.16,24.993334,26.030001,26.723333,...,35.380001,35.143333,42.490002,24.393333,28.469999,43.843334,0.0,0.0,I,1.0
1,2.0,2017091000.0,68.0,2.0,26.59,23.423334,26.16,24.993334,26.030001,26.723333,...,35.369999,35.143333,42.490002,24.403334,28.469999,43.843334,0.0,0.0,I,2.0
2,3.0,2017091000.0,68.0,3.0,26.58,23.423334,26.16,24.993334,26.030001,26.723333,...,35.369999,35.143333,42.5,24.413334,28.469999,43.843334,0.0,0.0,I,3.0
3,4.0,2017091000.0,68.0,4.0,26.58,23.423334,26.16,24.993334,26.030001,26.723333,...,35.369999,35.143333,42.52,24.453333,28.469999,43.843334,0.0,0.0,I,4.0
4,5.0,2017091000.0,68.0,5.0,26.57,23.423334,26.16,24.983334,26.030001,26.723333,...,35.369999,35.153332,42.639999,24.673334,28.48,43.843334,0.0,0.0,I,5.0


In [4]:
data.drop('X', inplace=True, axis=1)
data.fillna('Run', inplace=True)
print(f'New dimensions of DataFrame: {data.shape}')

New dimensions of DataFrame: (2490, 61)


In [5]:
num_plays = data["playId"].unique().size
print(f'Number of unique plays: {num_plays}')
print(f'Average number of frames per play {round(data.shape[0] / num_plays, 2)}')


Number of unique plays: 10
Average number of frames per play 249.0


![Title](play68.gif)

In [6]:
last_5_play_ids = data["playId"].unique()[-5:]
test_data = data.loc[data["playId"].isin(last_5_play_ids)]
train_data = data.loc[~data["playId"].isin(last_5_play_ids)]
print(last_5_play_ids)


[189. 210. 395. 427. 449.]


In [7]:
print(test_data.columns)

Index(['gameId', 'playId', 'frame.id', 'OL_C_x', 'OL_C_y', 'OL_LG_x',
       'OL_LG_y', 'OL_LT_x', 'OL_LT_y', 'OL_RG_x', 'OL_RG_y', 'OL_RT_x',
       'OL_RT_y', 'X_Match_LT', 'Y_Match_LT', 'X_Match_LG', 'Y_Match_LG',
       'X_Match_C', 'Y_Match_C', 'X_Match_RG', 'Y_Match_RG', 'X_Match_RT',
       'Y_Match_RT', 'Off_1_x', 'Off_1_y', 'Off_2_x', 'Off_2_y', 'Off_3_x',
       'Off_3_y', 'Off_4_x', 'Off_4_y', 'Off_5_x', 'Off_5_y', 'Off_6_x',
       'Off_6_y', 'Def_1_x', 'Def_1_y', 'Def_2_x', 'Def_2_y', 'Def_3_x',
       'Def_3_y', 'Def_4_x', 'Def_4_y', 'Def_5_x', 'Def_5_y', 'Def_6_x',
       'Def_6_y', 'Def_7_x', 'Def_7_y', 'Def_8_x', 'Def_8_y', 'Def_9_x',
       'Def_9_y', 'Def_10_x', 'Def_10_y', 'Def_11_x', 'Def_11_y', 'PlayResult',
       'sack.ind', 'PassResult', 'num_vec'],
      dtype='object')


In [8]:
keep_regex = r'(Off|OL_(<?(C|LG|RG|RT|LT)_(x|y))$|Def|frame|Match)'
keep_cols = [c for c in train_data.columns if not re.search(keep_regex, c)]
data_train_for_model = train_data.drop(keep_cols, axis=1)
data_test_for_model = test_data.drop(keep_cols, axis=1)

input_shape = (data_train_for_model.shape[1],)

model = tf.keras.Sequential()
num_input_nodes = 25
num_output_nodes = 1
model.add(tf.keras.layers.Dense(num_input_nodes, input_shape=input_shape, activation=tf.nn.sigmoid))
model.add(tf.keras.layers.Dense(num_input_nodes, activation=tf.nn.sigmoid))
model.add(tf.keras.layers.Dense(num_input_nodes, activation=tf.nn.sigmoid))
model.add(tf.keras.layers.Dense(num_output_nodes, activation=tf.keras.activations.linear))

In [9]:
def mse_loss_with_prior(avg_of_play_no_noise):
    def mse(y_true, y_pred):
        return K.mean(K.square((y_pred - avg_of_play_no_noise) - y_true))

    return mse

In [10]:
model.compile(optimizer='adam',
              loss=mse_loss_with_prior([]),
              metrics=['acc'])


In [11]:
initial_model = tf.keras.models.clone_model(model)
initial_weights = model.get_weights()
initial_model.set_weights(initial_weights)

In [12]:
initial_predictions = initial_model.predict(data_train_for_model)
print(initial_predictions[:10])

[[-1.9341649]
 [-1.9341817]
 [-1.9341805]
 [-1.9341414]
 [-1.9342172]
 [-1.9343321]
 [-1.9343932]
 [-1.9343791]
 [-1.9346864]
 [-1.93153  ]]


In [17]:
prior = 4.0
train_data["NetNoise"] = initial_predictions - prior
# model.compile(loss=mse_loss_with_prior(train_data["NetNoise"]))
print(train_data.drop([c for c in train_data.columns if not re.search(keep_regex, c)], axis=1).head().to_string())
pred = model.predict(train_data.drop([c for c in train_data.columns if not re.search(keep_regex, c)], axis=1))

pred

# pred - train_data["NetNoise"].values


   frame.id  OL_C_x     OL_C_y  OL_LG_x    OL_LG_y    OL_LT_x    OL_LT_y    OL_RG_x    OL_RG_y  OL_RT_x    OL_RT_y  X_Match_LT  Y_Match_LT  X_Match_LG  Y_Match_LG  X_Match_C  Y_Match_C  X_Match_RG  Y_Match_RG  X_Match_RT  Y_Match_RT  Off_1_x    Off_1_y    Off_2_x    Off_2_y    Off_3_x    Off_3_y  Off_4_x    Off_4_y  Off_5_x    Off_5_y    Off_6_x    Off_6_y  Def_1_x    Def_1_y    Def_2_x    Def_2_y    Def_3_x    Def_3_y    Def_4_x    Def_4_y    Def_5_x    Def_5_y    Def_6_x    Def_6_y  Def_7_x    Def_7_y    Def_8_x    Def_8_y    Def_9_x    Def_9_y   Def_10_x   Def_10_y   Def_11_x   Def_11_y
0       1.0   26.59  23.433332    26.16  24.993334  26.030001  26.723333  25.959999  22.183332    25.82  20.483334       28.24   23.143333       28.24   23.143333      28.24  23.143333   28.000000   19.883333   28.000000   19.883333    25.92  43.873333  25.370001  33.983334  24.639999  23.413334    20.98  23.673334    26.09  18.553333  25.639999  23.593334    28.24  23.143333  27.639999  25.653334  3

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


array([[-1.9341649],
       [-1.9341817],
       [-1.9341805],
       ...,
       [-1.8841602],
       [-1.8829361],
       [-1.8831102]], dtype=float32)