In [1]:
import os
import random
import platform
from IPython.display import display


#package imports
import numpy as np
import pandas as pd
from tqdm import tqdm

#plotting imports
#import seaborn as sns
import matplotlib.pyplot as plt

#local imports
from utils.helpers import find_play_type, reverse_play_direction
from utils.logging import create_logger

#Stats imports
#from scipy import stats

from sklearn import metrics
from sklearn.metrics import mean_squared_error as mse
#from sklearn.metrics import log_loss
from sklearn.base import BaseEstimator
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier,RandomForestClassifier,GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
RS = 1106

#File paths below
#check kaggle or local
if platform.platform() == 'Linux-5.15.133+-x86_64-with-glibc2.31':
    FILE_PATH = '/kaggle/input/nfl-big-data-bowl-2024'
else:
    FILE_PATH = './nfl-big-data-bowl-2024'
GAMES_PATH = os.path.join(FILE_PATH,'games.csv')
PLAYS_PATH = os.path.join(FILE_PATH,'plays.csv')
PLAYERS_PATH = os.path.join(FILE_PATH,'players.csv')
TACKLES_PATH = os.path.join(FILE_PATH,'tackles.csv')

#Tracking weeks
WEEK1_PATH = os.path.join(FILE_PATH,'tracking_week_1.csv')
WEEK2_PATH = os.path.join(FILE_PATH,'tracking_week_2.csv')
WEEK3_PATH = os.path.join(FILE_PATH,'tracking_week_3.csv')
WEEK4_PATH = os.path.join(FILE_PATH,'tracking_week_4.csv')
WEEK5_PATH = os.path.join(FILE_PATH,'tracking_week_5.csv')
WEEK6_PATH = os.path.join(FILE_PATH,'tracking_week_6.csv')
WEEK7_PATH = os.path.join(FILE_PATH,'tracking_week_7.csv')
WEEK8_PATH = os.path.join(FILE_PATH,'tracking_week_8.csv')
WEEK9_PATH = os.path.join(FILE_PATH,'tracking_week_9.csv')

logger = create_logger('./output/log.txt','basicModel')

Logger initialized: basicModel


### Load in all data

In [2]:
games_df = pd.read_csv(GAMES_PATH)
players_df = pd.read_csv(PLAYERS_PATH)
plays_df = pd.read_csv(PLAYS_PATH)
tackles_df = pd.read_csv(TACKLES_PATH)

#tracking week 1
week1_df = pd.read_csv(WEEK1_PATH)
week2_df = pd.read_csv(WEEK2_PATH)
week3_df = pd.read_csv(WEEK3_PATH)
week4_df = pd.read_csv(WEEK4_PATH)
week5_df = pd.read_csv(WEEK5_PATH)
week6_df = pd.read_csv(WEEK6_PATH)
week7_df = pd.read_csv(WEEK7_PATH)
week8_df = pd.read_csv(WEEK8_PATH)
week9_df = pd.read_csv(WEEK9_PATH)

#concat all weeks
all_weeks = pd.concat([week1_df,week2_df,week3_df,week4_df,week5_df,week6_df,week7_df,week8_df,week9_df])
all_weeks = all_weeks.merge(players_df.loc[:,['nflId','position']],how='left')

#reverse all plays so they are all going left to right
all_weeks_left_reversed = reverse_play_direction(all_weeks[all_weeks['playDirection']=='left'])
all_weeks.loc[all_weeks['playDirection']=='left'] = all_weeks_left_reversed


display(all_weeks.sample(4))

#find all pass plays and run plays
pass_plays = find_play_type(plays_df,'pass')
pass_index = plays_df[plays_df['playDescription'].isin(pass_plays)].index
pass_plays_df = plays_df.iloc[pass_index]
run_plays_df = plays_df.loc[~plays_df.index.isin(pass_index)]

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event,position
2420295,2022091811,281,44848.0,Budda Baker,4,2022-09-18 16:32:52.500000,3.0,ARI,right,95.11,35.36,3.26,2.11,0.33,218.0,92.7,,SS
7635299,2022101605,401,54601.0,Cordell Volson,37,2022-10-16 13:15:35.400000,67.0,CIN,right,44.62,26.72,2.66,1.43,0.27,235.41,168.2,,G
1139458,2022091111,2212,41269.0,Kyle Van Noy,15,2022-09-11 18:14:34.500000,8.0,LAC,right,44.09,26.94,4.02,3.32,0.4,196.06,146.1,,OLB
864074,2022091108,1639,42031.0,Denico Autry,57,2022-09-11 17:38:44.500000,96.0,TEN,left,31.58,25.673333,3.06,3.45,0.31,86.83,99.79,tackle,DE


Number of matches for pass: 5646


In [3]:
plays_df.columns

Index(['gameId', 'playId', 'ballCarrierId', 'ballCarrierDisplayName',
       'playDescription', 'quarter', 'down', 'yardsToGo', 'possessionTeam',
       'defensiveTeam', 'yardlineSide', 'yardlineNumber', 'gameClock',
       'preSnapHomeScore', 'preSnapVisitorScore', 'passResult', 'passLength',
       'penaltyYards', 'prePenaltyPlayResult', 'playResult',
       'playNullifiedByPenalty', 'absoluteYardlineNumber', 'offenseFormation',
       'defendersInTheBox', 'passProbability', 'preSnapHomeTeamWinProbability',
       'preSnapVisitorTeamWinProbability', 'homeTeamWinProbabilityAdded',
       'visitorTeamWinProbilityAdded', 'expectedPoints', 'expectedPointsAdded',
       'foulName1', 'foulName2', 'foulNFLId1', 'foulNFLId2'],
      dtype='object')

## Some Data Organization

In [4]:
# 8 mins
# tracking_run_index = []
# for i,j in tqdm(zip(run_plays_df['gameId'],run_plays_df['playId'])):
#     ind = all_weeks.loc[(all_weeks['gameId'] == i) & (all_weeks['playId'] == j)].index
#     tracking_run_index = tracking_run_index + ind.to_list()

# 8.5 mins
# for i in tqdm(run_plays_df[['gameId','playId']].itertuples()):
#     ind = all_weeks.loc[(all_weeks['gameId'] == i[1]) & (all_weeks['playId'] == i[2])].index
#     tracking_run_index = tracking_run_index + ind.to_list()
if not os.path.exists('./output/run_play_tracking_rev.csv'):
    logger.info('Creating run play tracking csv...')
    #2 mins
    def apply_func(gameId,playId):
        ind = all_weeks.loc[(all_weeks['gameId'] == gameId) & (all_weeks['playId'] == playId)].index.to_list()
        return ind

    run_plays = [apply_func(x,y) for x,y in tqdm(zip(run_plays_df['gameId'],run_plays_df['playId']))]
    tracking_run_index = [item for sublist in run_plays for item in sublist]

    run_play_tracking = all_weeks.iloc[tracking_run_index]
    run_play_tracking.to_csv('./output/run_play_tracking_rev.csv')
else:
    logger.info('Reading run play tracking csv...')
    run_play_tracking = pd.read_csv('./output/run_play_tracking_rev.csv')

Creating run play tracking csv...


6840it [03:09, 36.11it/s]


In [5]:
at_snap = run_play_tracking[run_play_tracking['event'] == 'ball_snap']
POSITION = 'SS'

#some players not on field or improperly labeled
errors = []

test_df = at_snap[at_snap['position'] == POSITION].copy()
test_df = test_df.merge(plays_df.loc[:,['gameId','playId','playResult']],on=['gameId','playId'],how='left')

for play in tqdm(test_df.itertuples()):
    frame = play.frameId

    #frames = play[play['frameId'] == frame]
    ball = at_snap.loc[(at_snap['displayName']=='football') & (at_snap['frameId']==frame) & (at_snap['playId']==play.playId) & (at_snap['gameId']==play.gameId)]

    try:
        dist_to_ball = np.sqrt((play.x -ball['x'])**2 + (play.y-ball['y'])**2)
        test_df.loc[play.Index,'dist_to_ball'] = dist_to_ball.values[0]
    except Exception as e:
        errors.append(e)



6471it [00:51, 125.23it/s]


In [6]:


class BasicModel(BaseEstimator):
    '''
    Takes the data and fits basic models. In custom class for pipeline tweaks. Obviously unnecesary
    '''
    def __init__(self,kwargs):
        self.kwargs = kwargs
        self.rf = RandomForestRegressor(**{'n_estimators':kwargs['n_estimators'],'max_depth':kwargs['max_depth'],'random_state':RS})
        self.gbm = GradientBoostingRegressor(**kwargs)
        self.scale = StandardScaler()
        self.scaled = False
        self.rf_preds = None
        self.gbm_preds = None
        self.type = 'rf'
    
    def fit(self,X,y,scale_data=False):
        '''model fits. can scale if needed'''
        if scale_data:
            X = self.scale.fit_transform(X)
            self.scaled = True
        self.rf.fit(X,y)
        self.gbm.fit(X,y)

    def predict(self,X,y=None, gbm=False):
        '''basic predictions'''
        if self.scaled:
            X = self.scale.transform(X)

        if gbm:
            preds =  self.gbm.predict(X)
            self.gbm_preds = preds
        else:
            preds = self.rf.predict(X)
            self.rf_preds = preds

        self.preds = True
        return preds

    def score(self,X,y,gbm_pred=False):
        '''Can self score if X and y are passed in otherwise use sklearn's score method'''
        if gbm_pred:
            self.gbm_preds = self.predict(X,gbm=gbm_pred)
            return mse(y,self.gbm_preds)
        else:
            self.rf_preds = self.predict(X)
            return mse(y,self.rf_preds)
        

test_df.dropna(inplace=True)
test_cols = ['dist_to_ball','y','x','s','a','dir']

X = test_df[test_cols]
y = test_df['playResult']

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=RS)

params = {
    "n_estimators": 500,
    "max_depth": 3,
    "min_samples_split": 2,
    "learning_rate": 0.001,
    "loss": "squared_error",
    "random_state": RS
}

model = BasicModel(params)

model.fit(X_train,y_train,scale_data=True)
gbm_score = model.score(X_test, y_test,gbm_pred=True)
rf_score = model.score(X_test,y_test)

result_median = [y.median() for i in range(len(y_test))]
median_test = mse(y_test,result_median)



print(f'Prediction for yards gained based on {POSITION} position at snap:')
print(f'MSE for GBM: {gbm_score:.3f}')
print(f'MSE for RF: {rf_score:.3f}')
print(f'MSE for Median: {median_test:.3f}')
print(f'Improvement over median GBM: {median_test - gbm_score:.3f}')
print(f'Improvement over median RF: {median_test - rf_score:.3f}')



Prediction for yards gained based on SS position at snap:
MSE for GBM: 45.439
MSE for RF: 45.329
MSE for Median: 46.872
Improvement over median GBM: 1.433
Improvement over median RF: 1.543


### Can I predict player positions? 

In [7]:


###processing
defense = ['FS','SS','MLB','LB','CB','DB','S','ILB','OLB','DL','NT','DE','DT']
at_snap_def = at_snap[at_snap['position'].isin(defense)].copy()


#great generalization
at_snap_def.loc[at_snap_def['position'].isin(['FS','SS','S']),'position'] = 'S'
at_snap_def.loc[at_snap_def['position'].isin(['MLB','LB','ILB','OLB']),'position'] = 'LB'
at_snap_def.loc[at_snap_def['position'].isin(['CB','DB']),'position'] = 'CB'
at_snap_def.loc[at_snap_def['position'].isin(['DL','NT','DT']),'position'] = 'DL'

at_snap_def.position.unique()

test_cols = ['y','x','s','a','o','dir']

#at_snap1 = at_snap.dropna()
X1 = at_snap_def[test_cols]
y1 = at_snap_def['position']

X1_train,X1_test,y1_train,y1_test = train_test_split(X1,y1,test_size=0.2,random_state=RS)

params = {
    "max_depth": 10,
    "min_samples_leaf": 2,
    "learning_rate": 0.1,
    "loss": "log_loss",
    "random_state": RS,
    "verbose":0,
}

model_def = HistGradientBoostingClassifier(**params)

model_def.fit(X1_train,y1_train)

gbm_preds = model_def.predict(X1_test)
print(gbm_preds)

random_pos = [random.choice(y1_test.values) for i in range(len(y1_test))]
median_test1 = metrics.precision_score(y1_test.values,random_pos,average='micro')
gbm_score1 = metrics.precision_score(y1_test.values,gbm_preds,average='micro')

median_test_cr = metrics.classification_report(y1_test.values,random_pos)
gbm_score_cr = metrics.classification_report(y1_test.values,gbm_preds)

# result_median1 = [random.choice(y1_test.values) for i in range(len(y1_test))]
# median_test1 = log_loss(y1_test.values,result_median1)
# gbm_score1 = log_loss(y1_test.values,gbm_preds)




print('Prediction for player postion based on positon at snap:')
print(f'Precision for GBM: {gbm_score1:.3f}')
print(f'Precision for random: {median_test1:.3f}')
print(f'Improvement over random : {gbm_score1 - median_test1:.3f}\n')

print(gbm_score_cr)

['DL' 'S' 'S' ... 'LB' 'DE' 'S']
Prediction for player postion based on positon at snap:
Precision for GBM: 0.591
Precision for random: 0.216
Improvement over random : 0.374

              precision    recall  f1-score   support

          CB       0.79      0.72      0.75      3199
          DE       0.43      0.15      0.22      1649
          DL       0.52      0.74      0.61      2460
          LB       0.53      0.62      0.57      3911
           S       0.61      0.53      0.57      2667

    accuracy                           0.59     13886
   macro avg       0.58      0.55      0.54     13886
weighted avg       0.59      0.59      0.58     13886



In [8]:
at_snap_def

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,y,s,a,dis,o,dir,event,position
437051,2022091103,3126,43299.0,Eli Apple,6,2022-09-11 15:34:11.599999,20.0,CIN,left,46.11,42.493333,0.26,0.10,0.03,282.76,40.96,ball_snap,CB
437110,2022091103,3126,43350.0,Vonn Bell,6,2022-09-11 15:34:11.599999,24.0,CIN,left,48.10,17.413333,2.80,1.53,0.27,270.54,282.83,ball_snap,S
437169,2022091103,3126,43641.0,Mike Hilton,6,2022-09-11 15:34:11.599999,21.0,CIN,left,48.08,32.543333,2.67,1.18,0.26,343.16,1.91,ball_snap,CB
437287,2022091103,3126,44872.0,Chidobe Awuzie,6,2022-09-11 15:34:11.599999,22.0,CIN,left,51.51,11.683333,0.36,0.24,0.04,242.45,66.48,ball_snap,CB
437346,2022091103,3126,44915.0,Trey Hendrickson,6,2022-09-11 15:34:11.599999,91.0,CIN,left,44.50,29.353333,0.35,1.40,0.06,284.22,280.60,ball_snap,DE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9193934,2022102308,905,47787.0,Clelin Ferrell,6,2022-10-23 16:44:26.700000,99.0,LV,left,94.46,29.003333,0.06,0.28,0.03,282.16,217.14,ball_snap,DE
9194010,2022102308,905,47810.0,Johnathan Abram,6,2022-10-23 16:44:26.700000,24.0,LV,left,105.47,20.603333,0.19,0.57,0.02,356.26,56.10,ball_snap,S
9194048,2022102308,905,47817.0,Rock Ya-Sin,6,2022-10-23 16:44:26.700000,26.0,LV,left,95.41,13.583333,0.16,0.41,0.03,268.17,13.22,ball_snap,CB
9194086,2022102308,905,47889.0,Maxx Crosby,6,2022-10-23 16:44:26.700000,98.0,LV,left,94.27,19.243333,0.18,0.17,0.02,269.94,256.83,ball_snap,DE


In [13]:

test_cols = ['y','x','s','a','dir','o']



at_snap1 = at_snap.dropna()

at_snap1.loc[at_snap1['position'].isin(['CB','DB']),'position'] = 'CB'
at_snap1.loc[at_snap1['position'].isin(['MLB','ILB']),'position'] = 'ILB'
at_snap1.loc[at_snap1['position'].isin(['RB','FB']),'position'] = 'RB'
at_snap1.loc[at_snap1['position'].isin(['NT','DT']),'position'] = 'DT'

X1 = at_snap1[test_cols]
y1 = at_snap1['position']

X1_train,X1_test,y1_train,y1_test = train_test_split(X1,y1,test_size=0.2,random_state=RS)

params = {
    "n_estimators": 100,
    "max_depth": 3,
    "min_samples_split": 2,
    "learning_rate": 0.1,
    "loss": "log_loss",
    "random_state": RS,
    "verbose":1,
}

model1 = GradientBoostingClassifier(**params)

model1.fit(X1_train,y1_train)


      Iter       Train Loss   Remaining Time 
         1           2.4460            4.82m
         2           2.2669            4.95m
         3           2.1371            4.90m
         4           2.0494            4.84m
         5           1.9687            4.82m
         6           1.9081            4.76m
         7           1.8508            4.71m
         8           1.8032            4.65m
         9           1.7618            4.60m
        10           1.7217            4.55m
        20           1.4934            4.01m
        30           1.3934            3.52m
        40           1.3390            3.01m
        50           1.3022            2.52m
        60           1.2780            2.02m
        70           1.2594            1.52m
        80           1.2446            1.01m
        90           1.2330           30.25s
       100           1.2232            0.00s


In [14]:


gbm_preds = model1.predict(X1_test)
print(gbm_preds)

random_pos = [random.choice(y1_test.values) for i in range(len(y1_test))]
median_test1 = metrics.precision_score(y1_test.values,random_pos,average='micro')
gbm_score1 = metrics.precision_score(y1_test.values,gbm_preds,average='micro')

median_test_cr = metrics.classification_report(y1_test.values,random_pos)
gbm_score_cr = metrics.classification_report(y1_test.values,gbm_preds)

# result_median1 = [random.choice(y1_test.values) for i in range(len(y1_test))]
# median_test1 = log_loss(y1_test.values,result_median1)
# gbm_score1 = log_loss(y1_test.values,gbm_preds)




print('Prediction for player postion based on positon at snap:')
print(f'Precision for GBM: {gbm_score1:.3f}')
print(f'Precision for random: {median_test1:.3f}')
print(f'Improvement over random : {gbm_score1 - median_test1:.3f}\n')

print(gbm_score_cr)

['OLB' 'C' 'WR' ... 'OLB' 'C' 'T']
Prediction for player postion based on positon at snap:
Precision for GBM: 0.562
Precision for random: 0.076
Improvement over random : 0.486

              precision    recall  f1-score   support

           C       0.58      0.66      0.62      1254
          CB       0.69      0.75      0.72      3102
          DE       0.41      0.16      0.23      1680
          DT       0.47      0.76      0.58      2478
          FS       0.35      0.25      0.29      1444
           G       0.63      0.80      0.70      2360
         ILB       0.40      0.51      0.45      1920
         OLB       0.33      0.27      0.29      2007
          QB       0.61      0.62      0.61      1246
          RB       0.48      0.40      0.43      1535
          SS       0.36      0.18      0.24      1311
           T       0.67      0.75      0.71      2700
          TE       0.57      0.41      0.47      1812
          WR       0.81      0.73      0.77      2920

    accurac

## Linear model

In [11]:
#basic linear model
X_train1 = X_train.drop(columns=['s','dir']).copy()
X_train1['intercept'] = 1

linear_model = sm.OLS(y_train, X_train1).fit()

display(linear_model.summary())

coef = linear_model.params

change_in_unit = pd.DataFrame()

change_in_unit['coef'] = coef
change_in_unit['change_in_unit'] = np.exp(coef)
change_in_unit

0,1,2,3
Dep. Variable:,playResult,R-squared:,0.009
Model:,OLS,Adj. R-squared:,0.008
Method:,Least Squares,F-statistic:,11.9
Date:,"Fri, 10 Nov 2023",Prob (F-statistic):,1.26e-09
Time:,19:16:26,Log-Likelihood:,-17098.0
No. Observations:,5155,AIC:,34210.0
Df Residuals:,5150,BIC:,34240.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
dist_to_ball,0.1060,0.024,4.463,0.000,0.059,0.153
y,-0.0031,0.014,-0.213,0.831,-0.031,0.025
x,-0.0194,0.004,-5.118,0.000,-0.027,-0.012
a,0.0327,0.125,0.263,0.793,-0.211,0.277
intercept,4.4349,0.546,8.121,0.000,3.364,5.505

0,1,2,3
Omnibus:,3198.704,Durbin-Watson:,2.008
Prob(Omnibus):,0.0,Jarque-Bera (JB):,98570.934
Skew:,2.456,Prob(JB):,0.0
Kurtosis:,23.852,Cond. No.,457.0


Unnamed: 0,coef,change_in_unit
dist_to_ball,0.106023,1.111848
y,-0.003059,0.996946
x,-0.019384,0.980802
a,0.032747,1.033289
intercept,4.434889,84.342799


In [12]:
test_score = np.zeros((params["n_estimators"],), dtype=np.float64)
for i, y_pred in enumerate(model.gbm.staged_predict(X_test)):
    test_score[i] = mse(y_test, y_pred)

fig = plt.figure(figsize=(6, 6))
plt.subplot(1, 1, 1)
plt.title("Deviance")
plt.plot(
    np.arange(params["n_estimators"]) + 1,
    model.gbm.train_score_,
    "b-",
    label="Training Set Deviance",
)
plt.plot(
    np.arange(params["n_estimators"]) + 1, test_score, "r-", label="Test Set Deviance"
)
plt.legend(loc="upper right")
plt.xlabel("Boosting Iterations")
plt.ylabel("Deviance")
fig.tight_layout()
plt.show()



IndexError: index 100 is out of bounds for axis 0 with size 100

### Get the frames we might want to keep for a robust model based on movement

In [None]:
frame_counts = {}
list_of_frames = []

#average frames
for game in tqdm(run_play_tracking['gameId'].unique()):
    frame_counts[game] = {}
    for play in run_play_tracking.loc[run_play_tracking['gameId'] == game]['playId'].unique():
        frames = run_play_tracking[(run_play_tracking['gameId'] == game) & (run_play_tracking['playId'] == play)]['frameId'].mean().astype(int)
        frame_counts[game][play] = frames
        list_of_frames.append(frames)

#trim the amount of plays with less than some percentile of frames. We need every play in a tensor to have the same amount of frames
#list_of_frames.sort()
frame_threshold = np.percentile(list_of_frames, 30)
print(frame_threshold)
        

  7%|▋         | 9/136 [00:08<02:00,  1.06it/s]


KeyboardInterrupt: 

In [None]:
#only plays with more than the threshold selected. then we ONLY take those frames
#this is to make sure that every play has the same amount of frames

keep_plays = {}

for k,v in frame_counts.items():
    keep_plays[k] = []
    for k1,v1 in v.items():
        if v1 > frame_threshold:
            keep_plays[k].append(k1)

keep_plays

{2022091103: [3126,
  253,
  274,
  1672,
  611,
  632,
  4462,
  4631,
  1610,
  520,
  2611,
  2670,
  4154,
  1126,
  1150,
  2291,
  2315,
  852,
  881,
  1493,
  1565,
  3105,
  3825,
  5039,
  1228,
  2483,
  2511,
  1384,
  2719,
  955,
  1016,
  2955,
  1315,
  1363,
  3011,
  1700,
  364,
  388,
  2201,
  2222,
  3755,
  1947,
  1968,
  2037,
  3251,
  4686,
  1037,
  4279,
  4353,
  743,
  764,
  319,
  3613,
  3680,
  1105,
  111,
  2462,
  1926,
  3294,
  3389,
  1589,
  3464,
  4789,
  340,
  2172,
  902,
  4710],
 2022091112: [2370,
  2475,
  3454,
  3492,
  3530,
  1994,
  1613,
  2612,
  2640,
  3416,
  1458,
  2501,
  965,
  86,
  1539,
  573,
  3551,
  2886,
  2072,
  181,
  3627,
  3002,
  3029,
  1144,
  1165,
  2931,
  2981,
  3766,
  1501,
  62,
  2246,
  2294,
  2167,
  2225,
  1260,
  736,
  1560,
  3589,
  3176,
  2315,
  1207,
  3128,
  3672,
  3696,
  917,
  991],
 2022091107: [723,
  3669,
  3443,
  3026,
  1260,
  1378,
  254,
  275,
  2285,
  2314,
  1994,

## A model to look at run plays and play results
Will only look at x,y,s,a,dis(or o)

In [None]:
import torch  # noqa: E402
import torch.nn as nn  # noqa: E402

NUM_SAMPLES = 800
NUM_FRAMES = 14
NUM_PLAYERS = 23
DIMENSION_NAMES = ['x','y','s','a','o']
DIMENSIONS = len(DIMENSION_NAMES)+1



class Preprocess(nn.Module):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Parameter(torch.randn(1,1,3))
        self.l2 = nn.Parameter(torch.randn(1,1,3))
        self.l3 = nn.Parameter(torch.randn(1,1,3))
        
    def forward(self,x):
        x = torch.cat([x,self.l1],dim=1)
        x = torch.cat([x,self.l2],dim=1)
        x = torch.cat([x,self.l3],dim=1)
        return x

#trying to create a tensor
def create_tensor(df):
    X = np.zeros([NUM_SAMPLES, NUM_FRAMES, NUM_PLAYERS, DIMENSIONS], dtype=np.float32)
    y = np.zeros([NUM_SAMPLES], dtype=np.int32)

    for i in df['gameId'].unique():
        for j in df.loc[df['gameId'] == i]['playId'].unique():
            for p in df.loc['nflId'].unique():
                pass
            
