# Setup

In [None]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import brier_score_loss
from sklearn.model_selection import GroupKFold

In [None]:
# Resolve all data locations relative to the directory the notebook runs in.
main_dir = Path.cwd()
data_path = main_dir.joinpath("data")
processed_data_path = data_path.joinpath('processed')

In [None]:
# Columns that identify a single frame of a play.
fid_cols = ['gameId', 'playId', 'frameId']
# Frame identifier extended with the player id.
playerframe_cols = [*fid_cols, 'nflId']

In [None]:
# Load the processed defender table and the games table.
df_data = pd.read_parquet(processed_data_path / 'defenders_to_model.parquet')
df_games = pd.read_csv(data_path / 'games.csv')

# Attach each game's week (default inner join on gameId), then discard
# the 'blockers' column.
df_data = (
    df_data
    .merge(df_games[['gameId', 'week']], on='gameId')
    .drop(columns='blockers')
)

In [None]:
# Optional: uncomment the two lines below to keep only 3 games from week 1
# games = df_data[df_data['week']==1].gameId.unique()[:3]
# df_data = df_data[df_data['gameId'].isin(games)]

In [None]:
# Name of the label column the model is trained to predict.
target = 'xtackle'

# Model input columns, in the exact order they are passed to the model.
# NOTE(review): the list contains 'x11' but no 'y11' — confirm this is intended.
features = [
    's', 'a', 's_bc', 'a_bc',
    'dist_to_bc', 'o_to_bc', 'dir_to_bc',
    'influence',
    'x1', 'x10', 'x11', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9',
    'y1', 'y10', 'y2', 'y3', 'y4', 'y5', 'y6', 'y7', 'y8', 'y9',
    'in_block',
]

In [None]:
# Keep only rows where every feature and the target are present, and
# renumber the index 0..n-1.
model_data = (
    df_data
    .dropna(subset=[*features, target])
    .reset_index(drop=True)
)

In [None]:
# One fold per distinct game: with as many splits as groups, each game is
# held out exactly once while the model trains on all the others.
folds = model_data['gameId'].unique().shape[0]
kf = GroupKFold(folds)

# Accumulator for the out-of-fold predictions. The dtype is given explicitly:
# a bare pd.Series() is object-dtype (and warns on older pandas), which can
# leak into the concatenated predictions.
s = pd.Series(dtype='float64')

# NOTE: split() returns a generator, so kf_split can only be iterated once;
# re-run this cell before re-running the training loop.
kf_split = kf.split(model_data, groups=model_data['gameId'])

In [None]:
# Booster parameters are the same for every fold, so build them once
# instead of on each iteration.
p = {
    'objective': 'binary:logistic',
    'eta': 0.3,
    'reg_lambda': 1,
    'scale_pos_weight': 1,
}

# Out-of-fold predictions, one Series per held-out fold.
fold_preds = []

for train_idx, test_idx in kf_split:
    train = model_data.iloc[train_idx]
    test = model_data.iloc[test_idx]

    xtrain = xgb.DMatrix(train.loc[:, features], train[target])
    xtest = xgb.DMatrix(test.loc[:, features], test[target])

    # No num_boost_round is passed, so xgb.train uses its default.
    model = xgb.train(p, xtrain)

    # Keep the held-out rows' index labels so the predictions can later be
    # aligned back onto model_data.
    _s = pd.Series(model.predict(xtest), index=test.index)
    fold_preds.append(_s)

# Concatenate once at the end: calling pd.concat inside the loop re-copies
# every prediction accumulated so far on each fold.
s = pd.concat([s, *fold_preds])

In [None]:
# Store the out-of-fold predictions next to the target; the assignment
# aligns `s` to model_data by index label.
model_data[target + '_xgb'] = s

# Eval

In [None]:
# Per actual outcome: number of rows and mean predicted probability.
(
    model_data
    .groupby('xtackle')
    .agg({'nflId': 'count', 'xtackle_xgb': 'mean'})
    .reset_index()
)

In [None]:
# Brier score of the out-of-fold predictions against the true labels.
# Arguments are passed positionally because scikit-learn renamed the
# `y_prob` keyword to `y_proba`; the positional form works on both old
# and new releases.
brier_score_loss(model_data['xtackle'], model_data['xtackle_xgb'])

In [None]:
# Persist the modelling rows together with their out-of-fold predictions.
model_data.to_parquet(path=processed_data_path / 'model_results.parquet')