# Objective

1. Create a frame classification pipeline based on players that are n yards apart
2. Use their helmet bounding boxes to crop out the part of the frame that contains both of them
3. Run a frame classification pipeline on these images

# Steps

1. Filter data for close players based on distance threshold
2. Crop a specific frame between two players based on bboxes and visualize
3. Create fastai pipeline

# Configuration

In [None]:
class CFG:
    """Notebook-wide settings, read as plain class attributes.

    Further attributes (``frames_path``, ``utils_path``) are attached by the
    environment setup cell.
    """

    # Which stages of the notebook run.
    train = False
    valid = False
    infer = True

    # Reproducibility: used as ``random_state`` when sampling rows.
    seed = 4121995

    # Data preparation.
    undersample_no_contact = True
    img_size = 224  # side length of the square crop taken around a player pair

    # Model.
    model = 'resnet18'
    model_name = 'model-1.pkl'
    epochs = 1

    # Decision thresholds.
    thresh = 0.3  # minimum predicted probability to call a contact
    dist_thresh = 1  # NOTE(review): presumably the distance fallback cut-off - confirm

In [None]:
# Capture the working-directory listing through the IPython `!` shell magic.
LS = !ls
# The notebook is considered to run on Kaggle when no 'init.sh' is listed.
# NOTE(review): presumably 'init.sh' only exists in the local setup - confirm.
IS_KAGGLE = 'init.sh' not in LS
# Echo the flag as the cell output.
IS_KAGGLE

# Load libraries

In [None]:
import sys

# Environment-specific setup: extend the import path and attach the frame and
# helper locations to CFG so later cells can read them.
if IS_KAGGLE:
    # Presumably an offline copy of timm attached as a Kaggle input - confirm.
    sys.path.append('/kaggle/input/timm-0-6-9/pytorch-image-models-master')
    # Empty here; the inference cell passes its own frames directory to
    # get_frame_path instead of relying on this default.
    CFG.frames_path = ''
    CFG.utils_path = '/kaggle/input/nflutils'
    
    sys.path.insert(0, '../input/nflutils')
    # Copy the helper modules into a local `nflutils/` directory so that
    # `from nflutils.dataprep import *` can be resolved from the working dir.
    !mkdir -p nflutils
    !cp ../input/nflutils/*.py nflutils/
    
else:
    # Local run: frames and helpers are addressed relative to the working dir.
    CFG.frames_path = 'frames/content/work/frames/train'
    CFG.utils_path = 'nflutils'

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

import pickle
import timm

from pathlib import Path

from nflutils.dataprep import *

from tqdm.notebook import tqdm

from sklearn.metrics import matthews_corrcoef

# Read Files

In [None]:
# Dataset root and output directory for the current environment
# (Kaggle input/working dirs, or paths relative to the local working dir).
BASE_DIR, OUT_DIR = (
    (Path("../input/nfl-player-contact-detection"), Path("/kaggle/working/"))
    if IS_KAGGLE
    else (Path("nfl-player-contact-detection"), Path("nfl-player-contact-detection/frames"))
)

In [None]:
# Baseline helmet detection labels for the test videos.
te_helmets = pd.read_csv(BASE_DIR / "test_baseline_helmets.csv")

# Player tracking data; the `datetime` column is parsed into timestamps.
te_tracking = pd.read_csv(BASE_DIR / "test_player_tracking.csv", parse_dates=["datetime"])

# Sample submission: lists the `contact_id` rows that need a prediction.
ss = pd.read_csv(BASE_DIR / "sample_submission.csv")

In [None]:
# The pre-computed training tables are only needed to fit or validate.
if any((CFG.train, CFG.valid)):
    df_combo_with_helmets = pd.read_parquet('df_combo_with_helmets.parquet')
    df_combo = pd.read_parquet('df_combo.parquet')

# Create a dataset based on labeled data

In [None]:
from fastai.vision.all import *

import cv2

In [None]:
def get_frame_path(row, frames_path=None, split=None):
    """Build the path of the extracted JPEG frame that a row refers to.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the
            'frame', 'game_play' and 'view' entries.
        frames_path: Directory containing the extracted frames. When omitted,
            ``CFG.frames_path`` is looked up at call time, so changes made to
            the config after this function was defined are honoured.
        split: Unused; kept so existing callers keep working.

    Returns:
        ``'{frames_path}/{game_play}_{view}.mp4_{frame}.jpg'`` where the frame
        number is zero-padded to at least four digits.
    """
    if frames_path is None:
        frames_path = CFG.frames_path

    # Frames are written with a `%04d` counter by ffmpeg, so every frame
    # number below 1000 needs padding to four digits (5 -> '0005'), not just
    # a single leading zero.
    frame = f"{int(row['frame']):04d}"
    return f"{frames_path}/{row['game_play']}_{row['view']}.mp4_{frame}.jpg"

def get_frames_df(df_combo, kf_dict, split, sample_every_n_frame=None, sample_train=None, sample_val=None, undersample_no_contact=False, filter_views=None):
    """Assemble the train/validation frame table for one fold.

    Args:
        df_combo: DataFrame with at least 'game_play', 'frame', 'view' and
            'contact' columns.
        kf_dict: Fold definitions; ``kf_dict[split]`` provides the
            'train_games' and 'val_games' collections of game plays.
        split: Key of the fold to use.
        sample_every_n_frame: If given, keep only rows whose frame number is
            a multiple of this value.
        sample_train: If given, fraction of training rows to keep.
        sample_val: If given, fraction of validation rows to keep.
        undersample_no_contact: If true, reduce the training no-contact rows
            to (at most) the number of training contact rows.
        filter_views: If given, keep only rows whose 'view' is in this
            collection.

    Returns:
        DataFrame of training rows followed by validation rows, with an
        'is_valid' flag, an integer 'frame' column and a 'path' column built
        by ``get_frame_path``.
    """
    fold = kf_dict[split]
    train_game_plays = fold['train_games']
    val_game_plays = fold['val_games']

    train_combo = df_combo.query('game_play in @train_game_plays').copy()
    val_combo = df_combo.query('game_play in @val_game_plays').copy()

    train_combo['is_valid'] = False
    val_combo['is_valid'] = True

    if sample_every_n_frame is not None:
        train_combo = train_combo[(train_combo.frame % sample_every_n_frame) == 0]
        val_combo = val_combo[(val_combo.frame % sample_every_n_frame) == 0]

    if sample_train is not None:
        train_combo = train_combo.sample(frac=sample_train, random_state=CFG.seed)

    if sample_val is not None:
        val_combo = val_combo.sample(frac=sample_val, random_state=CFG.seed)

    if undersample_no_contact:
        contact_rows = train_combo.query('contact == 1')
        no_contact_rows = train_combo.query('contact == 0')
        # Sampling without replacement cannot draw more rows than exist, so
        # cap the draw when no-contact rows are the minority.
        n_keep = min(len(contact_rows), len(no_contact_rows))
        train_combo = pd.concat([
            contact_rows,
            no_contact_rows.sample(n_keep, random_state=CFG.seed),
        ])

    frames_df = pd.concat([train_combo, val_combo], axis=0)
    frames_df['frame'] = frames_df['frame'].astype('int')

    if filter_views is not None:
        # Copy so the column assignment below does not write into a slice.
        frames_df = frames_df.query('view in @filter_views').copy()

    # Built row by row so that an empty table simply yields an empty column.
    frames_df['path'] = [get_frame_path(row) for _, row in frames_df.iterrows()]

    return frames_df

def _crop_window(center, half, last):
    """Return (start, stop) of a ``2 * half`` wide window around ``center``.

    The window is shifted, not shrunk, so that it stays within ``[0, last]``.
    """
    if center - half < 0:
        return 0, 2 * half
    if center + half > last:
        return max(0, last - 2 * half), last
    return center - half, center + half


def get_img(row, add_helmets=True):
    """Load a frame and crop a square region around the two players' centre.

    Args:
        row: Object with attribute access to 'path', 'center_x', 'center_y'
            and, when ``add_helmets`` is true, the 'left_N', 'top_N',
            'width_N' and 'height_N' helmet box fields for N in 1, 2.
        add_helmets: Draw both helmet boxes onto the frame before cropping.

    Returns:
        RGB image array of the crop; ``CFG.img_size`` (rounded down to an even
        number) pixels per side when the frame is large enough.

    Raises:
        FileNotFoundError: If the image at ``row.path`` cannot be read.
    """
    frame = cv2.imread(row.path)
    if frame is None:
        # cv2.imread signals a missing or unreadable file by returning None;
        # fail here with the offending path instead of inside cvtColor.
        raise FileNotFoundError(f'Could not read frame image: {row.path}')
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    if add_helmets:
        for player in ('1', '2'):
            left = getattr(row, f'left_{player}')
            top = getattr(row, f'top_{player}')
            width = getattr(row, f'width_{player}')
            height = getattr(row, f'height_{player}')
            frame = cv2.rectangle(frame,
                                  (int(left), int(top)),
                                  (int(left + width), int(top + height)),
                                  (255, 0, 0), 2)

    half = CFG.img_size // 2

    # Bounds come from the actual frame instead of assuming 1280x720; for a
    # 720p frame these are the same 719 / 1279 limits as before.
    frame_height, frame_width = frame.shape[:2]
    min_y, max_y = _crop_window(row.center_y, half, frame_height - 1)
    min_x, max_x = _crop_window(row.center_x, half, frame_width - 1)

    cropped_frame = frame[int(min_y):int(max_y),
                          int(min_x):int(max_x), :]
    return cropped_frame

def get_label(row):
    """Return the class name for the row's ``contact`` value (0 or 1)."""
    class_names = ('no contact', 'contact')
    return class_names[row.contact]


def get_dls(frames_df, item_tfms=None, batch_tfms=None, bs=64, shuffle=True, drop_last=False):
    """Create fastai dataloaders over the rows of ``frames_df``.

    Images come from ``get_img`` and labels from ``get_label``; the
    train/validation split is taken from a column via ``ColSplitter()``.

    Args:
        frames_df: Frame table, one row per item.
        item_tfms: Optional per-item transforms.
        batch_tfms: Optional per-batch transforms.
        bs: Batch size.
        shuffle: Forwarded to ``dataloaders``.
        drop_last: Forwarded to ``dataloaders``.

    Returns:
        The dataloaders built by the ``DataBlock``.
    """
    frame_block = DataBlock(
        blocks=(ImageBlock, CategoryBlock),
        get_x=get_img,
        get_y=get_label,
        splitter=ColSplitter(),
        item_tfms=item_tfms,
        batch_tfms=batch_tfms,
    )
    loader_options = dict(bs=bs, shuffle=shuffle, drop_last=drop_last)
    return frame_block.dataloaders(frames_df, **loader_options)


def get_learner(model, dls):
    """Build a fastai vision learner for ``model`` on ``dls`` that reports accuracy."""
    learner = vision_learner(dls, model, metrics=accuracy)
    return learner


def validate_model(learn, df_combo, val_df, thresh=0.3):
    """Score the model against the distance baseline on validation game plays.

    Frame-level contact probabilities are averaged per ``contact_id`` and
    merged onto every player pair of the validation game plays. Pairs without
    any prediction fall back to the ``distance <= 1`` rule. The Matthews
    correlation coefficient of the baseline and of the model is printed.

    Args:
        learn: Trained fastai learner.
        df_combo: Table of all player pairs with 'game_play', 'distance' and
            'contact' columns.
        val_df: Validation frame rows; a 'contact_pred' column is added to it.
        thresh: Probability above which a pair counts as contact.

    Returns:
        The validation pairs taken from ``df_combo`` with missing distances
        filled.
    """
    val_dl = learn.dls.test_dl(val_df, with_labels=True)
    preds, _ = learn.get_preds(dl=val_dl)

    # Look the class column up by name: fastai sorts the category vocab, so
    # 'contact' is not guaranteed to sit at index 1.
    contact_idx = list(learn.dls.vocab).index('contact')
    val_df.loc[:, 'contact_pred'] = preds.cpu().detach().numpy()[:, contact_idx]
    val_df = add_contact_id(val_df)

    val_dist = df_combo[df_combo.game_play.isin(val_df.game_play.unique())].copy()
    # Missing distances (player-to-ground pairs, per the original note) get a
    # large value so they never satisfy the distance rule.
    val_dist["distance"] = val_dist["distance"].fillna(99)
    if 'contact_id' not in val_dist.columns:
        # The merge below needs the key on both sides.
        val_dist = add_contact_id(val_dist)

    pair_preds = val_df.groupby('contact_id', as_index=False).contact_pred.mean()
    # Missing predictions are kept as NaN so the distance fallback below can
    # actually apply to pairs the model never saw.
    val_dist_agg = val_dist.merge(pair_preds, how='left', on='contact_id')

    is_close = val_dist_agg['distance'] <= 1
    out = np.where(val_dist_agg['contact_pred'].isna(),
                   is_close,
                   val_dist_agg['contact_pred'] > thresh).astype(int)

    print('Baseline', matthews_corrcoef(val_dist_agg['contact'], is_close.astype(int)))
    print('Model', matthews_corrcoef(val_dist_agg['contact'], out))

    return val_dist

In [None]:
if CFG.train:
    # Fold definitions: kf_dict[split] holds the 'train_games' / 'val_games'.
    # Opened with a context manager so the file handle is closed again.
    with open('kf_dict', 'rb') as fold_file:
        kf_dict = pickle.load(fold_file)

    # Small subsample: every 3rd frame, then 1% of train and validation rows.
    frames_df = get_frames_df(df_combo_with_helmets, kf_dict, split=0, sample_every_n_frame=3, sample_train=0.01, sample_val=0.01)
    dls = get_dls(frames_df)

    # Architecture, epochs and file name come from CFG instead of being
    # repeated as literals (the configured values match the former literals).
    learn = get_learner(CFG.model, dls)
    dls.show_batch()
    learn.fine_tune(CFG.epochs)
    learn.export(f'nflutils/{CFG.model_name}')

In [None]:
if CFG.valid:
    # Run on CPU when no GPU is available.
    cpu = not torch.cuda.is_available()
    learn = load_learner(f'nflutils/{CFG.model_name}', cpu=cpu)

    # Load the fold definitions here as well: the training cell is skipped
    # when CFG.train is False, which would leave kf_dict undefined.
    with open('kf_dict', 'rb') as fold_file:
        kf_dict = pickle.load(fold_file)

    frames_df = get_frames_df(df_combo_with_helmets, kf_dict, split=0, sample_every_n_frame=6, sample_train=0.01, sample_val=0.1)
    val_df = frames_df.query('is_valid').copy()

    validate_model(learn, df_combo, val_df, thresh=CFG.thresh)

# Inference

In [None]:
if CFG.infer:
    # Load the exported learner from the helpers directory, on CPU when no
    # GPU is available.
    cpu = not torch.cuda.is_available()
    learn = load_learner(f'{CFG.utils_path}/{CFG.model_name}', cpu=cpu)

In [None]:
if CFG.infer:
    # Expand the submission ids and compute the distance of every pair.
    ss = expand_contact_id(ss)
    ss_dist = compute_distance(ss, te_tracking, merge_col="step")

    # Only pairs within 1.6 of each other are matched with helmet boxes; the
    # result then gets the centre between both helmets, loses 'Endzone2' rows
    # and rows lacking a view or a second helmet box, and gets integer frames.
    ss_dist_with_helmets = (
        calc_two_players_helmets_center(
            merge_tracking_and_helmets(ss_dist.query('distance <= 1.6'), te_helmets)
        )
        .query("view != 'Endzone2'")
        .dropna(subset=['view', 'left_2'])
        .astype({'frame': 'int'})
    )

In [None]:
if CFG.infer:
    import subprocess

    # Output directory for the extracted frames, writable by everyone
    # (equivalent of `mkdir -p test` + `chmod 777 test`).
    frames_out = Path('test')
    frames_out.mkdir(parents=True, exist_ok=True)
    frames_out.chmod(0o777)

    # File names of the test videos, sorted and without hidden entries,
    # as `ls` would list them.
    test_videos_dir = Path('/kaggle/input/nfl-player-contact-detection/test')
    g_paths = sorted(
        entry.name for entry in test_videos_dir.iterdir()
        if not entry.name.startswith('.')
    )

    for g_path in tqdm(g_paths):
        game_play = g_path
        # 'Endzone2' videos are not used for inference.
        if "Endzone2" in game_play:
            continue

        # Dump every frame as '<video file name>_<4-digit counter>.jpg'.
        # The argument list is passed without a shell, so no quoting is
        # needed and no variable expansion can interfere.
        result = subprocess.run(
            [
                'ffmpeg',
                '-i', str(test_videos_dir / g_path),
                '-q:v', '2',
                '-f', 'image2',
                f'test/{game_play}_%04d.jpg',
                '-hide_banner',
                '-loglevel', 'error',
            ],
            check=False,
        )
        if result.returncode != 0:
            # Keep going, but make the failed video visible.
            print(f'ffmpeg exited with code {result.returncode} for {g_path}')

In [None]:
if CFG.infer:
    # Point every row at its extracted test frame; the extra positional
    # arguments are handed to get_frame_path after the row itself.
    ss_dist_with_helmets['path'] = ss_dist_with_helmets.apply(
        get_frame_path, axis=1, args=('/kaggle/working/test', 'test')
    )
    ss_dist_with_helmets = add_contact_id(ss_dist_with_helmets)
    display(ss_dist_with_helmets.head())

In [None]:
if CFG.infer:
    test_dl = learn.dls.test_dl(ss_dist_with_helmets)
    preds, _ = learn.get_preds(dl=test_dl)

    # Look the class column up by name: fastai sorts the category vocab, so
    # 'contact' is not guaranteed to sit at index 1.
    contact_idx = list(learn.dls.vocab).index('contact')
    ss_dist_with_helmets.loc[:, 'contact_pred'] = preds.cpu().detach().numpy()[:, contact_idx]

    # One row per player pair: average the probability over the pair's rows
    # and keep its distance. Thresholding happens when the submission is built.
    ss_dist_with_helmets = ss_dist_with_helmets.groupby('contact_id', as_index=False).agg({'contact_pred': 'mean', 'distance': 'first'})

In [None]:
if CFG.infer:
    sub = pd.read_csv('/kaggle/input/nfl-player-contact-detection/sample_submission.csv')

    # Model probability per pair; NaN for pairs that had no usable crop.
    sub = sub.drop("contact", axis=1).merge(ss_dist_with_helmets[['contact_id', 'contact_pred']], how='left', on='contact_id')

    # Distance is taken from the full pair table. Taking it from the
    # prediction table would leave it NaN exactly where the prediction is
    # missing, so the distance fallback below could never fire.
    # NOTE(review): assumes add_contact_id can derive the id from ss_dist's
    # columns when it is not already present - confirm against nflutils.
    pair_dist = ss_dist if 'contact_id' in ss_dist.columns else add_contact_id(ss_dist.copy())
    sub = sub.merge(pair_dist[['contact_id', 'distance']].drop_duplicates('contact_id'), how='left', on='contact_id')

    # Use the model where it produced a probability, the distance rule
    # elsewhere (a missing distance compares as False, i.e. no contact).
    sub['contact'] = np.where(sub['contact_pred'].isna(),
                              sub['distance'] <= CFG.dist_thresh,
                              sub['contact_pred'] > CFG.thresh).astype(int)

    sub[["contact_id", "contact"]].to_csv("submission.csv", index=False)
    # A bare expression inside an `if` block is not rendered by the notebook.
    display(sub.head())