# Load libraries

In [1]:
# Detect where the notebook runs: the local checkout has an `init.sh` file in
# the working directory, so its absence is taken to mean a Kaggle kernel.
# The directory is listed with the standard library instead of the `!ls` shell
# magic so the check also works outside IPython and on non-POSIX systems.
import os

LS = sorted(os.listdir())
IS_KAGGLE = 'init.sh' not in LS
IS_KAGGLE

False

In [2]:
import sys

# Notebook-wide configuration holder. Only `seed` is declared in the class body;
# `frames_path` and `utils_path` are attached below depending on the environment.
class CFG:
    # Fixed seed value (not referenced by the cells visible in this notebook).
    seed = 4121995

if IS_KAGGLE:
    # Make the timm sources shipped as a Kaggle dataset importable.
    sys.path.append('/kaggle/input/timm-0-6-9/pytorch-image-models-master')
    CFG.frames_path = ''
    CFG.utils_path = '/kaggle/input/nflutils'
    
    sys.path.insert(0, '../input/nflutils')
    # Copy the helper modules into a local `nflutils/` directory; presumably this is
    # what lets `from nflutils.dataprep import *` resolve on Kaggle - TODO confirm.
    !mkdir -p nflutils
    !cp ../input/nflutils/*.py nflutils/
    
else:
    # Local layout: frames and helper modules live under the working directory.
    CFG.frames_path = 'frames/content/work/frames/train'
    CFG.utils_path = 'nflutils'

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt

import pickle
import timm
import cv2

from pathlib import Path

from nflutils.dataprep import *

from tqdm.notebook import tqdm

from sklearn.metrics import matthews_corrcoef

from fastai.vision.all import *

# Let wide frames display up to 500 columns before pandas truncates them.
pd.set_option('display.max_columns', 500)

# Read Files

In [4]:
# BASE_DIR is the folder the competition CSVs are read from; OUT_DIR is the
# companion output location. Both depend on whether the notebook runs on Kaggle.
BASE_DIR = Path(
    "../input/nfl-player-contact-detection" if IS_KAGGLE else "nfl-player-contact-detection"
)
OUT_DIR = Path(
    "/kaggle/working/" if IS_KAGGLE else "nfl-player-contact-detection/frames"
)

In [5]:
# Load the competition CSVs from BASE_DIR. Every path is built with the
# `Path / name` operator (train_labels.csv previously used an f-string).
ss = pd.read_csv(BASE_DIR / "sample_submission.csv")

labels = pd.read_csv(BASE_DIR / "train_labels.csv", parse_dates=["datetime"])

# Player tracking for train and test; `datetime` is parsed into timestamps.
tr_tracking = pd.read_csv(
    BASE_DIR / "train_player_tracking.csv", parse_dates=["datetime"]
)

te_tracking = pd.read_csv(
    BASE_DIR / "test_player_tracking.csv", parse_dates=["datetime"]
)

# Baseline helmet boxes for train and test.
tr_helmets = pd.read_csv(BASE_DIR / "train_baseline_helmets.csv")
te_helmets = pd.read_csv(BASE_DIR / "test_baseline_helmets.csv")

# Per-video metadata; the three time columns are parsed into timestamps.
tr_video_metadata = pd.read_csv(
    BASE_DIR / "train_video_metadata.csv",
    parse_dates=["start_time", "end_time", "snap_time"],
)

In [8]:
def create_features(df, tr_tracking, merge_col="datetime", use_cols=["x_position", "y_position", "speed", "distance", "direction","orientation", "acceleration", "sa"]):
    """
    Attach both players' tracking features to every pair row and compute
    the distance between the two players.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per player pair. Must contain `game_play`, `merge_col`,
        `nfl_player_id_1` and `nfl_player_id_2`. `nfl_player_id_1` is cast to
        str before joining; `nfl_player_id_2` is used as-is.
    tr_tracking : pandas.DataFrame
        Tracking rows with `game_play`, `merge_col`, `nfl_player_id` and every
        column named in `use_cols`.
    merge_col : str
        Name of the time column shared by both frames and used as a join key.
    use_cols : sequence of str
        Tracking columns to copy for each player. Must include `x_position`
        and `y_position`, because the distance is derived from them. Any
        sequence (list, tuple, ...) is accepted.

    Returns
    -------
    pandas.DataFrame
        The rows of `df` (left joins, so none are dropped) with `<col>_1` and
        `<col>_2` for each entry of `use_cols`, plus `distance`: the Euclidean
        distance between the two positions, NaN when either player has no
        matching tracking row.
    """
    # Work on a private list: accepts tuples and never touches the shared default.
    use_cols = list(use_cols)

    # Build the tracking subset once; it is joined twice (player 1 and player 2).
    tracking = tr_tracking[["game_play", merge_col, "nfl_player_id"] + use_cols].astype(
        {"nfl_player_id": "str"}
    )

    def _attach(pairs, player):
        # Left-join the tracking columns for one side of the pair and suffix them.
        return (
            pairs.merge(
                tracking,
                left_on=["game_play", merge_col, f"nfl_player_id_{player}"],
                right_on=["game_play", merge_col, "nfl_player_id"],
                how="left",
            )
            .drop("nfl_player_id", axis=1)
            .rename(columns={col: f"{col}_{player}" for col in use_cols})
        )

    df_combo = _attach(_attach(df.astype({"nfl_player_id_1": "str"}), 1), 2)

    df_combo["distance"] = np.sqrt(
        np.square(df_combo["x_position_1"] - df_combo["x_position_2"])
        + np.square(df_combo["y_position_1"] - df_combo["y_position_2"])
    )
    return df_combo

In [9]:
# Pair-level table: every labels row plus both players' tracking features and
# the distance between them (NaN when a player has no tracking row).
df_combo = create_features(labels, tr_tracking)

In [10]:
# Keep pairs whose distance is at most 2, plus pairs with no distance at all
# (filtering on `distance <= 2` alone would drop those missing-distance rows).
df_combo = df_combo.loc[df_combo["distance"].isna() | df_combo["distance"].le(2)]

In [11]:
def join_helmets_contact(game_play, labels, helmets, meta, view="Sideline", fps=59.94):
    """
    Attach positive contact labels to the helmet boxes of a single play.

    Every helmet row gets a timestamp derived from its frame number and the
    start time of the `view` video, rounded to the nearest 100 ms so it can be
    matched against the label timestamps. Labels with ``contact == 1`` are
    then left-joined on (rounded timestamp, player 1 id): a helmet row is
    repeated once per matching contact and carries NaN label columns when
    there is none.

    NOTE(review): helmets are filtered by `game_play` only, while the start
    time comes from `view` - presumably callers restrict to one view
    themselves; confirm.
    """
    play_labels = labels.query("game_play == @game_play").copy()
    play_helmets = helmets.query("game_play == @game_play").copy()

    view_meta = meta.query("game_play == @game_play and view == @view")
    video_start = view_meta["start_time"].values[0]

    # Frame index -> offset from the start of the video -> absolute UTC time.
    seconds_per_frame = 1 / fps
    frame_offsets = pd.to_timedelta(play_helmets["frame"] * seconds_per_frame, unit="s")
    play_helmets["datetime"] = frame_offsets + video_start
    play_helmets["datetime"] = pd.to_datetime(play_helmets["datetime"], utc=True)

    # Adding 50 ms before flooring to 100 ms rounds to the nearest 100 ms step.
    half_step = pd.to_timedelta(50, "ms")
    shifted = pd.DatetimeIndex(play_helmets["datetime"] + half_step)
    play_helmets["datetime_ngs"] = shifted.floor("100ms").values
    play_helmets["datetime_ngs"] = pd.to_datetime(play_helmets["datetime_ngs"], utc=True)

    play_labels["datetime_ngs"] = pd.to_datetime(play_labels["datetime"], utc=True)
    play_labels["nfl_player_id_1"] = play_labels["nfl_player_id_1"].astype('int')

    # Only positive contacts are joined onto the helmet rows.
    label_cols = ["datetime_ngs", "nfl_player_id_1", "nfl_player_id_2", "contact_id"]
    contacts = play_labels.query("contact == 1")[label_cols]

    return play_helmets.merge(
        contacts,
        left_on=["datetime_ngs", "nfl_player_id"],
        right_on=["datetime_ngs", "nfl_player_id_1"],
        how="left",
    )

In [17]:
def merge_tracking_and_helmets_ts(tracking_df, helmets_df, meta_df, game_play, view, fps=59.94):
    """
    Join the helmet boxes of both players onto the pair rows of one play and view.

    Helmet frames are converted to timestamps (frame / fps after the video's
    `start_time`) and rounded to the nearest 100 ms so they can be matched to
    the pair timestamps. Player 1's box is joined first, which also brings in
    `frame` and `view`; player 2's box is then joined on the same frame and view.

    Parameters
    ----------
    tracking_df : pandas.DataFrame
        Pair-level rows with `game_play`, `datetime`, `nfl_player_id_1` and
        `nfl_player_id_2` (both id columns are cast to str).
    helmets_df : pandas.DataFrame
        Helmet boxes with `game_play`, `view`, `frame`, `nfl_player_id`,
        `player_label`, `left`, `width`, `top`, `height`.
    meta_df : pandas.DataFrame
        Video metadata with `game_play`, `view` and `start_time`.
    game_play, view
        Play and camera view to process.
    fps : float
        Frame rate used to turn frame numbers into seconds.

    Returns
    -------
    pandas.DataFrame
        The pair rows of `game_play` (left joins; one output row per matching
        helmet frame) with `view`, `frame`, `datetime_ngs` and the box columns
        suffixed `_1` / `_2`. Box columns are NaN where no helmet matched.

    Raises
    ------
    IndexError
        If `meta_df` has no row for (`game_play`, `view`).
    """
    gp_track = tracking_df.query('game_play == @game_play').copy()
    gp_helms = helmets_df.query('game_play == @game_play and view == @view').copy()

    start_time = meta_df.query("game_play == @game_play and view == @view")[
        "start_time"
    ].values[0]

    # Frame index -> offset from the start of the video -> absolute UTC time.
    gp_helms["datetime"] = (
        pd.to_timedelta(gp_helms["frame"] * (1 / fps), unit="s") + start_time
    )
    gp_helms["datetime"] = pd.to_datetime(gp_helms["datetime"], utc=True)
    # Adding 50 ms before flooring to 100 ms rounds to the nearest 100 ms step.
    gp_helms["datetime_ngs"] = (
        pd.DatetimeIndex(gp_helms["datetime"] + pd.to_timedelta(50, "ms"))
        .floor("100ms")
        .values
    )
    gp_helms["datetime_ngs"] = pd.to_datetime(gp_helms["datetime_ngs"], utc=True)

    gp_track["datetime_ngs"] = pd.to_datetime(gp_track["datetime"], utc=True)

    # Per-player columns that receive a _1 / _2 suffix after each join.
    box_cols = ['player_label', 'left', 'width', 'top', 'height']

    # Build the helmet subset once; it is joined twice (player 1 and player 2).
    helmet_boxes = gp_helms[
        ['game_play', 'view', 'datetime_ngs', 'frame', 'nfl_player_id'] + box_cols
    ].astype({"nfl_player_id": "str"})

    def _suffixed(player):
        # Rename mapping such as {"left": "left_1", ...} for one side of the pair.
        return {col: f'{col}_{player}' for col in box_cols}

    return (
        gp_track
        .astype({"nfl_player_id_1": "str", "nfl_player_id_2": "str"})
        .merge(
            helmet_boxes,
            left_on=['game_play', 'datetime_ngs', 'nfl_player_id_1'],
            right_on=['game_play', 'datetime_ngs', 'nfl_player_id'],
            how='left',
        )
        .rename(columns=_suffixed(1))
        .drop(["nfl_player_id"], axis=1)
        # Player 2 must come from the same frame and view as player 1.
        .merge(
            helmet_boxes,
            left_on=['game_play', 'datetime_ngs', 'frame', 'view', 'nfl_player_id_2'],
            right_on=['game_play', 'datetime_ngs', 'frame', 'view', 'nfl_player_id'],
            how='left',
        )
        .rename(columns=_suffixed(2))
        .drop(["nfl_player_id"], axis=1)
    )

In [18]:
# Build the pair/helmet table for every play and both camera views.
# The per-play results are collected first and concatenated once: calling
# pd.concat inside the loop re-copies the accumulated frame on every iteration.
# An empty DataFrame is kept as the result when there is nothing to merge.
tracking_helmet_parts = [
    merge_tracking_and_helmets_ts(df_combo, tr_helmets, tr_video_metadata, gp, view)
    for gp in tqdm(df_combo.game_play.unique())
    for view in ('Sideline', 'Endzone')
]

df_tracking_helmets = (
    pd.concat(tracking_helmet_parts) if tracking_helmet_parts else pd.DataFrame()
)

  0%|          | 0/240 [00:00<?, ?it/s]

In [46]:
# df_tracking_helmets.to_parquet('df_tracking_helmets_below_2.parquet', index=False)

In [6]:
# df_tracking_helmets = pd.read_parquet('df_tracking_helmets.parquet')

In [25]:
# `calc_two_players_helmets_center` is not defined in this notebook; presumably it
# comes from the `from nflutils.dataprep import *` star import - TODO confirm.
# NOTE(review): this cell's execution count (25) is higher than that of the cells
# below it (19-23), so the outputs shown there were likely produced before this
# cell ran - confirm the notebook still behaves as intended on Restart & Run All.
df_tracking_helmets = calc_two_players_helmets_center(df_tracking_helmets)

In [19]:
# Number of rows where both players' boxes and the frame number are present.
df_tracking_helmets.dropna(subset=["left_1", "left_2", "frame"]).shape

(2404360, 37)

In [20]:
# Number of rows where at least player 1's box and the frame number are present.
df_tracking_helmets.dropna(subset=["left_1", "frame"]).shape

(6186702, 37)

In [21]:
# Keep only rows that have a frame number and a box for player 1.
# Player 2's box is deliberately not required, so rows without it stay.
df_tracking_helmets = df_tracking_helmets.dropna(subset=["left_1", "frame"])

In [22]:
# Inspect the pairs whose second "player" id is the literal "G".
df_tracking_helmets.query('nfl_player_id_2 == "G"')

Unnamed: 0,contact_id,game_play,datetime,step,nfl_player_id_1,nfl_player_id_2,contact,x_position_1,y_position_1,speed_1,distance_1,direction_1,orientation_1,acceleration_1,sa_1,x_position_2,y_position_2,speed_2,distance_2,direction_2,orientation_2,acceleration_2,sa_2,distance,datetime_ngs,view,frame,player_label_1,left_1,width_1,top_1,height_1,player_label_2,left_2,width_2,top_2,height_2
72,58168_003392_0_38590_G,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,G,0,40.33,25.28,0.52,0.06,141.08,100.37,0.59,0.58,,,,,,,,,,2020-09-11 03:01:48.100000+00:00,Sideline,295.0,H70,468.0,14.0,373.0,17.0,,,,,
73,58168_003392_0_38590_G,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,G,0,40.33,25.28,0.52,0.06,141.08,100.37,0.59,0.58,,,,,,,,,,2020-09-11 03:01:48.100000+00:00,Sideline,296.0,H70,467.0,14.0,372.0,17.0,,,,,
74,58168_003392_0_38590_G,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,G,0,40.33,25.28,0.52,0.06,141.08,100.37,0.59,0.58,,,,,,,,,,2020-09-11 03:01:48.100000+00:00,Sideline,297.0,H70,468.0,13.0,373.0,18.0,,,,,
75,58168_003392_0_38590_G,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,G,0,40.33,25.28,0.52,0.06,141.08,100.37,0.59,0.58,,,,,,,,,,2020-09-11 03:01:48.100000+00:00,Sideline,298.0,H70,468.0,13.0,372.0,18.0,,,,,
76,58168_003392_0_38590_G,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,G,0,40.33,25.28,0.52,0.06,141.08,100.37,0.59,0.58,,,,,,,,,,2020-09-11 03:01:48.100000+00:00,Sideline,299.0,H70,468.0,13.0,371.0,18.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9715,58582_003121_91_47872_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,47872,G,0,62.63,42.98,1.60,0.16,163.51,121.08,0.99,0.32,,,,,,,,,,2021-10-12 02:42:29.100000+00:00,Endzone,838.0,V58,655.0,42.0,290.0,45.0,,,,,
9716,58582_003121_91_47872_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,47872,G,0,62.63,42.98,1.60,0.16,163.51,121.08,0.99,0.32,,,,,,,,,,2021-10-12 02:42:29.100000+00:00,Endzone,839.0,V58,653.0,43.0,291.0,45.0,,,,,
9717,58582_003121_91_47872_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,47872,G,0,62.63,42.98,1.60,0.16,163.51,121.08,0.99,0.32,,,,,,,,,,2021-10-12 02:42:29.100000+00:00,Endzone,840.0,V58,652.0,41.0,291.0,45.0,,,,,
9718,58582_003121_91_47872_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,47872,G,0,62.63,42.98,1.60,0.16,163.51,121.08,0.99,0.32,,,,,,,,,,2021-10-12 02:42:29.100000+00:00,Endzone,841.0,V58,651.0,41.0,291.0,45.0,,,,,


In [23]:
# Row and column count of the filtered table that gets saved below.
df_tracking_helmets.shape

(6186702, 37)

In [26]:
# Persist the table as parquet under the local `nflutils/` directory;
# index=False keeps the DataFrame index out of the file.
df_tracking_helmets.to_parquet('nflutils/df_tracking_helmets_below_2.parquet', index=False)