In [None]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.spatial.distance import cdist

In [None]:
# Locate the data folder relative to the directory the notebook is run from.
main_dir = Path.cwd()
data_path = main_dir.joinpath("data")

In [None]:
# Load the raw CSV tables from the data directory.
# data_path is already a pathlib.Path, so the file paths are built with the
# "/" operator instead of string interpolation with a hard-coded separator.
players = pd.read_csv(data_path / 'players.csv')
games = pd.read_csv(data_path / 'games.csv')
tackles = pd.read_csv(data_path / 'tackles.csv')
plays = pd.read_csv(data_path / 'plays.csv')
tracking = pd.read_csv(data_path / 'tracking_week_1.csv')

In [None]:
# Attach each player's position to the tracking rows. The join key is left
# implicit (the columns the two frames share); with how='left', tracking rows
# that have no match in `players` are kept with a missing position.
tracking = pd.merge(
    tracking,
    players[['nflId', 'position']],
    how='left',
)

def calc_dist_1frame(frame):
    """Add pairwise distance columns to the rows of a single frame.

    Every row gets a ``pos_unique`` label made of its ``position`` plus a
    1-based counter within that position (e.g. ``WR1``, ``WR2``, ...). One
    distance column is then added per row of ``frame``, named after that
    row's label, holding the distance (``cdist`` default metric) from each
    row's ``(x, y)`` to that row's ``(x, y)``.

    Rows with a missing ``position`` keep a missing ``pos_unique``; their
    distance column is named ``'football'``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns ``nflId``, ``position``, ``x`` and ``y``.
        Presumably the rows of one frameId of one play -- confirm with caller.
        The input is not modified.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame: the input columns, ``pos_unique``, then the distance
        columns, joined back on ``nflId``.
    """
    # Work on a copy so the caller's DataFrame is never mutated.
    frame = frame.copy()

    # Number players within each position. Only rows that actually have a
    # position are counted, so the counter stays an integer and can be turned
    # into text directly -- multi-digit counters such as 10 or 20 are kept
    # intact instead of having their zeros stripped.
    has_position = frame['position'].notna()
    ordinals = (frame.loc[has_position]
                .groupby('position')
                .cumcount()
                .add(1)
                .astype(str))

    # Index-aligned concatenation; rows without a position are absent from
    # `ordinals` and therefore end up with a missing label.
    frame['pos_unique'] = frame['position'].add(ordinals)

    # calc distances between every pair of rows
    coords = frame[['x', 'y']].to_numpy()
    dist_labels = frame['pos_unique'].fillna('football')
    dists = pd.DataFrame(cdist(coords, coords),
                         index=frame['nflId'],
                         columns=dist_labels)

    # reset index to pop out nflId into its own column
    dists = dists.reset_index()

    # merge new distance values onto the frame, keyed explicitly on nflId
    return frame.merge(dists, on='nflId')

def calc_dist_1play(play):
    """Run ``calc_dist_1frame`` on every frame of a play and stack the results.

    Parameters
    ----------
    play : pandas.DataFrame
        Rows sharing a ``frameId`` are processed together; must also contain
        the columns ``calc_dist_1frame`` needs.

    Returns
    -------
    pandas.DataFrame
        The per-frame results concatenated in order of first appearance of
        each ``frameId``. An empty DataFrame is returned when ``play`` has no
        frames.
    """
    # Split the play once with groupby (sort=False keeps the original frame
    # order) instead of re-scanning the whole play with a boolean mask for
    # every frameId. Each group is copied before being handed on.
    frame_results = [
        calc_dist_1frame(df_frame.copy())
        for _, df_frame in play.groupby('frameId', sort=False)
    ]

    # pd.concat raises on an empty list, so keep the empty-DataFrame result.
    if not frame_results:
        return pd.DataFrame()

    # Concatenate once at the end; growing a DataFrame inside the loop
    # re-copies all previously collected rows on every iteration.
    return pd.concat(frame_results)

In [None]:
# Pick out one play of one game and compute its per-frame distances.
play = tracking.loc[
    tracking["gameId"].eq(2022090800) & tracking["playId"].eq(56)
]

play_dists = calc_dist_1play(play)

In [None]:
# Restrict the tracking data to a single game.
game = tracking[(tracking["gameId"] == 2022090800)]

# Compute the distances play by play, collecting the pieces in a list so they
# are concatenated only once; concatenating inside the loop re-copies every
# previously collected row on each iteration.
play_dist_frames = []

for pid in game["playId"].unique():

    df_play = game.loc[game['playId']==pid].copy()

    df_play_dists = calc_dist_1play(df_play)

    play_dist_frames.append(df_play_dists)

# pd.concat raises on an empty list; fall back to an empty DataFrame.
df_all_plays = pd.concat(play_dist_frames) if play_dist_frames else pd.DataFrame()

df_all_plays.head()

In [None]:
# Print the dtype of every column of the combined per-play distance table.
print(df_all_plays.dtypes)

In [None]:
# Show the column labels of the combined table.
df_all_plays.columns

In [None]:
# Position of the first column that comes after 'pos_unique'.
pos_start = df_all_plays.columns.tolist().index("pos_unique") + 1

In [None]:
# Every column after 'pos_unique' is a distance column added by
# calc_dist_1frame, named after a position label (or 'football').
positions = df_all_plays.columns[pos_start:]

In [None]:
# Display the column dtypes as the cell's output value.
df_all_plays.dtypes