# Setup

In [1]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.spatial.distance import cdist
from local_functions import *
import math
import re
import time

In [2]:
# Wall-clock start of the run; the timing cell near the end of the notebook subtracts this value
start_time = time.time()

In [3]:
# Resolve the project directories relative to the notebook's working directory
main_dir = Path(os.getcwd())
data_path = main_dir / "data"
processed_data_path = data_path / 'processed'

# Every parquet output in this notebook is written under processed_data_path;
# create it up front so those writes do not fail on a fresh checkout.
processed_data_path.mkdir(parents=True, exist_ok=True)

In [4]:
# Load the lookup tables that sit directly under the data directory
df_players = pd.read_csv(data_path / 'players.csv')
df_games = pd.read_csv(data_path / 'games.csv')
df_tackles = pd.read_csv(data_path / 'tackles.csv')
df_plays = pd.read_csv(data_path / 'plays.csv')
#df_tracking = pd.read_csv(data_path / 'tracking_week_5.csv')

In [5]:
# os.listdir returns names in arbitrary order, so sort them to make the load order
# (and therefore the row order of the combined tracking table) reproducible.
data_files = sorted(os.listdir(data_path))

# Keep only the files whose name starts with 'tracking'
matching_files = [file for file in data_files if file.startswith('tracking')]


In [6]:
# Read every tracking file first and concatenate once: calling pd.concat on a
# growing frame inside the loop re-copies all previously loaded rows each time.
tracking_parts = [pd.read_csv(data_path / file) for file in matching_files]

# pd.concat raises on an empty list, so fall back to an empty frame when no
# tracking files were found (the result the incremental version produced).
df_tracking = pd.concat(tracking_parts) if tracking_parts else pd.DataFrame()

In [None]:
# Example play from Logan Wilson highlights
is_bal_hosting_cin = (df_games['homeTeamAbbr'] == 'BAL') & (df_games['visitorTeamAbbr'] == 'CIN')
ex_gameId = df_games.loc[is_bal_hosting_cin, 'gameId'].iloc[0]

# First-quarter play of that game with 12:57 on the game clock
is_example_play = (
    (df_plays['gameId'] == ex_gameId)
    & (df_plays['quarter'] == 1)
    & (df_plays['gameClock'] == '12:57')
)
ex_playId = df_plays.loc[is_example_play, 'playId'].iloc[0]

ex_gameId2 = 2022100901 # Additional game to work with in development

ex_games = [ex_gameId, ex_gameId2]

#df_tracking = df_tracking[df_tracking['gameId'].isin(ex_games)]

In [None]:
# Key columns identifying a single frame of a single play
fid_cols = ['gameId', 'playId', 'frameId']
# Key columns identifying one player within one frame
playerframe_cols = fid_cols + ['nflId']

# Standardize Tracking Data

In [None]:
# Add player positions to tracking data
position_lookup = df_players.loc[:, ['nflId', 'position']]
df_tracking = df_tracking.merge(position_lookup, on='nflId', how='left')

# Identify offensive and defensive players
play_keys = ['gameId', 'playId']
possession_lookup = df_plays.loc[:, play_keys + ['possessionTeam']]
df_tracking = df_tracking.merge(possession_lookup, on=play_keys, how='left')
df_tracking['on_offense'] = df_tracking['club'].eq(df_tracking['possessionTeam'])


# Create game/play ID for easier filtering
df_tracking['game_play_id'] = df_tracking['gameId'].astype(str).str.cat(df_tracking['playId'].astype(str), sep="_")

# Add indicator column for the ball carrier in the tracking data.
# The left join only matches rows whose nflId equals the play's ballCarrierId,
# so ballCarrierId is null on every other row.
carrier_lookup = df_plays.loc[:, play_keys + ['ballCarrierId']]
df_tracking = df_tracking.merge(
    carrier_lookup,
    left_on=play_keys + ['nflId'],
    right_on=play_keys + ['ballCarrierId'],
    how='left',
)
df_tracking['is_ballcarrier'] = df_tracking['nflId'].eq(df_tracking['ballCarrierId'])

In [None]:
# Standardize x/y coordinates - all plays going to the right
# Plays moving left are mirrored through the field (120 long, 160/3 wide)
moving_left = df_tracking['playDirection'] == 'left'
df_tracking['x_std'] = np.where(moving_left, 120 - df_tracking['x'], df_tracking['x'])
df_tracking['y_std'] = np.where(moving_left, 160/3 - df_tracking['y'], df_tracking['y'])

In [None]:
# Standardize player orientation - all plays going to the right
# Rotate left-moving plays by 180 degrees, then wrap anything above 360 back into range
moving_left = df_tracking['playDirection'] == 'left'
o_rotated = np.where(moving_left, df_tracking['o'] + 180, df_tracking['o'])
df_tracking['o_std'] = np.where(o_rotated > 360, o_rotated - 360, o_rotated)

# Unit-vector components of the standardized orientation (x uses sin, y uses cos)
df_tracking['o_rad'] = np.radians(df_tracking['o_std'])
df_tracking['o_x'] = np.sin(df_tracking['o_rad'])
df_tracking['o_y'] = np.cos(df_tracking['o_rad'])

In [None]:
# Standardize player movement direction - all plays going to the right
# Rotate left-moving plays by 180 degrees, then wrap anything above 360 back into range
moving_left = df_tracking['playDirection'] == 'left'
dir_rotated = np.where(moving_left, df_tracking['dir'] + 180, df_tracking['dir'])
df_tracking['dir_std'] = np.where(dir_rotated > 360, dir_rotated - 360, dir_rotated)

# Unit-vector components of the standardized movement direction (x uses sin, y uses cos)
df_tracking['dir_rad'] = np.radians(df_tracking['dir_std'])
df_tracking['dir_x'] = np.sin(df_tracking['dir_rad'])
df_tracking['dir_y'] = np.cos(df_tracking['dir_rad'])

In [None]:
# Create columns for speed and acceleration in x/y direction
for component in ('x', 'y'):
    unit = df_tracking[f'dir_{component}']
    df_tracking[f'v_{component}'] = unit * df_tracking['s']
    df_tracking[f'a_{component}'] = unit * df_tracking['a']

# Angle of the velocity vector; undefined results (NaN) are replaced by 0.
# NOTE(review): arctan(v_y / v_x) only spans (-pi/2, pi/2) and so cannot distinguish
# opposite headings; np.arctan2(v_y, v_x) would. Confirm what compute_player_zoi expects.
df_tracking['v_theta'] = np.arctan(df_tracking['v_y'] / df_tracking['v_x']).fillna(0)

# Features Relative to Ball Carrier

In [None]:
# Join ball carrier tracking to total tracking - allows for calculating distance, speed, etc of player X relative to the ball carrier
bc_frame_cols = ['gameId', 'playId', 'frameId', 'nflId', 'x_std', 'y_std', 'dir_std', 'o_std', 's', 'a']
df_bc_frames = (
    df_tracking.loc[df_tracking['is_ballcarrier']]
    .drop_duplicates(['gameId','playId','frameId','nflId'])
    .loc[:, bc_frame_cols]
    .reset_index(drop=True)
)

bc_radius = 3

# Axis-aligned window of +/- bc_radius around the ball carrier in every frame
for axis in ('x', 'y'):
    df_bc_frames[f'bc_{axis}r_min'] = df_bc_frames[f'{axis}_std'] - bc_radius
    df_bc_frames[f'bc_{axis}r_max'] = df_bc_frames[f'{axis}_std'] + bc_radius

bc_radius_cols = ['bc_xr_min', 'bc_xr_max', 'bc_yr_min', 'bc_yr_max']

In [None]:
# Attach the ball carrier's values to every row of the same frame. Overlapping
# columns from the ball-carrier table get the '_bc' suffix, and the inner join
# drops frames that have no ball-carrier row.
bc_frame_values = df_bc_frames.drop(columns='nflId')
df_tracking = df_tracking.merge(
    bc_frame_values,
    on=['gameId','playId','frameId'],
    how='inner',
    suffixes=('', '_bc'),
)

In [None]:
# Calculate the orientation of each player relative to the ball carrier
# NOTE(review): the positions passed here are the standardized x_std/y_std, but the angle
# column is the raw 'o' rather than 'o_std' (the block-shed cells later in this notebook
# pass 'o_std'). For playDirection == 'left' the two differ by 180 degrees - confirm intent.
# calc_angle_diff comes from local_functions; a later cell reads 'dist_to_bc', which is
# presumably added by this helper - verify against its definition.
df_tracking = calc_angle_diff(input_df = df_tracking, 
                              xc = 'x_std', 
                              yc = 'y_std', 
                              anglec = 'o', 
                              xc_ref = 'x_std_bc', 
                              yc_ref = 'y_std_bc', 
                              new_name_suffix = 'bc')

# Calculate the movement direction of each player relative to the ball carrier
# NOTE(review): same concern as above - raw 'dir' is used instead of 'dir_std'.
df_tracking = calc_angle_diff(input_df = df_tracking, 
                              xc = 'x_std', 
                              yc = 'y_std', 
                              anglec = 'dir', 
                              xc_ref = 'x_std_bc', 
                              yc_ref = 'y_std_bc', 
                              new_name_suffix = 'bc')

In [None]:
# Persist the enriched tracking table to the processed data directory
df_tracking.to_parquet(processed_data_path / 'tracking.parquet', engine='fastparquet')

# Calculate Distances

In [None]:
# Define positions on each side of the ball
offense_positions = ['QB', 'RB', 'FB', 'WR', 'TE', 'T', 'G', 'C', 'LS']
defense_positions = ['DE', 'NT', 'DT', 'ILB', 'OLB', 'MLB', 'CB', 'DB', 'FS', 'SS']

# Check that all positions are accounted for
# player_positions = list(df_players['position'].unique())
# player_positions.sort()
# all_positions = offense_positions + defense_positions
# all_positions.sort()
# player_positions == all_positions

In [None]:
# Keep only the identifiers and standardized coordinates needed for pairwise distances
df_tracking_4dists = df_tracking.loc[:, ['gameId', 'playId', 'frameId', 'nflId', 'position', 'x_std', 'y_std']].copy()

# Build one wide distance table per frame and concatenate once at the end.
# Growing df_dists with pd.concat inside the loop re-copies every earlier frame on
# each iteration, and re-filtering the full table per game/play/frame adds further
# passes; a single groupby over the frame key avoids both. sort=False keeps frames
# in order of first appearance in the tracking table.
frame_dists = []
for _, frame in df_tracking_4dists.groupby(['gameId', 'playId', 'frameId'], sort=False):
    frame = frame.copy()

    # Make unique positions (QB1, WR1, WR2, ...) so columns are not duplicated by position.
    # Rows without a position (the football) get no suffix and stay null here.
    # Casting through int avoids string clean-up of values such as '1.0', which
    # would also turn a count of 10 into '1'.
    position_rank = (frame
                     .groupby('position')
                     .cumcount()
                     .add(1)
                     .where(frame['position'].notna())
                     .dropna()
                     .astype(int)
                     .astype(str))
    frame['pos_unique'] = frame['position'].add(position_rank)

    # Pairwise distances between every tracked object in the frame:
    # rows are keyed by nflId, columns by unique position ('football' for the ball)
    coords = frame.loc[:, ['x_std', 'y_std']]
    _df = pd.DataFrame(cdist(coords, coords),
                       index=frame['nflId'],
                       columns=frame['pos_unique'].fillna('football'))

    # reset index to pop out nflId into its own column
    _df = _df.reset_index()

    # merge new distance values onto the frame's rows (joins on the shared nflId column)
    frame_dists.append(frame.merge(_df))

# pd.concat raises on an empty list, so fall back to an empty frame when there are no frames
df_dists = pd.concat(frame_dists) if frame_dists else pd.DataFrame()

In [None]:
# Persist the wide per-frame distance table
df_dists.to_parquet(processed_data_path / 'dists.parquet', engine='fastparquet')

In [None]:
# Remove football rows and go from wide to long data
# Each row will be an observation of player A and their distance from player B
melt_id_cols = fid_cols + ['nflId', 'position', 'pos_unique']
df_dists_long = (
    df_dists.loc[df_dists['nflId'].notnull()]
    .drop(columns=['x_std', 'y_std', 'football'])
    .melt(id_vars=melt_id_cols, var_name='x_pos_unique', value_name='dist')
)

In [None]:
# Filter out rows relating the player to themselves
is_other_player = df_dists_long['pos_unique'].ne(df_dists_long['x_pos_unique'])
df_dists_long = df_dists_long.loc[is_other_player]

# Filter out rows relating to a unique position not present on the given play
df_dists_long = df_dists_long.dropna(subset=['dist']).reset_index(drop=True)

In [None]:
# Extract the raw player position from the unique position column
# (extract_position_vectorized comes from local_functions; presumably it strips the
# numeric suffix, e.g. 'WR2' -> 'WR' - verify against its definition)
df_dists_long['x_position'] = extract_position_vectorized(df_dists_long['x_pos_unique'])

In [None]:
# Define side of the ball for both the reference player and target player
# Any position not listed in offense_positions is labelled 'defense'
ref_on_offense = df_dists_long['position'].isin(offense_positions)
target_on_offense = df_dists_long['x_position'].isin(offense_positions)

df_dists_long['side'] = np.where(ref_on_offense, 'offense', 'defense')
df_dists_long['x_side'] = np.where(target_on_offense, 'offense', 'defense')
df_dists_long['same_side'] = df_dists_long['side'].eq(df_dists_long['x_side'])

In [None]:
# Sort and assign IDs for each target player by side of ball and distance
# Each player within 1 frame will normally have 21 rows
# x1-x11 for each player on the other team / side of the ball in order of distance
# y1-y10 for each player on their team / side of the ball in order of distance

# Note: the loop calculating distances from each other player based on their standard position and aggregating in this way may miss unique plays that have players on non-standard sides of the ball
# For example, a DL coming in as a FB on a goal line play won't be 100% accurate with this current method.

df_dists_long = df_dists_long.sort_values(playerframe_cols + ['same_side', 'dist']).reset_index(drop=True)

# Rank targets by distance separately within each side. Subtracting a fixed 11 from a
# single running count only works when exactly 11 rows fall on the opposite side; ranking
# per side keeps the x and y ids starting at 1 when a frame has a different split.
rank_within_side = df_dists_long.groupby(playerframe_cols + ['same_side']).cumcount() + 1

# 'x' marks a target on the other side of the ball, 'y' a target on the same side
side_prefix = df_dists_long['same_side'].map({True: 'y', False: 'x'})
df_dists_long['x_id'] = side_prefix + rank_within_side.astype(str)

In [None]:
#df_dists_long.head(25)

In [None]:
# One row per player-frame, one column per ranked target (x1.., y1..) holding the distance
df_dists_processed = (
    df_dists_long.loc[:, playerframe_cols + ['x_id', 'dist']]
    .pivot(index=playerframe_cols, columns='x_id', values='dist')
    .reset_index()
)

In [None]:
# Persist the ranked-distance feature table
df_dists_processed.to_parquet(processed_data_path / 'dists_processed.parquet', engine='fastparquet')

# Field Control

In [None]:
# Calculate player influence radius
s_max = 13               # speed that maps to s_ratio == 1
delta_t = 0.5            # look-ahead time used to project each player's next position
min_radius = 4
max_radius = 10
radius_range = max_radius - min_radius
max_dist_from_ball = 20  # distance from the ball carrier at which the radius reaches max_radius

df_tracking['s_ratio'] = df_tracking['s'] / s_max
df_tracking['x_next'] = df_tracking['x_std'] + df_tracking['v_x'] * delta_t
df_tracking['y_next'] = df_tracking['y_std'] + df_tracking['v_y'] * delta_t

# The radius grows with the cube of the distance to the ball carrier, from min_radius
# at distance 0 to max_radius at max_dist_from_ball. The distance has to be scaled by
# max_dist_from_ball before cubing: dividing the cubed distance by the un-cubed maximum
# makes the radius hit max_radius at a distance of about 2.7 instead of 20.
# ('dist_to_bc' is presumably produced by calc_angle_diff earlier - verify.)
dist_ratio = df_tracking['dist_to_bc'] / max_dist_from_ball
df_tracking['radius_of_influence'] = min_radius + np.power(dist_ratio, 3) * radius_range

# Cap the radius at max_radius for players further away than max_dist_from_ball
df_tracking['radius_of_influence'] = np.where(df_tracking['radius_of_influence'] > max_radius, max_radius, df_tracking['radius_of_influence'])

In [None]:
# Keep only the rows that fall inside the ball carrier's window on both axes
# (bounds inclusive on every side)
within_x = df_tracking['x_std'].between(df_tracking['bc_xr_min'], df_tracking['bc_xr_max'])
within_y = df_tracking['y_std'].between(df_tracking['bc_yr_min'], df_tracking['bc_yr_max'])
df_tracking_bcradius = df_tracking[within_x & within_y]

In [None]:
#df_tracking.loc[:, ['frameId', 'nflId', 'x_std', 'y_std', 'x_std_bc', 'y_std_bc', 's', 'v_theta', 'v_x', 'v_y', 's_ratio', 'x_next', 'y_next', 'radius_of_influence']]

In [None]:
# Grid of field locations from local_functions; later cells filter it on its 'x' and 'y' columns
df_field_grid = create_field_grid()

In [None]:
# Drop the rows without an nflId (the football) before computing player influence
df_bcradius_noball = df_tracking_bcradius.dropna(subset=['nflId']).reset_index(drop=True)

# Split into identifier columns and the numeric inputs handed to compute_player_zoi
precontrol_id_cols = ['gameId', 'playId', 'frameId', 'nflId', 'club']
precontrol_value_cols = ['x_next', 'y_next', 'v_theta', 'radius_of_influence', 's_ratio'] + bc_radius_cols

df_precontrol_ids = df_bcradius_noball.loc[:, precontrol_id_cols]
df_precontrol_values = df_bcradius_noball.loc[:, precontrol_value_cols]

In [None]:
player_frame_influences = []
#df_control = pd.DataFrame()

for _, row in df_precontrol_values.iterrows():
    # Restrict the field grid to the ball-carrier window stored on this row (inclusive bounds)
    in_x_window = df_field_grid['x'].between(row['bc_xr_min'], row['bc_xr_max'])
    in_y_window = df_field_grid['y'].between(row['bc_yr_min'], row['bc_yr_max'])
    row_field_grid = df_field_grid[in_x_window & in_y_window]

    # One influence result per player-frame, in the same order as df_precontrol_values
    player_frame_influences.append(compute_player_zoi(row, row_field_grid))

    #df_influence = compute_player_zoi(row)
    #df_control = pd.concat([df_control, df_influence])



In [None]:
# Collapse each player-frame's influence values to their arithmetic mean.
# NOTE(review): this raises ZeroDivisionError if any entry is empty (e.g. if no grid
# points fell inside the ball-carrier window) - confirm that cannot happen.
influence_means = [sum(x)/len(x) for x in player_frame_influences]
# Relies on df_precontrol_ids having the same row order as df_precontrol_values
df_precontrol_ids['influence'] = influence_means

In [None]:
# precontrol_len = len(df_precontrol_ids.index)
# fieldgrid_len = len(df_field_grid.index)

# filler_influence = np.full(shape = (precontrol_len, fieldgrid_len), fill_value=0.5)

# filler_influence = filler_influence.tolist()

# df_precontrol_ids['influence'] = filler_influence

In [None]:
# Attach the home team to each player-frame, then flip the sign of the home team's influence
home_team_lookup = df_games.loc[:, ['gameId', 'homeTeamAbbr']]
df_control = df_precontrol_ids.merge(home_team_lookup, on='gameId')

is_home_club = df_control['club'] == df_control['homeTeamAbbr']
df_control['team_influence'] = np.where(is_home_club, -1 * df_control['influence'], df_control['influence'])

# df_teamcontrol = df_teamcontrol.groupby(['gameId', 'playId', 'frameId', 'club', 'x', 'y']).agg({'influence' : 'sum'}).reset_index().rename(columns={'influence' : 'control'})
# df_teamcontrol['control'] = 1 / (1 + np.exp(df_control_agg['control']))
# scale_column_to_100(dataframe = df_control_agg, column_name = 'influence')

In [None]:
# Persist the per-player influence table
df_control.to_parquet(processed_data_path / 'control.parquet', engine='fastparquet')

# Block Sheds

In [None]:
# # Calculate the point where a blocker projects onto a defender's path to the ball carrier
# target = (1,6)
# defender = (3,10)
# blocker = (4,7)

# line_to_target = line_equation(defender, target)

# blocker_projection = perpendicular_projection(blocker, line_to_target)


# # Determine if an offensive player is a potential blocker
# x, y = 37, 16
# xd = 7
# yd = 3
# facing_angle = 200  # in degrees
# maxX = 120
# maxY = 53.3
# minX = 0
# minY = 0


# # Calculate potential blocker region for a defender based on their orientation angle
# triangle_points = project_triangle(x, y, facing_angle, xd, yd, maxX, maxY, minX, minY)

# print(triangle_points)

# point_to_check = (32, 15)

# is_point_in_triangle(point_to_check, *triangle_points)

In [None]:
block_radius = 1

# Defender rows whose target player is within block_radius
is_close_to_defender = (df_dists_long['side'] == 'defense') & (df_dists_long['dist'] <= block_radius)
potential_block_cols = playerframe_cols + ['position', 'x_position', 'x_pos_unique', 'dist']
df_potential_blocks = df_dists_long.loc[is_close_to_defender, potential_block_cols].reset_index(drop=True)

In [None]:
# Lookup of every player's id and standardized position, keyed by unique position,
# renamed so it can be joined on as the "target" (x_) player
blocker_lookup_cols = playerframe_cols + ['pos_unique', 'x_std', 'y_std']
df_potential_blockers = (
    df_dists.loc[:, blocker_lookup_cols]
    .rename(columns={'pos_unique' : 'x_pos_unique', 'nflId' : 'x_nflId'})
)

In [None]:
# join id and x/y of potential blocker
# (x_std / y_std arrive unsuffixed here because df_potential_blocks has no such columns yet)
blocker_join_keys = ['gameId', 'playId', 'frameId', 'x_pos_unique']
df_potential_blocks = df_potential_blocks.merge(
    df_potential_blockers,
    on=blocker_join_keys,
    suffixes=('', '_x'),
)

In [None]:
# join x,y,o of defender
# The blocker's existing x_std / y_std are pushed to x_std_x / y_std_x by the left
# suffix, so the unsuffixed columns now hold the defender's values.
defender_values = df_tracking.loc[:, playerframe_cols + ['x_std', 'y_std', 'o_std']]
df_potential_blocks = df_potential_blocks.merge(
    defender_values,
    on=playerframe_cols,
    suffixes=('_x', ''),
)

In [None]:
# join o of blocker (lands in o_std_x because the defender's o_std is already present)
blocker_orientation = (
    df_tracking.loc[:, playerframe_cols + ['o_std']]
    .rename(columns={'nflId':'x_nflId'})
)
df_potential_blocks = df_potential_blocks.merge(
    blocker_orientation,
    on=fid_cols + ['x_nflId'],
    suffixes=('', '_x'),
)

In [None]:
# Defender's orientation (o_std at x_std/y_std) relative to the blocker's position
# (x_std_x/y_std_x). calc_angle_diff comes from local_functions; the next cell drops the
# *_dist_to_blocker columns it apparently adds - verify against its definition.
df_potential_blocks = calc_angle_diff(input_df = df_potential_blocks, 
                              xc = 'x_std', 
                              yc = 'y_std', 
                              anglec = 'o_std', 
                              xc_ref = 'x_std_x', 
                              yc_ref = 'y_std_x', 
                              new_name_suffix = 'blocker')

In [None]:
# Drop the distance columns for the blocker reference, keeping the remaining columns
blocker_dist_cols = ['x_dist_to_blocker', 'y_dist_to_blocker', 'dist_to_blocker']
df_potential_blocks = df_potential_blocks.drop(columns=blocker_dist_cols)

In [None]:
# Blocker's orientation (o_std_x at x_std_x/y_std_x) relative to the defender's position
# (x_std/y_std). The next cell drops the *_dist_to_defender columns that
# calc_angle_diff apparently adds - verify against its definition.
df_potential_blocks = calc_angle_diff(input_df = df_potential_blocks, 
                              xc = 'x_std_x', 
                              yc = 'y_std_x', 
                              anglec = 'o_std_x', 
                              xc_ref = 'x_std', 
                              yc_ref = 'y_std', 
                              new_name_suffix = 'defender')

In [None]:
# Drop the distance columns for the defender reference, keeping the remaining columns
defender_dist_cols = ['x_dist_to_defender', 'y_dist_to_defender', 'dist_to_defender']
df_potential_blocks = df_potential_blocks.drop(columns=defender_dist_cols)

In [None]:
# Persist the defender / potential-blocker pairs
df_potential_blocks.to_parquet(processed_data_path / 'potential_blocks.parquet', engine='fastparquet')

# CHECKPOINT

In [None]:
# Report the wall-clock time elapsed since start_time was recorded near the top of the notebook
end_time = time.time()
elapsed_time = end_time - start_time

print(f"Processing time: {elapsed_time} seconds")

# Play Animation

In [None]:
# NOTE(review): df_control_agg is not assigned anywhere in the visible notebook (it only
# appears in commented-out lines of the Field Control section), so this cell raises
# NameError on Restart & Run All. The plot also expects per-grid-point 'x', 'y' and
# 'control' columns. Restore the aggregation or remove this cell.
df_control_1frame = df_control_agg[(df_control_agg['frameId']==75)]# & (df_control_agg['club']=='PIT')]

# NOTE(review): import placed mid-notebook; consider moving it to the import cell at the top.
from plotnine import ggplot, geom_raster, scale_fill_gradient2, aes

# Assuming df_colors and game_ are already defined in your Python script

away_team = 'BUF'
away_color = 'blue'

home_team = 'PIT'
home_color = 'black'


# Raster of the 'control' value over the field, diverging from the away colour (0)
# through white (0.5) to the home colour (1)
play_frames = (
    ggplot()
    + geom_raster(
        data=df_control_1frame,
        mapping=aes(x='x', y='y', fill='control'),
        alpha=0.7,
        interpolate=True
    )
    + scale_fill_gradient2(
        #low=df_colors['away_1'],
        #high=df_colors['home_1'],
        low= away_color,
        high= home_color,
        mid="white",
        midpoint=0.5,
        name="Team Field Control",
        limits=[0, 1],
        breaks=[0, 1],
        #labels=[game_['visitorTeamAbbr'], game_['homeTeamAbbr']]
        labels=[away_team, home_team]
    )
)

# You can then show the plot using play_frames
print(play_frames)


In [None]:
# Plotting
# NOTE(review): triangle_points is only assigned in the commented-out Block Sheds example,
# so this cell raises NameError on a fresh kernel unless that example is restored.
# The hard-coded (37, 16) and (32, 15) match the x, y and point_to_check values there.
triangle_points_plot = [(37,16)]+triangle_points

points_x, points_y = zip(*triangle_points_plot + [triangle_points_plot[0]])  # Close the triangle

plt.plot(points_x, points_y, marker='o', linestyle='-', color='b', label='Projected Triangle')
plt.scatter(32,15)

In [None]:
# Hard-coded example ids kept for quick switching; the last assignment wins, so the
# play actually animated is the ex_gameId / ex_playId example chosen near the top.
ex_game_play_id = '2022100913_172'
#ex_game_play_id = '2022100913_55'
ex_game_play_id = str(ex_gameId) + '_' + str(ex_playId)

In [None]:
# Animate the example play using the standardized coordinates and angles
# (animate_tracking_data comes from local_functions; 's' is passed as the metric for the
# direction arrow - presumably it scales the arrow length, verify against the helper)
animate_tracking_data(tracking_df = df_tracking, 
                      id_game_play = ex_game_play_id,
                      x_col = 'x_std',
                      y_col = 'y_std',
                      dir_col = 'dir_std',
                      dir_arrow_metric = 's',
                      o_col = 'o_std')