In [2]:
#import standard libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [62]:
#load the lineman, football and per-player tracking tables, then the player-level play table
tracking_linemen, tracking_football, tracking_simple = (
    pd.read_csv(csv_path)
    for csv_path in ('tracking_linemen.csv', 'tracking_football.csv', 'tracking_simple.csv')
)
df_player_play_motion = pd.read_csv('player_play.csv')

In [64]:
#see if this can be used for earlier functions
def add_column_from_other_df(df, other_df, col, new_col_name, merge_keys=('gameId', 'playId', 'nflId'), how='inner'):
    """Merge a single column of ``other_df`` into ``df`` under a new name.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that receives the new column. It is not modified.
    other_df : pandas.DataFrame
        Frame that holds ``col`` together with every column in ``merge_keys``.
        It is not modified.
    col : str
        Name of the column in ``other_df`` to bring over.
    new_col_name : str
        Name the column gets in the returned frame.
    merge_keys : sequence of str, optional
        Columns to join on. Any sequence (list, tuple, ...) is accepted.
    how : str, optional
        Join type handed to ``DataFrame.merge``. The default ``'inner'`` keeps
        the historical behaviour: rows of ``df`` without a match in
        ``other_df`` are dropped. Pass ``'left'`` to keep them with NaN.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``df`` joined with the renamed column.
    """
    #copy the keys into a list so tuples (and the immutable default) can be concatenated
    keys = list(merge_keys)

    #shrink other dataframe and give the column a placeholder name that won't be in df;
    #rename() without inplace returns a new frame, so the slice of other_df is never
    #written to (no SettingWithCopyWarning) and other_df keeps its column names
    placeholder = 'placeholder_column_x09ds623n'
    df_small = other_df[keys + [col]].rename(columns={col: placeholder})

    #merge the dataframes
    merged = df.merge(df_small, on=keys, how=how)

    #rename the new column
    return merged.rename(columns={placeholder: new_col_name})


In [66]:
#per player: frame of the last in-motion sample and the last tracked frame
#NOTE(review): snap_frame is just the max frameId per player, so it presumably equals
#the ball-snap frame only if tracking_simple ends at the snap -- confirm upstream
motion_rows = tracking_simple[tracking_simple['in_motion'] == 1]
last_motion_frame = (
    motion_rows
    .groupby(['gameId', 'playId', 'nflId'])['frameId']
    .max()
    .reset_index()
)
snap_frame = (
    tracking_simple
    .groupby(['gameId', 'playId', 'nflId'])['frameId']
    .max()
    .reset_index()
)

#attach both frame ids to the player-level table
df_player_play_motion = add_column_from_other_df(
    df=df_player_play_motion,
    other_df=last_motion_frame,
    col='frameId',
    new_col_name='last_motion_frameId',
)
df_player_play_motion = add_column_from_other_df(
    df=df_player_play_motion,
    other_df=snap_frame,
    col='frameId',
    new_col_name='snap_frameId',
)


In [68]:
#number of frames between a player's last in-motion frame and their snap frame
df_player_play_motion['snap_motion_gap'] = df_player_play_motion['snap_frameId'].sub(
    df_player_play_motion['last_motion_frameId']
)

In [70]:
#TODO: inMotionAtBallSnap and snap_motion_gap mostly agree but not always; find out why
df_player_play_motion.loc[:, ['inMotionAtBallSnap', 'snap_motion_gap']].head()

Unnamed: 0,inMotionAtBallSnap,snap_motion_gap
0,True,10
1,True,27
2,True,0
3,False,43
4,True,0


In [72]:
#get initial and final coordinates (relative to the ball) for every player on every play

#first()/last() pick values by row order within each group, so order the frames
#explicitly rather than trusting the row order of the csv; a stable sort keeps the
#existing order of any rows that share a frameId
#note: first()/last() also skip missing values by default
tracking_by_frame = tracking_simple.sort_values('frameId', kind='stable')
rel_pos_by_player = tracking_by_frame.groupby(['gameId', 'playId', 'nflId'])

#y: initial and final distance from the football
initial_rel_y_dis = rel_pos_by_player['y_dis_from_fb'].first().reset_index()
end_rel_y_dis = rel_pos_by_player['y_dis_from_fb'].last().reset_index()

df_player_play_motion = add_column_from_other_df(df_player_play_motion, initial_rel_y_dis, 'y_dis_from_fb', 'init_rel_y_dis')
df_player_play_motion = add_column_from_other_df(df_player_play_motion, end_rel_y_dis, 'y_dis_from_fb', 'end_rel_y_dis')

#calculate overall change in player's y coordinate (initial minus final)
df_player_play_motion['overall_y_change'] = df_player_play_motion['init_rel_y_dis'] - df_player_play_motion['end_rel_y_dis']

#same steps but for x
initial_rel_x_dis = rel_pos_by_player['x_dis_from_fb'].first().reset_index()
end_rel_x_dis = rel_pos_by_player['x_dis_from_fb'].last().reset_index()

df_player_play_motion = add_column_from_other_df(df_player_play_motion, initial_rel_x_dis, 'x_dis_from_fb', 'init_rel_x_dis')
df_player_play_motion = add_column_from_other_df(df_player_play_motion, end_rel_x_dis, 'x_dis_from_fb', 'end_rel_x_dis')

df_player_play_motion['overall_x_change'] = df_player_play_motion['init_rel_x_dis'] - df_player_play_motion['end_rel_x_dis']

In [74]:
#mean of s_std over the frames where the player is flagged as in motion
in_motion_rows = tracking_simple.loc[tracking_simple['in_motion'] == 1]
motion_speed_avg = (
    in_motion_rows
    .groupby(['gameId', 'playId', 'nflId'])['s_std']
    .mean()
    .reset_index()
)
df_player_play_motion = add_column_from_other_df(
    df=df_player_play_motion,
    other_df=motion_speed_avg,
    col='s_std',
    new_col_name='motion_s_avg',
)

In [76]:
#total of the in_motion flag per player and play, stored as frames_in_motion
motion_sum = (
    tracking_simple
    .groupby(['gameId', 'playId', 'nflId'])['in_motion']
    .sum()
    .reset_index()
)
df_player_play_motion = add_column_from_other_df(
    df=df_player_play_motion,
    other_df=motion_sum,
    col='in_motion',
    new_col_name='frames_in_motion',
)

In [78]:
#largest and smallest x_change_all / y_change_all reached by each player on each play
change_by_player = tracking_simple.groupby(['gameId', 'playId', 'nflId'])

#x extremes
max_x_change = change_by_player['x_change_all'].max().reset_index()
min_x_change = change_by_player['x_change_all'].min().reset_index()
df_player_play_motion = add_column_from_other_df(
    df_player_play_motion, max_x_change, 'x_change_all', 'max_x_rel_pos'
)
df_player_play_motion = add_column_from_other_df(
    df_player_play_motion, min_x_change, 'x_change_all', 'min_x_rel_pos'
)

#y extremes
max_y_change = change_by_player['y_change_all'].max().reset_index()
min_y_change = change_by_player['y_change_all'].min().reset_index()
df_player_play_motion = add_column_from_other_df(
    df_player_play_motion, max_y_change, 'y_change_all', 'max_y_rel_pos'
)
df_player_play_motion = add_column_from_other_df(
    df_player_play_motion, min_y_change, 'y_change_all', 'min_y_rel_pos'
)


In [82]:
#find how far away from the offensive line the player started

#pair each lineman row with the football row of the same game/play/frame and take the
#lineman's y offset from the ball (only y is computed here)
tracking_linemen = tracking_linemen.merge(
    tracking_football,
    on=['gameId', 'playId', 'frameId'],
    suffixes=('', '_fb'),
)
tracking_linemen['y_dis_from_fb'] = tracking_linemen['y_std'].sub(tracking_linemen['y_std_fb'])
tracking_linemen = tracking_linemen.drop(columns=['frameId'])

#for every play, the largest and smallest lineman offset mark the two ends of the line
line_y_by_play = tracking_linemen.groupby(['gameId', 'playId'])['y_dis_from_fb']
max_line = line_y_by_play.max().reset_index()
min_line = line_y_by_play.min().reset_index()

df_player_play_motion = add_column_from_other_df(
    df_player_play_motion, max_line, 'y_dis_from_fb', 'line_y_max', merge_keys=['gameId', 'playId']
)
df_player_play_motion = add_column_from_other_df(
    df_player_play_motion, min_line, 'y_dis_from_fb', 'line_y_min', merge_keys=['gameId', 'playId']
)

#the y columns still have to be flipped where needed before the line distances are computed

In [84]:
#if y started off negative, mirror the play's y values so the player starts off positive
mask = df_player_play_motion['init_rel_y_dis'] < 0

#plain values only change sign under the mirror
df_player_play_motion.loc[mask, ['init_rel_y_dis', 'end_rel_y_dis', 'overall_y_change']] *= -1

#extreme-value pairs must also trade places: the mirrored maximum is the negated
#minimum and vice versa; negating alone would leave max <= min on mirrored rows
for max_col, min_col in (('max_y_rel_pos', 'min_y_rel_pos'), ('line_y_max', 'line_y_min')):
    #to_numpy() strips the column labels so the values are assigned by position (swapped)
    df_player_play_motion.loc[mask, [max_col, min_col]] = -df_player_play_motion.loc[mask, [min_col, max_col]].to_numpy()

In [86]:
#then: note how far the player was from the line at the beginning and end
#the reference edge is the larger of the two line columns on each row
line_edge = df_player_play_motion[['line_y_max', 'line_y_min']].max(axis=1)
df_player_play_motion['dis_line_start'] = df_player_play_motion['init_rel_y_dis'] - line_edge
df_player_play_motion['dis_line_end'] = df_player_play_motion['end_rel_y_dis'] - line_edge

#finally: the lineman position columns are no longer needed
df_player_play_motion = df_player_play_motion.drop(columns=['line_y_max', 'line_y_min'])

In [88]:
#multiply the inMotionAtBallSnap flag by 1 so True/False become 1/0
df_player_play_motion['inMotionAtBallSnap'] = df_player_play_motion['inMotionAtBallSnap'].mul(1)

In [90]:
#note if the player ran past the football on the motion
dist_by_player = tracking_simple.groupby(['gameId','playId','nflId'])['y_dis_from_fb']
max_dist_fb = dist_by_player.max().reset_index()
min_dist_fb = dist_by_player.min().reset_index()
df_player_play_motion = add_column_from_other_df(df_player_play_motion, max_dist_fb, 'y_dis_from_fb', 'max_dist_fb')
df_player_play_motion = add_column_from_other_df(df_player_play_motion, min_dist_fb, 'y_dis_from_fb', 'min_dist_fb')

#the player passed the football when their y offset from it changed sign during the play:
#the smallest offset is negative while the largest is not (zero counts as the positive side)
#the previous expression flagged the opposite case, i.e. players who stayed on one side
crossed_fb = (df_player_play_motion['min_dist_fb'] < 0) & (df_player_play_motion['max_dist_fb'] >= 0)

#store as 1/0 instead of True/False
df_player_play_motion['passed_fb'] = crossed_fb * 1

In [94]:
#columns handed to the clustering step, in export order
clustering_features = [
    'inMotionAtBallSnap',
    'init_rel_y_dis', 'end_rel_y_dis', 'overall_y_change',
    'init_rel_x_dis', 'end_rel_x_dis', 'overall_x_change',
    'motion_s_avg', 'frames_in_motion',
    'max_x_rel_pos', 'min_x_rel_pos', 'max_y_rel_pos', 'min_y_rel_pos',
    'dis_line_start', 'dis_line_end',
    'passed_fb',
]
df_clustering_cols = df_player_play_motion[clustering_features]

In [96]:
#write the clustering features and the full player table to csv, without the index column
exports = {
    'clustering_columns.csv': df_clustering_cols,
    'df_player_play_motion.csv': df_player_play_motion,
}
for csv_name, frame in exports.items():
    frame.to_csv(csv_name, index=False)