# Intro
This notebook calculates the force of each player per frame, the net force of each team per frame,
and the net partitioned forces per frame.

In [1]:
import pandas as pd
import csv
import numpy as np
from datetime import datetime
import dateutil.parser
from IPython.display import display
import math
from math import sin, cos, radians

# show every column when a dataframe is displayed
pd.options.display.max_columns = None

games = pd.read_csv("games.csv")

# Fast path: load the pickled (already downcast) tracking and PFF data.
# These pickles are only a cache. When any of them is missing (e.g. on a fresh
# checkout) we must not abort the whole notebook: the "Read in data" cell below
# rebuilds weeks_dict and pff from the raw CSV files.
# NOTE: pickles can execute arbitrary code on load; only read files created locally.
weeks_dict = dict()
try:
    for i in range(1, 9):
        weeks_dict[i] = pd.read_pickle(f'week{i}_clean_pickled.pkl')
    pff = pd.read_pickle("pff_pickled.pkl")
except FileNotFoundError as missing:
    # drop any partially loaded weeks so later cells never see an incomplete cache
    weeks_dict = dict()
    print(f"Pickle cache not available ({missing}); run the 'Read in data' cell to load from CSV.")

players = pd.read_csv("players.csv")
play = pd.read_csv("plays.csv")

# Read in data

Reduce the memory usage of the large dataframes (i.e., the tracking data for each week) by downcasting column dtypes, then cache the cleaned tracking data as pickles.

In [2]:
weeks_dict = dict()
for i in range(1, 9):
    # Parse each weekly file once, skipping the 'time' column while reading
    # (a callable usecols keeps every column for which it returns True).
    tracking = pd.read_csv(f'week{i}_cut_by_frame_clean.csv', usecols = lambda col: col != 'time')

    # reduce memory usage and fill NaN values with 0, if appropriate
    # NOTE(review): errors = 'ignore' returns the column unchanged if a cast fails,
    # so a failed downcast is silent -- confirm the resulting dtypes if memory matters.
    tracking['gameId'] = tracking['gameId'].fillna(0).astype('uint32', errors = 'ignore')
    tracking['nflId'] = tracking['nflId'].fillna(0).astype('uint32', errors = 'ignore')
    tracking['playId'] = tracking['playId'].fillna(0).astype('uint16', errors = 'ignore')
    # NOTE(review): uint8 holds at most 255 -- assumes no play has more frames; verify.
    tracking['frameId'] = tracking['frameId'].fillna(0).astype('uint8', errors = 'ignore')
    tracking['jerseyNumber'] = tracking['jerseyNumber'].fillna(-1).astype('int8', errors = 'ignore')
    tracking['team'] = tracking['team'].astype('category', errors = 'ignore')
    tracking['playDirection'] = tracking['playDirection'].astype('category', errors = 'ignore')

    # every column that is still float64 at this point is stored as float32
    float64_cols = tracking.select_dtypes(np.float64).columns
    tracking[float64_cols] = tracking[float64_cols].astype(np.float32)
    tracking['event'] = tracking['event'].astype('category', errors = 'ignore')

    # pickle the cleaned up tracking data df
    tracking.to_pickle(f'week{i}_clean_pickled.pkl')
    weeks_dict[i] = tracking

games = pd.read_csv("games.csv")
pff = pd.read_csv("pffScoutingData.csv")
# .copy() makes pff an independent frame, so the column assignments below do not
# write into a slice of the full scouting table (avoids SettingWithCopyWarning).
pff = pff[['gameId', 'playId', 'nflId', 'pff_role', 'pff_positionLinedUp', 'pff_hit', 'pff_hurry', 'pff_sack']].copy()

# NOTE(review): -1 is not representable in an unsigned dtype; this is only safe if
# these id columns contain no NaN (then fillna is a no-op) -- confirm against the data.
pff['gameId'] = pff['gameId'].fillna(-1).astype('uint32', errors = 'ignore')
pff['nflId'] = pff['nflId'].fillna(-1).astype('uint32', errors = 'ignore')
pff['playId'] = pff['playId'].fillna(-1).astype('uint16', errors = 'ignore')
pff['pff_role'] = pff['pff_role'].astype('category')
pff['pff_positionLinedUp'] = pff['pff_positionLinedUp'].astype('category')
pff['pff_hit'] = pff['pff_hit'].fillna(0).astype('int8')
pff['pff_hurry'] = pff['pff_hurry'].fillna(0).astype('int8')
pff['pff_sack'] = pff['pff_sack'].fillna(0).astype('int8')

# pickle the cleaned up PFF data as well, so the cached-load path at the top of
# the notebook (which reads "pff_pickled.pkl") has a file to read
pff.to_pickle("pff_pickled.pkl")

players = pd.read_csv("players.csv")
play = pd.read_csv("plays.csv")

# Force Calculations

In [3]:
# lookup table for player weights: players_dict['weight'] maps nflId -> weight
players_dict = players.set_index('nflId')[['weight']].to_dict()

def find_player_weight(nflID):
    """
    Look up a player's weight in players_dict by nfl ID.

    Parameters
    ----------
    nflID: int
        nfl ID of the player to look up

    Returns
    -------
    int
        The weight stored for that nfl ID, or None when the ID has no entry
    """
    weight_by_id = players_dict['weight']
    # dict.get yields None for an unknown ID, which is the documented fallback
    return weight_by_id.get(nflID)


In [4]:
# calculate force and merge with PFF data
merged_weeks = dict()
for i in range(1, 9):
    # errors = 'ignore' keeps this cell re-runnable: on a second run the columns
    # are already gone from weeks_dict[i] and a plain drop would raise KeyError
    tracking = weeks_dict[i].drop(columns = ['s', 'o', 'event'], errors = 'ignore')

    # vectorised lookup of each row's player weight; ids without an entry in
    # players_dict (e.g. the 0 used to fill missing nflId values) become NaN
    tracking['weight'] = tracking['nflId'].map(players_dict['weight'])

    # force = acceleration * weight (in the units of the source columns)
    tracking['force'] = tracking['a'] * tracking['weight']

    weeks_dict[i] = tracking
    # left merge: every tracking row is kept, PFF columns are NaN where no match exists
    merged_weeks[i] = tracking.merge(pff, how = 'left', on = ['gameId', 'playId', 'nflId'])


In [5]:
# sanity check: acceleration must never be negative, otherwise force could be negative too
for i in range(1, 9):
    has_negative_acceleration = (weeks_dict[i]['a'] < 0).any()
    assert(not has_negative_acceleration)

In [None]:
# split each player's force into horizontal (x) and vertical (y) components
# (the derivation is explained in the Kaggle notebook)
for i in range(1, 9):
    forces = merged_weeks[i]

    # 90 - dir, converted from degrees to radians
    angle = (90 - forces['dir']) * math.pi / 180
    forces['adj_angle'] = angle

    # project the force magnitude onto each axis
    forces['x_force'] = forces['force'] * np.cos(angle)
    forces['y_force'] = forces['force'] * np.sin(angle)

    forces.to_csv(f'forces_week{i}.csv', index = False)

# Net Force Calculations

In [None]:
def calculate_net_forces(df):
    """
    Calculates the net x force and net y force of pass rushers and pass blockers per frame.

    Rows whose pff_role is 'Pass Rush' or 'Pass Block' are summed for every
    (gameId, playId, frameId); rows with any other role are ignored.

    Parameters
    ----------
    df: pd.DataFrame
        Dataframe with columns gameId, playId, frameId, pff_role, x_force and y_force

    Returns
    -------
    pd.DataFrame
        Dataframe with columns gameId, playId, frameId, net_x_force, net_y_force.
        There is one row per (gameId, playId, frameId) in which at least one pass
        rusher or pass blocker appears; if only one of the two roles is present
        in a frame, the other role contributes 0 to the sums.
    """
    keys = ['gameId', 'playId', 'frameId']
    components = ['x_force', 'y_force']

    # per-frame sums for each role, computed for both components at once
    rush_sums = df[df['pff_role'] == 'Pass Rush'].groupby(keys)[components].sum()
    block_sums = df[df['pff_role'] == 'Pass Block'].groupby(keys)[components].sum()

    # align both roles on every frame seen for either one; a role that is
    # absent from a frame must count as 0 rather than turn the total into NaN
    all_frames = rush_sums.index.union(block_sums.index)
    total = rush_sums.reindex(all_frames, fill_value = 0) + block_sums.reindex(all_frames, fill_value = 0)

    total.columns = ['net_x_force', 'net_y_force']
    return total.reset_index()

In [None]:
# net force per frame for every week, keyed by week number
net_forces_dict = {week: calculate_net_forces(merged_weeks[week]) for week in range(1, 9)}

# Partitioned Force Calculations

In [None]:
def find_lt_y(df):
    """
    Find the y value of the leftmost pass blocker, which is usually the left tackle.

    If left tackle cannot be found, we return the y value of the left guard.

    Parameters
    ----------
    df: pd.DataFrame
        Dataframe for a given game ID, play ID, with columns pff_positionLinedUp and y

    Returns
    -------
    float
        y value of the first row lined up at 'LT', or at 'LG' when there is no 'LT' row

    Raises
    ------
    IndexError
        If no row is lined up at either 'LT' or 'LG'
    """
    lined_up = df['pff_positionLinedUp']
    # try the tackle first, then fall back to the guard
    for position in ('LT', 'LG'):
        candidates = df[lined_up == position]['y']
        if len(candidates) > 0:
            return candidates.values[0]
    # same exception type as indexing an empty array, but with a usable message
    raise IndexError("no player lined up at 'LT' or 'LG' in the given rows")

def find_rt_y(df):
    """
    Find the y value of the rightmost pass blocker, which is usually the right tackle.

    If right tackle cannot be found, we return the y value of the right guard.

    Parameters
    ----------
    df: pd.DataFrame
        Dataframe for a given game ID, play ID, with columns pff_positionLinedUp and y

    Returns
    -------
    float
        y value of the first row lined up at 'RT', or at 'RG' when there is no 'RT' row

    Raises
    ------
    IndexError
        If no row is lined up at either 'RT' or 'RG'
    """
    lined_up = df['pff_positionLinedUp']
    # try the tackle first, then fall back to the guard
    for position in ('RT', 'RG'):
        candidates = df[lined_up == position]['y']
        if len(candidates) > 0:
            return candidates.values[0]
    # same exception type as indexing an empty array, but with a usable message
    raise IndexError("no player lined up at 'RT' or 'RG' in the given rows")
    

def create_indicator(df, simple = False):
    """
    Create a dataframe with an indicator column that indicates which partition a player is in

    The partitions are bounded by the y values returned by find_lt_y and find_rt_y.

    Parameters
    ----------
    df: pd.DataFrame
        Dataframe that includes tracking data of player over game, play, and frame.
    simple: bool
        If True, one pair of boundaries is computed per (gameId, playId), so the
        partitions stay fixed throughout a play.
        If False (default), the boundaries are computed per (gameId, playId, frameId),
        so the partitions follow the tackles over the course of a play.

    Returns
    -------
    pd.DataFrame
        Copy of df with five extra columns: lt_y, rt_y, max_y, min_y and indicator.
        indicator is -1 where y < min_y, 0 where min_y <= y <= max_y, and 1 otherwise.
    """
    # the two modes differ only in the granularity of the grouping
    keys = ['gameId', 'playId'] if simple else ['gameId', 'playId', 'frameId']

    # one boundary value per group; the apply calls dominate the run time
    # (the per-frame version takes around ~30-60 seconds)
    grouped = df.groupby(keys)
    lt_bounds = grouped.apply(find_lt_y).reset_index().rename(columns = {0 : 'lt_y'})
    rt_bounds = grouped.apply(find_rt_y).reset_index().rename(columns = {0 : 'rt_y'})

    # attach the boundaries to every player row of the same group
    new_df = pd.merge(df, lt_bounds, how = "left", on = keys)
    new_df = pd.merge(new_df, rt_bounds, how = "left", on = keys)

    # lt_y may be above or below rt_y, so order the two boundaries explicitly
    new_df['max_y'] = new_df[['lt_y', 'rt_y']].max(axis = 1)
    new_df['min_y'] = new_df[['lt_y', 'rt_y']].min(axis = 1)

    below = new_df['y'] < new_df['min_y']
    between = (new_df['min_y'] <= new_df['y']) & (new_df['y'] <= new_df['max_y'])
    new_df['indicator'] = np.where(below, -1, np.where(between, 0, 1))
    return new_df

In [None]:
# calculate the partitioned forces
# indicator value -> column suffix:
#   -1 (y below both tackles) -> bottom, 0 (between the tackles) -> middle, 1 -> top
partition_labels = {-1 : 'bottom', 0 : 'middle', 1 : 'top'}
frame_keys = ['gameId', 'playId', 'frameId']

partitioned_dict = dict()
for i in range(1, 9):
    force_with_indicator_df = create_indicator(merged_weeks[i], simple = True)

    # Compute the net force of each partition separately and label its columns
    # explicitly. Selecting by indicator value (instead of by position in a list
    # of groups) keeps the labels correct even when a partition has no rows.
    partitioned_df = None
    for indicator_value, label in partition_labels.items():
        in_partition = force_with_indicator_df[force_with_indicator_df['indicator'] == indicator_value]
        partition_force = calculate_net_forces(in_partition).rename(
            columns = {'net_x_force' : f'net_x_force_{label}', 'net_y_force' : f'net_y_force_{label}'})

        # merge all the partitioned dataframes together; the outer merge keeps
        # frames that appear in only some partitions (missing values are NaN)
        if partitioned_df is None:
            partitioned_df = partition_force
        else:
            partitioned_df = partitioned_df.merge(partition_force, how = 'outer', on = frame_keys)

    partitioned_dict[i] = partitioned_df

In [None]:
# combine the per-partition net forces with the overall net forces, one file per week
for i in range(1, 9):
    combined = partitioned_dict[i].merge(net_forces_dict[i], how = 'outer', on = ['gameId', 'playId', 'frameId'])
    # NOTE: to_csv is called with its default index setting, so the row index is written as the first column
    combined.to_csv(f"partitioned_forces{i}.csv")
    display(combined)