In [22]:
import pandas as pd
import numpy as np
import pymc as pm
import seaborn as sns
from sklearn.model_selection import train_test_split
pd.set_option('display.max_rows', 500)
import arviz as az
import matplotlib.pyplot as plt

## READ IN DATA

In [23]:
# select all the players besides dbs and wrs
players = pd.read_csv("https://media.githubusercontent.com/media/cnickol26/BigDataBowl2023/main/nfl-big-data-bowl-2023/players.csv")

positions = ['DE', 'OLB','DT', 'ILB', 'NT', 'MLB', 'LB', 'RB', 'T', 'TE','G','QB','C','FB']
positions_df = players[players['officialPosition'].isin(positions)]
players_list = positions_df['nflId']

In [24]:
# Read in all the weeks but only for the positons above
locations = pd.DataFrame()
for i in range(1,8):
    url = 'https://media.githubusercontent.com/media/cnickol26/BigDataBowl2023/main/nfl-big-data-bowl-2023/week'+str(i)+'.csv'
    week_data = pd.read_csv(url)
    week_data = week_data[(week_data['nflId'].isin(players_list)) | (week_data['team'] == 'football')]
    locations = pd.concat([locations, week_data])

In [25]:
locations

Unnamed: 0,gameId,playId,nflId,frameId,time,jerseyNumber,team,playDirection,x,y,s,a,dis,o,dir,event
0,2021090900,97,25511.0,1,2021-09-10 00:26:31,12.0,TB,right,37.77,24.22,0.29,0.30,0.03,165.16,84.99,
1,2021090900,97,25511.0,2,2021-09-10 00:26:31,12.0,TB,right,37.78,24.22,0.23,0.11,0.02,164.33,92.87,
2,2021090900,97,25511.0,3,2021-09-10 00:26:31,12.0,TB,right,37.78,24.24,0.16,0.10,0.01,160.24,68.55,
3,2021090900,97,25511.0,4,2021-09-10 00:26:31,12.0,TB,right,37.73,24.25,0.15,0.24,0.06,152.13,296.85,
4,2021090900,97,25511.0,5,2021-09-10 00:26:31,12.0,TB,right,37.69,24.26,0.25,0.18,0.04,148.33,287.55,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
906287,2021102500,3998,,71,2021-10-26 03:17:05,,football,left,112.93,14.52,22.04,0.44,2.62,,,pass_forward
906288,2021102500,3998,,72,2021-10-26 03:17:06,,football,left,111.34,16.07,21.95,1.40,2.22,,,
906289,2021102500,3998,,73,2021-10-26 03:17:06,,football,left,109.74,17.60,21.81,2.02,2.21,,,
906290,2021102500,3998,,74,2021-10-26 03:17:06,,football,left,108.17,19.12,21.66,2.39,2.19,,,


In [26]:
pff = pd.read_csv("https://media.githubusercontent.com/media/cnickol26/BigDataBowl2023/main/nfl-big-data-bowl-2023/pffScoutingData.csv")

In [27]:
pff

Unnamed: 0,gameId,playId,nflId,pff_role,pff_positionLinedUp,pff_hit,pff_hurry,pff_sack,pff_beatenByDefender,pff_hitAllowed,pff_hurryAllowed,pff_sackAllowed,pff_nflIdBlockedPlayer,pff_blockType,pff_backFieldBlock
0,2021090900,97,25511,Pass,QB,,,,,,,,,,
1,2021090900,97,35481,Pass Route,TE-L,,,,,,,,,,
2,2021090900,97,35634,Pass Route,LWR,,,,,,,,,,
3,2021090900,97,39985,Pass Route,HB-R,,,,,,,,,,
4,2021090900,97,40151,Pass Block,C,,,,0.0,0.0,0.0,0.0,44955.0,SW,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188249,2021110100,4433,52507,Pass Block,LT,,,,0.0,0.0,1.0,0.0,43338.0,PP,0.0
188250,2021110100,4433,52546,Coverage,SCBoR,0.0,0.0,0.0,,,,,,,
188251,2021110100,4433,52573,Pass Route,SLoWR,,,,,,,,,,
188252,2021110100,4433,52585,Pass Rush,LEO,0.0,0.0,0.0,,,,,,,


In [28]:
# Going to join these columns in with the tracking data
pff_filter = pff[['gameId','playId','nflId','pff_role','pff_positionLinedUp','pff_nflIdBlockedPlayer','pff_blockType']]

In [29]:
locations_ball = locations.merge(pff_filter, on=['gameId','playId','nflId'], how='left')

In [30]:
plays = pd.read_csv("https://media.githubusercontent.com/media/cnickol26/BigDataBowl2023/main/nfl-big-data-bowl-2023/plays.csv")

In [31]:
plays

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,yardlineSide,yardlineNumber,...,foulNFLId3,absoluteYardlineNumber,offenseFormation,personnelO,defendersInBox,personnelD,dropBackType,pff_playAction,pff_passCoverage,pff_passCoverageType
0,2021090900,97,(13:33) (Shotgun) T.Brady pass incomplete deep...,1,3,2,TB,DAL,TB,33,...,,43.0,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"4 DL, 2 LB, 5 DB",TRADITIONAL,0,Cover-1,Man
1,2021090900,137,(13:18) (Shotgun) D.Prescott pass deep left to...,1,1,10,DAL,TB,DAL,2,...,,108.0,EMPTY,"1 RB, 2 TE, 2 WR",6.0,"4 DL, 4 LB, 3 DB",TRADITIONAL,0,Cover-3,Zone
2,2021090900,187,(12:23) (Shotgun) D.Prescott pass short middle...,1,2,6,DAL,TB,DAL,34,...,,76.0,SHOTGUN,"0 RB, 2 TE, 3 WR",6.0,"3 DL, 3 LB, 5 DB",TRADITIONAL,0,Cover-3,Zone
3,2021090900,282,(9:56) D.Prescott pass incomplete deep left to...,1,1,10,DAL,TB,TB,39,...,,49.0,SINGLEBACK,"1 RB, 2 TE, 2 WR",6.0,"4 DL, 3 LB, 4 DB",TRADITIONAL,1,Cover-3,Zone
4,2021090900,349,(9:46) (Shotgun) D.Prescott pass incomplete sh...,1,3,15,DAL,TB,TB,44,...,,54.0,SHOTGUN,"1 RB, 1 TE, 3 WR",7.0,"3 DL, 4 LB, 4 DB",TRADITIONAL,0,Cover-3,Zone
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8553,2021110100,4310,(1:56) (Shotgun) P.Mahomes sacked at NYG 16 fo...,4,3,8,KC,NYG,NYG,8,...,,18.0,SHOTGUN,"1 RB, 1 TE, 3 WR",4.0,"1 DL, 3 LB, 7 DB",SCRAMBLE,0,Bracket,Other
8554,2021110100,4363,(1:07) (Shotgun) D.Jones pass short right to E...,4,1,10,NYG,KC,NYG,25,...,,35.0,SHOTGUN,"1 RB, 1 TE, 3 WR",5.0,"4 DL, 1 LB, 6 DB",SCRAMBLE,0,Cover-2,Zone
8555,2021110100,4392,"(1:01) (No Huddle, Shotgun) D.Jones sacked at ...",4,2,7,NYG,KC,NYG,28,...,,38.0,SHOTGUN,"1 RB, 1 TE, 3 WR",6.0,"4 DL, 1 LB, 6 DB",TRADITIONAL,0,Cover-2,Zone
8556,2021110100,4411,"(:39) (No Huddle, Shotgun) D.Jones pass incomp...",4,3,15,NYG,KC,NYG,20,...,,30.0,SHOTGUN,"1 RB, 1 TE, 3 WR",5.0,"4 DL, 1 LB, 6 DB",TRADITIONAL,0,Cover-2,Zone


In [32]:
# Also going to be added in to the tracking data
plays_filter = plays[['gameId','playId','dropBackType','pff_playAction']]

In [33]:
locations_ball = locations_ball.merge(plays_filter, on=['gameId','playId'], how='left')

In [34]:
locations_ball

Unnamed: 0,gameId,playId,nflId,frameId,time,jerseyNumber,team,playDirection,x,y,...,dis,o,dir,event,pff_role,pff_positionLinedUp,pff_nflIdBlockedPlayer,pff_blockType,dropBackType,pff_playAction
0,2021090900,97,25511.0,1,2021-09-10 00:26:31,12.0,TB,right,37.77,24.22,...,0.03,165.16,84.99,,Pass,QB,,,TRADITIONAL,0
1,2021090900,97,25511.0,2,2021-09-10 00:26:31,12.0,TB,right,37.78,24.22,...,0.02,164.33,92.87,,Pass,QB,,,TRADITIONAL,0
2,2021090900,97,25511.0,3,2021-09-10 00:26:31,12.0,TB,right,37.78,24.24,...,0.01,160.24,68.55,,Pass,QB,,,TRADITIONAL,0
3,2021090900,97,25511.0,4,2021-09-10 00:26:31,12.0,TB,right,37.73,24.25,...,0.06,152.13,296.85,,Pass,QB,,,TRADITIONAL,0
4,2021090900,97,25511.0,5,2021-09-10 00:26:31,12.0,TB,right,37.69,24.26,...,0.04,148.33,287.55,,Pass,QB,,,TRADITIONAL,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4863391,2021102500,3998,,71,2021-10-26 03:17:05,,football,left,112.93,14.52,...,2.62,,,pass_forward,,,,,SCRAMBLE,0
4863392,2021102500,3998,,72,2021-10-26 03:17:06,,football,left,111.34,16.07,...,2.22,,,,,,,,SCRAMBLE,0
4863393,2021102500,3998,,73,2021-10-26 03:17:06,,football,left,109.74,17.60,...,2.21,,,,,,,,SCRAMBLE,0
4863394,2021102500,3998,,74,2021-10-26 03:17:06,,football,left,108.17,19.12,...,2.19,,,,,,,,SCRAMBLE,0


In [35]:
# create unique play ID for each play
locations_ball['uniqueplayId'] = locations_ball['gameId'].astype(str) + locations_ball['playId'].astype(str)

In [36]:
locations_ball['pff_positionLinedUp']

0           QB
1           QB
2           QB
3           QB
4           QB
          ... 
4863391    NaN
4863392    NaN
4863393    NaN
4863394    NaN
4863395    NaN
Name: pff_positionLinedUp, Length: 4863396, dtype: object

In [37]:
# Selecting only postions that could potentially impact the end versus tackle matchup
positions_lined_up = ['QB','LT','RT','LEO','LE','HB-L','HB-R','RE','TE-R','HB','TE-L','TE-oR','TE-iR','TE-iL','TE-oL','FB-R','FB-L','FB','ball']
locations_ball['pff_positionLinedUp'] = locations_ball['pff_positionLinedUp'].fillna('ball')
locations_ball2 = locations_ball[(locations_ball['pff_positionLinedUp'].isin(positions_lined_up))]

In [38]:
locations_ball2

Unnamed: 0,gameId,playId,nflId,frameId,time,jerseyNumber,team,playDirection,x,y,...,o,dir,event,pff_role,pff_positionLinedUp,pff_nflIdBlockedPlayer,pff_blockType,dropBackType,pff_playAction,uniqueplayId
0,2021090900,97,25511.0,1,2021-09-10 00:26:31,12.0,TB,right,37.77,24.22,...,165.16,84.99,,Pass,QB,,,TRADITIONAL,0,202109090097
1,2021090900,97,25511.0,2,2021-09-10 00:26:31,12.0,TB,right,37.78,24.22,...,164.33,92.87,,Pass,QB,,,TRADITIONAL,0,202109090097
2,2021090900,97,25511.0,3,2021-09-10 00:26:31,12.0,TB,right,37.78,24.24,...,160.24,68.55,,Pass,QB,,,TRADITIONAL,0,202109090097
3,2021090900,97,25511.0,4,2021-09-10 00:26:31,12.0,TB,right,37.73,24.25,...,152.13,296.85,,Pass,QB,,,TRADITIONAL,0,202109090097
4,2021090900,97,25511.0,5,2021-09-10 00:26:31,12.0,TB,right,37.69,24.26,...,148.33,287.55,,Pass,QB,,,TRADITIONAL,0,202109090097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4863391,2021102500,3998,,71,2021-10-26 03:17:05,,football,left,112.93,14.52,...,,,pass_forward,,ball,,,SCRAMBLE,0,20211025003998
4863392,2021102500,3998,,72,2021-10-26 03:17:06,,football,left,111.34,16.07,...,,,,,ball,,,SCRAMBLE,0,20211025003998
4863393,2021102500,3998,,73,2021-10-26 03:17:06,,football,left,109.74,17.60,...,,,,,ball,,,SCRAMBLE,0,20211025003998
4863394,2021102500,3998,,74,2021-10-26 03:17:06,,football,left,108.17,19.12,...,,,,,ball,,,SCRAMBLE,0,20211025003998


In [39]:
# Dropping all of the rows that occur before the snap of the ball
snap_time = locations_ball2.loc[locations_ball['event'] == 'ball_snap',['uniqueplayId', 'time']].rename(columns = {'time':'ball_snap_time'})
snap_time_unique = snap_time.drop_duplicates(subset = 'uniqueplayId')

In [40]:
locations_ball3 = locations_ball2.merge(snap_time_unique, on = 'uniqueplayId', how = 'left')
locations_ball3

Unnamed: 0,gameId,playId,nflId,frameId,time,jerseyNumber,team,playDirection,x,y,...,dir,event,pff_role,pff_positionLinedUp,pff_nflIdBlockedPlayer,pff_blockType,dropBackType,pff_playAction,uniqueplayId,ball_snap_time
0,2021090900,97,25511.0,1,2021-09-10 00:26:31,12.0,TB,right,37.77,24.22,...,84.99,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
1,2021090900,97,25511.0,2,2021-09-10 00:26:31,12.0,TB,right,37.78,24.22,...,92.87,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
2,2021090900,97,25511.0,3,2021-09-10 00:26:31,12.0,TB,right,37.78,24.24,...,68.55,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
3,2021090900,97,25511.0,4,2021-09-10 00:26:31,12.0,TB,right,37.73,24.25,...,296.85,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
4,2021090900,97,25511.0,5,2021-09-10 00:26:31,12.0,TB,right,37.69,24.26,...,287.55,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2145007,2021102500,3998,,71,2021-10-26 03:17:05,,football,left,112.93,14.52,...,,pass_forward,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59
2145008,2021102500,3998,,72,2021-10-26 03:17:06,,football,left,111.34,16.07,...,,,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59
2145009,2021102500,3998,,73,2021-10-26 03:17:06,,football,left,109.74,17.60,...,,,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59
2145010,2021102500,3998,,74,2021-10-26 03:17:06,,football,left,108.17,19.12,...,,,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59


In [41]:
locations_ball3

Unnamed: 0,gameId,playId,nflId,frameId,time,jerseyNumber,team,playDirection,x,y,...,dir,event,pff_role,pff_positionLinedUp,pff_nflIdBlockedPlayer,pff_blockType,dropBackType,pff_playAction,uniqueplayId,ball_snap_time
0,2021090900,97,25511.0,1,2021-09-10 00:26:31,12.0,TB,right,37.77,24.22,...,84.99,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
1,2021090900,97,25511.0,2,2021-09-10 00:26:31,12.0,TB,right,37.78,24.22,...,92.87,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
2,2021090900,97,25511.0,3,2021-09-10 00:26:31,12.0,TB,right,37.78,24.24,...,68.55,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
3,2021090900,97,25511.0,4,2021-09-10 00:26:31,12.0,TB,right,37.73,24.25,...,296.85,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
4,2021090900,97,25511.0,5,2021-09-10 00:26:31,12.0,TB,right,37.69,24.26,...,287.55,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2145007,2021102500,3998,,71,2021-10-26 03:17:05,,football,left,112.93,14.52,...,,pass_forward,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59
2145008,2021102500,3998,,72,2021-10-26 03:17:06,,football,left,111.34,16.07,...,,,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59
2145009,2021102500,3998,,73,2021-10-26 03:17:06,,football,left,109.74,17.60,...,,,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59
2145010,2021102500,3998,,74,2021-10-26 03:17:06,,football,left,108.17,19.12,...,,,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59


In [118]:
locations_ball4 = locations_ball3[~(locations_ball3['time'] < locations_ball3['ball_snap_time'])]

In [119]:
locations_ball4

Unnamed: 0,gameId,playId,nflId,frameId,time,jerseyNumber,team,playDirection,x,y,...,dir,event,pff_role,pff_positionLinedUp,pff_nflIdBlockedPlayer,pff_blockType,dropBackType,pff_playAction,uniqueplayId,ball_snap_time
0,2021090900,97,25511.0,1,2021-09-10 00:26:31,12.0,TB,right,37.77,24.22,...,84.99,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
1,2021090900,97,25511.0,2,2021-09-10 00:26:31,12.0,TB,right,37.78,24.22,...,92.87,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
2,2021090900,97,25511.0,3,2021-09-10 00:26:31,12.0,TB,right,37.78,24.24,...,68.55,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
3,2021090900,97,25511.0,4,2021-09-10 00:26:31,12.0,TB,right,37.73,24.25,...,296.85,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
4,2021090900,97,25511.0,5,2021-09-10 00:26:31,12.0,TB,right,37.69,24.26,...,287.55,,Pass,QB,,,TRADITIONAL,0,202109090097,2021-09-10 00:26:31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2145007,2021102500,3998,,71,2021-10-26 03:17:05,,football,left,112.93,14.52,...,,pass_forward,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59
2145008,2021102500,3998,,72,2021-10-26 03:17:06,,football,left,111.34,16.07,...,,,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59
2145009,2021102500,3998,,73,2021-10-26 03:17:06,,football,left,109.74,17.60,...,,,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59
2145010,2021102500,3998,,74,2021-10-26 03:17:06,,football,left,108.17,19.12,...,,,,ball,,,SCRAMBLE,0,20211025003998,2021-10-26 03:16:59


In [120]:
# Add the location of the football on the play to each row then standardize x and y on this location
locations_ball4 = locations_ball4.loc[(locations_ball4['team'] == 'football') & (locations_ball4['event'] == 'ball_snap'),
                               ['uniqueplayId', 'x', 'y']
                               ].rename(columns={'x':'football_x',
                                                 'y':'football_y'}).merge(locations_ball4, on='uniqueplayId', how='left')
locations_ball4['new_x'] = abs(locations_ball4['x']-locations_ball4['football_x'])
locations_ball4['new_y'] = np.where(locations_ball4['playDirection']=='right', 
                                   locations_ball4['football_y']-locations_ball4['y'],
                                   locations_ball4['y']-locations_ball4['football_y'])

In [121]:
## Filtering to only be traditional dropbacks
locations_ball5 = locations_ball4[locations_ball4['dropBackType'] == 'TRADITIONAL']

In [123]:
locations_ball5

Unnamed: 0,uniqueplayId,football_x,football_y,gameId,playId,nflId,frameId,time,jerseyNumber,team,...,event,pff_role,pff_positionLinedUp,pff_nflIdBlockedPlayer,pff_blockType,dropBackType,pff_playAction,ball_snap_time,new_x,new_y
0,202109090097,41.56,23.92,2021090900,97,25511.0,1,2021-09-10 00:26:31,12.0,TB,...,,Pass,QB,,,TRADITIONAL,0,2021-09-10 00:26:31,3.79,-0.30
1,202109090097,41.56,23.92,2021090900,97,25511.0,2,2021-09-10 00:26:31,12.0,TB,...,,Pass,QB,,,TRADITIONAL,0,2021-09-10 00:26:31,3.78,-0.30
2,202109090097,41.56,23.92,2021090900,97,25511.0,3,2021-09-10 00:26:31,12.0,TB,...,,Pass,QB,,,TRADITIONAL,0,2021-09-10 00:26:31,3.78,-0.32
3,202109090097,41.56,23.92,2021090900,97,25511.0,4,2021-09-10 00:26:31,12.0,TB,...,,Pass,QB,,,TRADITIONAL,0,2021-09-10 00:26:31,3.83,-0.33
4,202109090097,41.56,23.92,2021090900,97,25511.0,5,2021-09-10 00:26:31,12.0,TB,...,,Pass,QB,,,TRADITIONAL,0,2021-09-10 00:26:31,3.87,-0.34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2040337,20211025003926,85.95,23.68,2021102500,3926,,45,2021-10-26 03:14:08,,football,...,,,ball,,,TRADITIONAL,0,2021-10-26 03:14:05,7.18,3.67
2040338,20211025003926,85.95,23.68,2021102500,3926,,46,2021-10-26 03:14:08,,football,...,,,ball,,,TRADITIONAL,0,2021-10-26 03:14:05,7.22,3.89
2040339,20211025003926,85.95,23.68,2021102500,3926,,47,2021-10-26 03:14:09,,football,...,,,ball,,,TRADITIONAL,0,2021-10-26 03:14:05,7.27,4.08
2040340,20211025003926,85.95,23.68,2021102500,3926,,48,2021-10-26 03:14:09,,football,...,,,ball,,,TRADITIONAL,0,2021-10-26 03:14:05,7.31,4.26


### REMOVING CHIP BLOCK

In [124]:
locations_ball5['chip_side'] = np.where(locations_ball5['pff_blockType']=='CH', np.where(locations_ball5['new_y']>0, 'right', 'left'), 'none')
locations_ball5['chip_side'] = locations_ball5.groupby('uniqueplayId')['chip_side'].transform(lambda x: 'left' if 'left' in x.unique() else 'right' if 'right' in x.unique() else 'none')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  locations_ball5['chip_side'] = np.where(locations_ball5['pff_blockType']=='CH', np.where(locations_ball5['new_y']>0, 'right', 'left'), 'none')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  locations_ball5['chip_side'] = locations_ball5.groupby('uniqueplayId')['chip_side'].transform(lambda x: 'left' if 'left' in x.unique() else 'right' if 'right' in x.unique() else 'none')


In [133]:
# Remove the offensive linesmen on the side of the field where the chip is 
no_chip = locations_ball5.loc[~(((locations_ball5['pff_positionLinedUp']=='LT')& (locations_ball5['chip_side']=='left'))|
                                ((locations_ball5['pff_positionLinedUp']=='RT')& (locations_ball5['chip_side']=='right')))]

In [37]:
## Now going to filter out anything after the ball has been thrown
end_time = no_chip.loc[no_chip['event'].isin(['pass_forward','qb_sack','qb_strip_sack']),['uniqueplayId', 'time']].rename(columns = {'time':'end_time'})
end_time_unique = end_time.drop_duplicates(subset = 'uniqueplayId')

In [44]:
locations_ball7 = no_chip.merge(end_time_unique, on = 'uniqueplayId', how = 'left')
locations_ball7 = locations_ball7.dropna(subset = ['end_time'])

In [45]:
# Left with only plays that resulted in a sack, strip sack, or throw by the qb
locations_ball7

Unnamed: 0,uniqueplayId,football_x,football_y,gameId,playId,nflId,frameId,time,jerseyNumber,team,...,pff_role,pff_positionLinedUp,pff_nflIdBlockedPlayer,pff_blockType,dropBackType,pff_playAction,ball_snap_time,new_x,new_y,end_time
0,202109090097,41.56,23.92,2021090900,97,25511.0,1,2021-09-10 00:26:31,12.0,TB,...,Pass,QB,,,TRADITIONAL,0,2021-09-10 00:26:31,3.79,-0.30,2021-09-10 00:26:35
1,202109090097,41.56,23.92,2021090900,97,25511.0,2,2021-09-10 00:26:31,12.0,TB,...,Pass,QB,,,TRADITIONAL,0,2021-09-10 00:26:31,3.78,-0.30,2021-09-10 00:26:35
2,202109090097,41.56,23.92,2021090900,97,25511.0,3,2021-09-10 00:26:31,12.0,TB,...,Pass,QB,,,TRADITIONAL,0,2021-09-10 00:26:31,3.78,-0.32,2021-09-10 00:26:35
3,202109090097,41.56,23.92,2021090900,97,25511.0,4,2021-09-10 00:26:31,12.0,TB,...,Pass,QB,,,TRADITIONAL,0,2021-09-10 00:26:31,3.83,-0.33,2021-09-10 00:26:35
4,202109090097,41.56,23.92,2021090900,97,25511.0,5,2021-09-10 00:26:31,12.0,TB,...,Pass,QB,,,TRADITIONAL,0,2021-09-10 00:26:31,3.87,-0.34,2021-09-10 00:26:35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
925277,20211025003926,85.95,23.68,2021102500,3926,43447.0,45,2021-10-26 03:14:08,72.0,SEA,...,Pass Block,RT,37097.0,PP,TRADITIONAL,0,2021-10-26 03:14:05,5.62,2.72,2021-10-26 03:14:08
925278,20211025003926,85.95,23.68,2021102500,3926,43447.0,46,2021-10-26 03:14:08,72.0,SEA,...,Pass Block,RT,37097.0,PP,TRADITIONAL,0,2021-10-26 03:14:05,5.70,2.86,2021-10-26 03:14:08
925279,20211025003926,85.95,23.68,2021102500,3926,43447.0,47,2021-10-26 03:14:09,72.0,SEA,...,Pass Block,RT,37097.0,PP,TRADITIONAL,0,2021-10-26 03:14:05,5.77,3.00,2021-10-26 03:14:08
925280,20211025003926,85.95,23.68,2021102500,3926,43447.0,48,2021-10-26 03:14:09,72.0,SEA,...,Pass Block,RT,37097.0,PP,TRADITIONAL,0,2021-10-26 03:14:05,5.83,3.15,2021-10-26 03:14:08


In [46]:
#locations_ball7.to_csv('data_subset.csv', index = False)

In [None]:
## Next going to want to filter down the dataset/flip one of the ends to standardize