In [138]:
import sys
sys.path.append("../")
from settings import RAW_DATA_DIR
import pandas as pd
import plotly.express as px
import os
import numpy as np

In [11]:
# Load play-level and game-level metadata from the raw data directory.
plays_path = os.path.join(RAW_DATA_DIR, 'plays.csv')
games_path = os.path.join(RAW_DATA_DIR, 'games.csv')

play_df = pd.read_csv(plays_path)
games_df = pd.read_csv(games_path)

In [14]:
# Week under analysis. A single constant drives both the schedule filter and the
# tracking file name, so the two can never point at different weeks.
WEEK = 1

# Games scheduled in the selected week.
week_and_games = games_df[games_df.week == WEEK]

# Player-tracking rows for the same week (one CSV per week in RAW_DATA_DIR).
tracking_df = pd.read_csv(os.path.join(RAW_DATA_DIR, f'week{WEEK}.csv'))

In [16]:
# Attach game metadata to every play; the inner join keeps only plays whose
# gameId appears in the selected week's games.
games_n_plays_df = pd.merge(play_df, week_and_games, on='gameId', how='inner')

In [15]:
from src.features.helpers.processing import add_missing_timestamp_values

In [19]:
# Replace tracking_df with the output of add_missing_timestamp_values (the log
# below shows this took roughly two minutes in the recorded run).
# NOTE(review): the cell overwrites its own input, so running it twice feeds the
# already-processed frame back into the helper — confirm the helper is safe to
# re-apply, or restart the kernel before re-running.
tracking_df = add_missing_timestamp_values(tracking_df)

Start record processing. Differentiate timestamps that have multiple records...
Time: 16:52:39
End record processing: 16:54:43


In [20]:
# Enrich each tracking row with its play/game metadata. The left join keeps every
# tracking row, including any whose (gameId, playId) has no metadata match.
play_keys = ['gameId', 'playId']
df_t = pd.merge(tracking_df, games_n_plays_df, on=play_keys, how='left')

In [21]:
# List the columns available after joining tracking rows with play and game metadata.
df_t.columns

Index(['time', 'x', 'y', 's', 'a', 'dis', 'o', 'dir', 'event', 'nflId',
       'displayName', 'jerseyNumber', 'position', 'frameId', 'team', 'gameId',
       'playId', 'playDirection', 'route', 'playDescription', 'quarter',
       'down', 'yardsToGo', 'possessionTeam', 'playType', 'yardlineSide',
       'yardlineNumber', 'offenseFormation', 'personnelO', 'defendersInTheBox',
       'numberOfPassRushers', 'personnelD', 'typeDropback',
       'preSnapVisitorScore', 'preSnapHomeScore', 'gameClock',
       'absoluteYardlineNumber', 'penaltyCodes', 'penaltyJerseyNumbers',
       'passResult', 'offensePlayResult', 'playResult', 'epa', 'isDefensivePI',
       'gameDate', 'gameTimeEastern', 'homeTeamAbbr', 'visitorTeamAbbr',
       'week'],
      dtype='object')

In [33]:
# (rows, columns) of the merged tracking frame.
df_t.shape

(986022, 49)

In [40]:
# Group the merged tracking rows by play and report how many distinct plays exist.
gb = df_t.groupby(by=['gameId', 'playId'])
gb.ngroups

1034

In [70]:
# Work on a copy so that the filtering applied to df_c in later cells leaves df_t untouched.
df_c = df_t.copy()
df_c.shape

(986022, 49)

In [72]:
# Keep only plays that contain at least one `pass_forward` event.
#
# One grouped transform marks every row of a qualifying play. The previous
# per-play loop re-filtered the entire frame once for each play without a pass,
# which is quadratic in practice. Because the mask is derived from df_c itself,
# re-running this cell is a no-op rather than depending on a stale groupby.
has_pass_forward = (
    df_c.event.eq('pass_forward')
    .groupby([df_c.gameId, df_c.playId])
    .transform('any')
)
df_c = df_c[has_pass_forward]
df_c.shape

(918456, 49)

In [117]:
# Re-group by play after filtering; gb_2 reflects df_c as it is when this cell runs.
gb_2 = df_c.groupby(['gameId', 'playId'])

In [148]:
# Index-sorted copy of the filtered frame; the next cell trims rows from df_d
# while df_c stays as it is.
df_d = df_c.copy().sort_index()
df_d.shape

(918456, 49)

In [149]:
# Trim every play so that it starts at its first `pass_forward` row.
#
# The cut-off for each play is computed in one vectorised pass. The previous loop
# called `DataFrame.drop` once per play, copying the whole frame every iteration.
# Assumes df_d has a unique, orderable index and no missing gameId/playId values
# (true for the frame built above from a fresh merge).

# Index label of each pass_forward row, NaN for every other row.
pass_forward_labels = df_d.index.to_series().where(df_d.event == 'pass_forward')

# Smallest pass_forward label within each play, broadcast back onto all of its rows.
first_pass_label = (
    pass_forward_labels
    .groupby([df_d.gameId, df_d.playId])
    .transform('min')
)

# Keep rows at or after the play's cut-off. A play with no pass_forward has a NaN
# cut-off, so the comparison is False and the play is removed instead of raising.
keep_mask = df_d.index.to_numpy() >= first_pass_label.to_numpy()
df_d = df_d[keep_mask]
df_d.shape

(388245, 49)

In [150]:
# Row count per event label in the trimmed frame (the label 'None' is a literal
# value in the data, as the output below shows).
df_d.event.value_counts()

None                         332813
pass_forward                  14166
pass_arrived                  11410
pass_outcome_caught            8133
tackle                         6567
first_contact                  6457
pass_outcome_incomplete        5291
out_of_bounds                  1728
pass_outcome_interception       485
touchdown                       352
pass_tipped                     280
pass_outcome_touchdown          214
fumble                          143
fumble_defense_recovered        100
fumble_offense_recovered         44
penalty_flag                     32
touchback                        30
Name: event, dtype: int64

In [152]:
# Row count per penaltyJerseyNumbers value. These are counts of df_d rows, not of
# distinct plays, because each play contributes many tracking rows.
df_d.penaltyJerseyNumbers.value_counts()

CLE 95                  1517
BUF 13                  1458
MIA 97;TEN 33;MIA 28    1232
LA 41                   1065
ARI 28                   967
                        ... 
TB 58                    165
CIN 22                   154
CLE 69                   144
CLE 80                   144
IND 11                   140
Name: penaltyJerseyNumbers, Length: 66, dtype: int64

In [153]:
def animated_slice(df):
    """Build an animated plotly scatter for one slice of tracking data.

    Every row of ``df`` is drawn at (``x``, ``y``), coloured by ``team`` and
    labelled with ``position``. Animation frames are keyed by ``time`` and
    markers are linked across frames through ``displayName``.

    Parameters
    ----------
    df : frame providing the columns named in the scatter options below.

    Returns
    -------
    The figure produced by ``px.scatter`` after trace and layout styling;
    the caller is responsible for showing it.
    """
    hover_columns = ['displayName', 'jerseyNumber', 's', 'a', 'dis', 'o', 'dir', 'playDirection']
    scatter_options = dict(
        x='x',
        y='y',
        color='team',
        text='position',
        animation_frame='time',
        animation_group='displayName',
        range_x=[-10, 130],
        range_y=[-10, 60],
        hover_data=hover_columns,
    )
    background = 'darkgreen'

    figure = px.scatter(df, **scatter_options)
    # Put the position label above each marker and enlarge the markers.
    figure.update_traces(textposition='top center', marker_size=10)
    # Green background with white text.
    figure.update_layout(paper_bgcolor=background, plot_bgcolor=background, font_color='white')
    return figure

In [158]:
# Order all remaining rows by their `time` value and renumber the index from 0,
# then preview the first 30 rows.
df_cd = df_d.sort_values('time').reset_index(drop=True)
df_cd.head(30)

Unnamed: 0,time,x,y,s,a,dis,o,dir,event,nflId,...,passResult,offensePlayResult,playResult,epa,isDefensivePI,gameDate,gameTimeEastern,homeTeamAbbr,visitorTeamAbbr,week
0,2018-09-07T01:07:18.099Z,96.43,26.78,0.84,2.05,0.08,356.4,333.39,pass_forward,310.0,...,C,10,10,0.261827,False,09/06/2018,20:20:00,PHI,ATL,1
1,2018-09-07T01:07:18.099Z,96.9,26.97,2.05,3.76,0.22,,,pass_forward,,...,C,10,10,0.261827,False,09/06/2018,20:20:00,PHI,ATL,1
2,2018-09-07T01:07:18.099Z,89.62,18.39,6.75,1.51,0.67,197.78,208.19,pass_forward,2559033.0,...,C,10,10,0.261827,False,09/06/2018,20:20:00,PHI,ATL,1
3,2018-09-07T01:07:18.099Z,83.06,25.98,4.08,3.73,0.4,42.57,303.26,pass_forward,2558175.0,...,C,10,10,0.261827,False,09/06/2018,20:20:00,PHI,ATL,1
4,2018-09-07T01:07:18.099Z,83.2,21.36,6.21,2.68,0.62,262.52,260.44,pass_forward,2555415.0,...,C,10,10,0.261827,False,09/06/2018,20:20:00,PHI,ATL,1
5,2018-09-07T01:07:18.099Z,79.51,20.0,2.99,0.69,0.3,78.25,259.75,pass_forward,2552689.0,...,C,10,10,0.261827,False,09/06/2018,20:20:00,PHI,ATL,1
6,2018-09-07T01:07:18.099Z,86.1,33.53,3.75,1.49,0.37,52.23,311.29,pass_forward,2552315.0,...,C,10,10,0.261827,False,09/06/2018,20:20:00,PHI,ATL,1
7,2018-09-07T01:07:18.099Z,76.53,44.93,4.68,2.18,0.47,182.14,277.48,pass_forward,2555383.0,...,C,10,10,0.261827,False,09/06/2018,20:20:00,PHI,ATL,1
8,2018-09-07T01:07:18.099Z,69.63,30.81,2.93,2.65,0.3,79.24,315.98,pass_forward,2534832.0,...,C,10,10,0.261827,False,09/06/2018,20:20:00,PHI,ATL,1
9,2018-09-07T01:07:18.099Z,82.65,34.56,6.35,1.21,0.64,262.06,272.78,pass_forward,2533040.0,...,C,10,10,0.261827,False,09/06/2018,20:20:00,PHI,ATL,1


In [159]:
# Group the time-ordered frame by play for the sampling loop that follows.
m_grouped = df_cd.groupby(['gameId', 'playId'])

In [160]:
# Collect up to `instances_threshold` example plays with, and without, defensive
# pass interference. Despite the `_dfs` suffix, both lists hold
# (gameId, playId) tuples rather than DataFrames.
instances_threshold = 10

# One boolean per play: True only when every row of the play is explicitly
# flagged True. `.eq(True)` maps a missing flag to False, whereas a bare `.all()`
# skips NaN and would report a play whose flag is entirely missing as DPI.
play_is_dpi = (
    m_grouped['isDefensivePI']
    .agg(lambda flags: flags.eq(True).all())
    .astype(bool)
)

# The aggregated index follows the same key order the groupby iterates in, so
# taking the head of each subset picks the same first plays a manual loop would.
dpi_dfs = play_is_dpi[play_is_dpi].index[:instances_threshold].to_list()
non_dpi_dfs = play_is_dpi[~play_is_dpi].index[:instances_threshold].to_list()

print(f'Dpi shape {np.shape(dpi_dfs)}, non dpi shape {np.shape(non_dpi_dfs)}')

Dpi shape (10, 2), non dpi shape (10, 2)


In [161]:
# Render one animated figure for each sampled DPI play.
for game_id, play_id in dpi_dfs:
    in_play = (df_cd.gameId == game_id) & (df_cd.playId == play_id)
    fig = animated_slice(df_cd[in_play])
    fig.show()