In [None]:
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import math
from scipy.spatial import ConvexHull, convex_hull_plot_2d
import imageio
import numpy as np
import warnings
import torch
from torch import nn
import torch.optim as opt

In [40]:
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model

model = load_model('model_full_fold.h5')



In [39]:
def get_tracking_and_plays(filepath):
    """
    Return a dataframe containing cleaned tracking data joined with the plays data
    """
    tracking = pd.read_csv(filepath)
    plays = pd.read_csv("plays.csv")
    tracking = tracking[tracking['playId'].isin(tracking[tracking['event'] != 'fumble']['playId'].unique())]
    plays = plays[plays['playNullifiedByPenalty'] == 'N']

    tracking.loc[tracking['playDirection'] == 'left', 'x'] = 120 - tracking.loc[tracking['playDirection'] == 'left', 'x']
    tracking.loc[tracking['playDirection'] == 'left', 'y'] = (160/3) - tracking.loc[tracking['playDirection'] == 'left', 'y']
    tracking.loc[tracking['playDirection'] == 'left', 'dir'] += 180
    tracking.loc[tracking['dir'] > 360, 'dir'] -= 360
    tracking.loc[tracking['playDirection'] == 'left', 'o'] += 180
    tracking.loc[tracking['o'] > 360, 'o'] -= 360

    tracking_with_plays = tracking.merge(plays, on=['gameId', 'playId'], how='left')

    tracking_with_plays['is_on_offense'] = tracking_with_plays['club'] == tracking_with_plays['possessionTeam']
    tracking_with_plays['is_on_defence'] = tracking_with_plays['club'] == tracking_with_plays['defensiveTeam']
    tracking_with_plays['is_ballcarrier'] = tracking_with_plays['ballCarrierId'] == tracking_with_plays['nflId']

    end_frame = tracking_with_plays[tracking_with_plays['event'].isin(['tackle', 'out_of_bounds'])].groupby('playId')['frameId'].min().reset_index()
    end_frame.rename(columns={'frameId': 'frameId_end'}, inplace=True)

    start_frame = tracking_with_plays[tracking_with_plays['event'].isin(['run', 'lateral', 'run_pass_option', 'handoff', 'pass_arrived'])].groupby('playId')['frameId'].min().reset_index()
    start_frame.rename(columns={'frameId': 'frameId_start'}, inplace=True)

    tracking_with_plays = tracking_with_plays.merge(end_frame, on='playId', how='left')
    tracking_with_plays = tracking_with_plays.merge(start_frame, on='playId', how='left')

    tracking_with_plays = tracking_with_plays[(tracking_with_plays['frameId'] <= tracking_with_plays['frameId_end']) &
                                              (tracking_with_plays['frameId'] >= tracking_with_plays['frameId_start'])]

    return tracking_with_plays

In [161]:
tracking_with_plays = get_tracking_and_plays('tracking_week_3.csv')

tracking_with_plays.loc[(tracking_with_plays['gameId']==2022090800) & (tracking_with_plays['playId']==2022090800)]

Unnamed: 0,gameId,playId,nflId,displayName,frameId,time,jerseyNumber,club,playDirection,x,...,expectedPointsAdded,foulName1,foulName2,foulNFLId1,foulNFLId2,is_on_offense,is_on_defence,is_ballcarrier,frameId_end,frameId_start


In [None]:
def create_feature_tensor(feature_df):
    """
    Convert the input frame_df to a 4D tensor.
        - The first dimension is the frame
        - The second dimension is the index of the current player
        - The third dimension is the index of the relative player
    """
    tensor_shape = (feature_df.groupby(['gameId', 'playId', 'frameId']).ngroups, 10, 11, 10)
    input_tensor = np.zeros(tensor_shape)
    cur_count = 0
    for game_id, game_group in feature_df.groupby('gameId'):
        for play_id, play_group in game_group.groupby('playId'):
            for frame_id, frame_group in play_group.groupby('frameId'):
                offense = frame_group[(frame_group['is_on_offense']) & (~frame_group['is_ballcarrier'])]
                defence = frame_group[frame_group['is_on_defence']]
                ballcarrier = frame_group[frame_group['is_ballcarrier']]
                ballcarrier_sx = ballcarrier.s * np.cos(np.deg2rad(ballcarrier.dir))
                ballcarrier_sy = ballcarrier.s * np.sin(np.deg2rad(ballcarrier.dir))
                for i, def_player in enumerate(defence.itertuples()):
                    def_player_sx = def_player.s * np.cos(np.deg2rad(def_player.dir))
                    def_player_sy = def_player.s * np.sin(np.deg2rad(def_player.dir))
                    for j, off_player in enumerate(offense.itertuples()):
                        off_player_sx = off_player.s * np.cos(np.deg2rad(off_player.dir))
                        off_player_sy = off_player.s * np.sin(np.deg2rad(off_player.dir))
                        input_tensor[cur_count, 0, i, j] = off_player.x - def_player.x
                        input_tensor[cur_count, 1, i, j] = off_player.y - def_player.y
                        input_tensor[cur_count, 2, i, j] = def_player_sx
                        input_tensor[cur_count, 3, i, j] = def_player_sy
                        input_tensor[cur_count, 4, i, j] = def_player_sx - ballcarrier_sx
                        input_tensor[cur_count, 5, i, j] = def_player_sy - ballcarrier_sy
                        input_tensor[cur_count, 6, i, j] = def_player.x - ballcarrier.x
                        input_tensor[cur_count, 7, i, j] = def_player.y - ballcarrier.y
                        input_tensor[cur_count, 8, i, j] = off_player_sx - def_player_sx
                        input_tensor[cur_count, 9, i, j] = off_player_sy - def_player_sy
                cur_count += 1
    return input_tensor

In [173]:
import numpy as np

def change_distance(df, displayName, amount):
    df['sx'] = df['s'] * np.cos(np.deg2rad(df['dir']))
    df['sy'] = df['s'] * np.sin(np.deg2rad(df['dir']))

    filtered_df = df[df['displayName'] == displayName].copy()
    filtered_df['px'] = filtered_df['x'].shift(1)
    filtered_df['pxs'] = filtered_df['sx'].shift(1)
    filtered_df['py'] = filtered_df['y'].shift(1)
    filtered_df['psy'] = filtered_df['sy'].shift(1)

    for index, row in filtered_df.iterrows():
        if pd.isna(row['px']) or pd.isna(row['pxs']) or pd.isna(row['py']) or pd.isna(row['psy']):
            continue
        
        # print(df.at[index, 'x'])
        df.at[index, 'sx'] = row['pxs']
        df.at[index, 'sy'] = row['psy']
        df.at[index, 'x'] = row['px'] + amount * row['pxs']
        df.at[index, 'y'] = row['py'] + amount * row['psy']
        # print(df.at[index, 'x'])
    return df

In [174]:
def compare_yard_prediction(tracking_with_plays, displayName, gameId, playId, amount):
  play_info = tracking_with_plays.loc[(tracking_with_plays['playId'] == playId) & (tracking_with_plays['gameId'] == gameId)]
  original_prediction = model.predict(create_feature_tensor(play_info))
  changed_play_info = change_distance(play_info, displayName, amount)
  changed_prediction = model.predict(create_feature_tensor(changed_play_info))
  prediction_difference = changed_prediction - original_prediction
  results = pd.DataFrame({
        'Original Prediction': original_prediction.flatten(),
        'Changed Prediction': changed_prediction.flatten(),
        'Prediction Difference': prediction_difference.flatten()
    })
  print(1)
  return results

In [184]:
compare_yard_prediction(tracking_with_plays, 'Terrell Edmunds', 2022092200, 601, 0.1)

1


Unnamed: 0,Original Prediction,Changed Prediction,Prediction Difference
0,1.265527,1.265527,0.0
1,1.294765,1.294067,-0.000697
2,1.333917,1.333606,-0.000312
3,1.363687,1.363737,5e-05
4,1.381868,1.382601,0.000733
5,1.395878,1.398013,0.002134
6,1.364179,1.367338,0.003159
7,1.324557,1.331004,0.006447
8,1.357734,1.363408,0.005674
9,1.364234,1.374025,0.009791


In [None]:
tracking_with_plays.loc[(tracking_with_plays['gameId']==2022090800) & (tracking_with_plays['playId']==56)]

In [None]:
create_feature_tensor(tracking_with_plays.loc[(tracking_with_plays['playId'] == 80) & (tracking_with_plays['gameId'] == 2022090800)])[0][0][10]

In [None]:
import matplotlib.pyplot as plt

def plot_prediction_difference(df, gameId, playId, amount, player1, player2):
    player1_pred = compare_yard_prediction(df, player1, gameId, playId, amount)
    player2_pred = compare_yard_prediction(df, player2, gameId, playId, amount)
    pred_difference = player2_pred['Changed Prediction'] - player1_pred['Changed Prediction']
    frames = range(len(player1_pred))
    plt.figure(figsize=(10, 6))
    plt.plot(frames, pred_difference, color='blue')
    plt.title('Yard Prediction Differences per Frame on Projected Movements')
    plt.xlabel('Frame')
    plt.ylabel('Yard Prediction')
    note = f"Difference is {player2}'s - {player1}'s predicted yards"
    plt.text(0.5, 0.01, note, transform=plt.gca().transAxes, fontsize=9, va='bottom', ha='center')
    plt.grid(True)
    plt.show()

In [165]:
plot_prediction_difference(tracking_with_plays, 2022090800, 577, 0.1, 'Levi Wallace', 'M')

NameError: name 'plot_prediction_difference' is not defined

In [177]:
plays=pd.read_csv('plays.csv')
games=pd.read_csv('games.csv')
tracking1 = get_tracking_and_plays('tracking_week_1.csv')
tracking2 = get_tracking_and_plays('tracking_week_2.csv')
tracking3 = get_tracking_and_plays('tracking_week_3.csv')
tracking4 = get_tracking_and_plays('tracking_week_4.csv')
tracking5 = get_tracking_and_plays('tracking_week_5.csv')
tracking6 = get_tracking_and_plays('tracking_week_6.csv')
tracking7 = get_tracking_and_plays('tracking_week_7.csv')
tracking8 = get_tracking_and_plays('tracking_week_8.csv')
tracking9 = get_tracking_and_plays('tracking_week_9.csv')

In [178]:
def get_tracking(week_num):
    if week_num==1:
        return tracking1
    elif week_num==2:
        return tracking2
    elif week_num==3:
        return tracking3
    elif week_num==4:
        return tracking4
    elif week_num==5:
        return tracking5
    elif week_num==6:
        return tracking6
    elif week_num==7:
        return tracking7
    elif week_num==8:
        return tracking8
    else:
        return tracking9

In [179]:
warnings.filterwarnings('ignore')
distinct_plays=plays.sort_values(by=['gameId','playId'])[['gameId', 'playId']]
results_dict={}

counter=0
for i, row in distinct_plays.iterrows():
    week_number=int(games.loc[games['gameId']==row['gameId']]['week'])
    tracking=get_tracking(week_number)
    players=tracking.loc[(tracking['gameId']==row['gameId']) & (tracking['playId']==row['playId']) & (tracking['is_on_defence']==True)]['displayName'].unique()
    for j in players:
        final_df=compare_yard_prediction(tracking, j, int(row['gameId']), int(row['playId']), 0.1)
        print(len(final_df))
        cumulative_impact=sum(final_df['Prediction Difference'])
        if j in results_dict:
            results_dict[j]+=cumulative_impact
        else:
            results_dict[j]=cumulative_impact
    counter+=1
    if counter>10:
        break

1
17
1
17
1
17
1
17
1
17


ValueError: setting an array element with a sequence.

In [118]:
distinct_plays

Unnamed: 0,gameId,playId
4889,2022090800,56
2819,2022090800,80
7700,2022090800,101
6265,2022090800,122
3627,2022090800,146
...,...,...
11770,2022110700,3658
11622,2022110700,3686
1888,2022110700,3707
9338,2022110700,3740


In [None]:
def get_prediction_difference(tracking_with_plays, gameId, playId):
  play_info = tracking_with_plays.loc[(tracking_with_plays['playId'] == playId) & (tracking_with_plays['gameId'] == gameId)]
  first_coord_football=float(play_info[(play_info['frameId']==play_info['frameId_start']) & (play_info['displayName']=='football')]['x'])
  last_coord_football=float(play_info[(play_info['frameId']==play_info['frameId_end']) & (play_info['displayName']=='football')]['x'])
  original_prediction = model.predict(create_feature_tensor(play_info))
  actual_yards=last_coord_football-first_coord_football
  ball_carrier=play_info.iloc[0]['ballCarrierDisplayName']
  defense_name=play_info.iloc[0]['defensiveTeam']
  return actual_yards-original_prediction[0][0], ball_carrier, defense_name

In [None]:
get_prediction_difference(tracking1, 2022090800, 818)

In [30]:
warnings.filterwarnings('ignore')
distinct_plays=plays.sort_values(by=['gameId','playId'])[['gameId', 'playId']]
team_results={}

games=games.sort_values(by='week')

counter=0
for i, row in distinct_plays.iterrows():
    week_number=int(games.loc[games['gameId']==row['gameId']]['week'])
    tracking=get_tracking(week_number)
    try:
        tuple_vals=get_prediction_difference(tracking, row['gameId'], row['playId'])
        if tuple_vals[1] in team_results:
         team_results[tuple_vals[1]]+=tuple_vals[0]
        else:
            team_results[tuple_vals[1]]=tuple_vals[0]
        if tuple_vals[2] in team_results:
            team_results[tuple_vals[2]]-=tuple_vals[0]
        else:
            team_results[tuple_vals[2]]=tuple_vals[0]*-1
    except:
        print(row['gameId'], row['playId'])
    counter+=1

team_results

2022100201 3542
2022100202 709
2022100202 881
2022100202 1009
2022100202 1293
2022100202 1369
2022100202 1433
2022100202 2019
2022100202 2373
2022100202 2574
2022100202 3268
2022100202 3483
2022100202 3621
2022100203 2440
2022100203 2786
2022100203 3282
2022100204 957
2022100204 1198
2022100204 1958
2022100204 2860
2022100204 4070
2022100205 560
2022100205 736
2022100205 1384
2022100205 1570
2022100205 1700
2022100205 2648
2022100205 2939
2022100205 3736
2022100205 3760
2022100205 4072
2022100205 4228
2022100205 4264
2022100205 4387
2022100206 695
2022100206 988
2022100206 1176
2022100206 1224
2022100206 1434
2022100206 2747
2022100207 1102
2022100207 1380
2022100207 1458
2022100207 1562
2022100207 1597
2022100207 2443
2022100207 3351
2022100208 277
2022100208 518
2022100208 1107
2022100208 1348
2022100209 617
2022100209 675
2022100209 1146
2022100209 1248
2022100209 1602
2022100209 3196
2022100209 3368
2022100210 2576
2022100210 3043
2022100210 3214
2022100210 3575
2022100210 3810
202

{'Stefon Diggs': 76.70538434386184,
 'LA': -339.78115597367446,
 'Josh Allen': 87.92232686281224,
 'Devin Singletary': 86.04957437515299,
 'Zack Moss': 68.79053702950544,
 'Darrell Henderson': -14.729316651819747,
 'BUF': -453.1729350984132,
 'Cooper Kupp': 44.876804918051114,
 'Tyler Higbee': 64.56052201986364,
 'Ben Skowronek': 37.36804196238532,
 'Jamison Crowder': 2.415125489235052,
 'Isaiah McKenzie': 6.111850976943934,
 'Dawson Knox': -26.23320931196196,
 'Cam Akers': -79.77077853679748,
 'Gabe Davis': 11.16139400005316,
 'Brandon Powell': 40.243643820286955,
 'Allen Robinson': -20.70005300641074,
 'Matthew Stafford': -3.0891325473784974,
 'Marcus Mariota': 123.30519008636465,
 'NO': -140.67162051796657,
 'Cordarrelle Patterson': 23.24673151970007,
 'Parker Hesse': 13.075969815255043,
 'Damien Williams': -2.1463484764098766,
 'Kyle Pitts': 59.74632185697537,
 'Alvin Kamara': 29.95114758610742,
 'ATL': -244.07688891887352,
 'Jameis Winston': -3.116720199585018,
 'Olamide Zaccheaus

In [35]:
import operator
sorted_d = sorted(team_results.items(), key=operator.itemgetter(1), reverse=True)
sorted_d

[('Lamar Jackson', 439.4007861018178),
 ('Travis Etienne', 285.69930595159553),
 ('Justin Fields', 279.5212503373616),
 ('Miles Sanders', 274.7611335515975),
 ('Rhamondre Stevenson', 223.9665178358567),
 ('Khalil Herbert', 211.95844608545283),
 ('Kyler Murray', 184.53632259368902),
 ('Saquon Barkley', 182.89138776064118),
 ('Dameon Pierce', 165.26517754793426),
 ('Aaron Jones', 162.49561327695807),
 ('Justin Jefferson', 160.74112063646353),
 ('Kenyan Drake', 157.46185284853),
 ('Austin Ekeler', 151.79632234573302),
 ('Dallas Goedert', 143.32377177476965),
 ('Terry McLaurin', 137.83208358287843),
 ('Tyler Allgeier', 137.61533141136047),
 ('Christian McCaffrey', 137.3318694233894),
 ('David Montgomery', 135.41521602869113),
 ('Deebo Samuel', 133.47172492742573),
 ('Joe Mixon', 127.55844670534117),
 ('David Njoku', 127.48708564043018),
 ('Jaylen Waddle', 125.41608536243342),
 ('Marcus Mariota', 123.30519008636465),
 ('Jaylen Warren', 120.0756590068334),
 ('James Conner', 118.8274457454697

In [None]:
plays.sort_values(by=['gameId','playId'])

In [142]:
def get_broken_tackles(tracking_with_plays, gameId, playId):
  play_info = tracking_with_plays.loc[(tracking_with_plays['playId'] == playId) & (tracking_with_plays['gameId'] == gameId)].reset_index()
  original_prediction = model.predict(create_feature_tensor(play_info))
  print(original_prediction)
  index=np.argmax(original_prediction<1)
  tackle_evasion=0
  tackle_evaded_yards=0
  ball_carrier=play_info.iloc[0]['ballCarrierDisplayName']
  defense_name=play_info.iloc[0]['defensiveTeam']
  if index>0:
    first_coord_football=float(play_info[(play_info['displayName']=='football')].iloc[index]['x'])
    last_coord_football=float(play_info[(play_info['frameId']==play_info['frameId_end']) & (play_info['displayName']=='football')]['x'])
    if (last_coord_football-first_coord_football)>5:
      tackle_evasion=1
      tackle_evaded_yards=last_coord_football-first_coord_football 
  return tackle_evasion, tackle_evaded_yards, ball_carrier, defense_name

In [143]:
# print(get_broken_tackles(tracking_with_plays, 2022090800, 1385))
for i in tracking3.loc[(tracking3['gameId']==2022092200)]['playId'].unique():
    print(get_broken_tackles(tracking3, 2022092200, i))
    print(i)

[[6.482896 ]
 [6.6362042]
 [6.8344035]
 [6.826794 ]
 [6.2294407]
 [5.9022765]]
(0, 0, 'Nick Chubb', 'PIT')
56
[[9.932526 ]
 [9.769261 ]
 [9.569338 ]
 [9.420176 ]
 [9.231782 ]
 [8.768669 ]
 [8.432543 ]
 [8.494561 ]
 [7.9851685]
 [7.7873187]
 [7.9897714]
 [7.7211947]
 [7.4088902]
 [7.1591086]
 [6.9679074]]
(0, 0, 'Nick Chubb', 'PIT')
84
[[3.583621 ]
 [3.5843077]
 [3.9062724]
 [3.922019 ]
 [3.7912898]
 [3.4552898]
 [3.1442008]
 [2.993904 ]
 [2.7197647]
 [2.6206098]
 [2.5315604]
 [2.2750049]
 [2.1844869]
 [1.7969013]
 [1.3702669]
 [1.0722281]
 [0.810531 ]
 [0.6359462]
 [0.5288499]
 [0.4201563]]
(0, 0, 'Amari Cooper', 'PIT')
127
[[3.9090176]
 [2.8939042]
 [1.9943975]
 [1.2769051]
 [0.7578586]]
(0, 0, 'Diontae Johnson', 'CLE')
190
[[4.7479444]
 [4.7161384]
 [4.634355 ]
 [4.6810923]
 [4.782268 ]
 [4.775043 ]
 [4.3200126]
 [3.9170794]
 [3.7872577]
 [3.5853543]
 [3.947011 ]
 [4.7467856]
 [5.929582 ]
 [7.842136 ]
 [9.089625 ]
 [9.61278  ]
 [8.966873 ]
 [7.987228 ]
 [7.1302366]
 [6.4550977]
 [5.5

In [152]:
tracking3.loc[(tracking3['playId'] == 419)& (tracking3['gameId']==2022092200)]['gameClock']

6388    8:37
6389    8:37
6390    8:37
6391    8:37
6392    8:37
        ... 
8133    8:37
8134    8:37
8135    8:37
8136    8:37
8137    8:37
Name: gameClock, Length: 1288, dtype: object

In [None]:
warnings.filterwarnings('ignore')
distinct_plays=plays.sort_values(by=['gameId','playId'])[['gameId', 'playId']]
player_results_broken_tackles={}
player_results_expected_yards={}

games=games.sort_values(by='week')

counter=0
for i, row in distinct_plays.iterrows():
    week_number=int(games.loc[games['gameId']==row['gameId']]['week'])
    tracking=get_tracking(week_number)
    try:
        tuple_vals=get_prediction_difference(tracking, row['gameId'], row['playId'])
        if tuple_vals[1] in team_results:
         player_results[tuple_vals[1]]+=tuple_vals[0]
        else:
            team_results[tuple_vals[1]]=tuple_vals[0]
        if tuple_vals[2] in team_results:
            team_results[tuple_vals[2]]-=tuple_vals[0]
        else:
            team_results[tuple_vals[2]]=tuple_vals[0]*-1
    except:
        print(row['gameId'], row['playId'])
    counter+=1

team_results