In [15]:
# Kaggle NFL Big Data Bowl 2022 Submission

import pandas as pd
import numpy as np

# Display options: None removes the row limit and the column-width truncation,
# so long text columns (e.g. play descriptions) are shown in full.
# Cells below rely on .head() to keep rendered output small.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)

# Single place to change the dataset location (Kaggle input mount).
DATA_DIR = '../input/nfl-big-data-bowl-2022'

# Read data into memory
games_df = pd.read_csv(f'{DATA_DIR}/games.csv')
players_df = pd.read_csv(f'{DATA_DIR}/players.csv')
plays_df = pd.read_csv(f'{DATA_DIR}/plays.csv')
pff_scouting_df = pd.read_csv(f'{DATA_DIR}/PFFScoutingData.csv')

# Schema/dtype summary is printed by info(); the first rows are the cell output.
plays_df.info()
plays_df.head()

In [16]:
# Distinct specialTeamsResult values that occur on punt plays
plays_df.loc[plays_df['specialTeamsPlayType'].eq('Punt'), 'specialTeamsResult'].unique()

In [17]:
# Preview plays flagged 'Non-Special Teams Result'
# (author's note: these include trick plays, fumbled snaps, etc.)
plays_df.loc[plays_df['specialTeamsResult'].eq('Non-Special Teams Result')].head()

In [24]:
# Muffed punts: punt plays whose special-teams result is 'Muffed'
muffed_punt_filter = (
    plays_df['specialTeamsResult'].eq('Muffed')
    & plays_df['specialTeamsPlayType'].eq('Punt')
)
# Show the description and return yardage for the first 20 matches.
plays_df.loc[muffed_punt_filter, ['playDescription', 'kickReturnYardage']].head(20)

In [23]:
# Players that have caught punts
# Build the punt-play frame in a single chain. `.assign` returns a new frame, so
# the derived column is never written into a filtered slice of plays_df (which
# is what raises SettingWithCopyWarning) and the cell is safe to re-run.
punt_plays_df = (
    plays_df[plays_df['specialTeamsPlayType'] == 'Punt']
    # Plays without a recorded returner cannot be attributed to a player.
    .dropna(subset=['returnerId'])
    # returnerId is split on ';' and only the first token is kept as the
    # primary returner, converted to int64 so it can be joined against nflId.
    .assign(
        primaryReturnerId=lambda punts: punts['returnerId'].str.split(';').str[0].astype('int64')
    )
)

# Default (inner) merge: punt plays whose primary returner id has no match in
# players_df are dropped from the result.
punt_returns_df = pd.merge(punt_plays_df, players_df, left_on='primaryReturnerId', right_on='nflId')

# Columns shown in the preview below.
punt_returns_columns = [
    'gameId',
    'playDescription',
    'specialTeamsResult',
    'returnerId',
    'nflId',
    'displayName'
]
punt_returns_df[punt_returns_columns].head()

In [21]:
# Distribution of punts caught by player:
# one row per (primaryReturnerId, displayName) with the number of punt plays.
punt_returners_grouped_df = (
    punt_returns_df
    .groupby(['primaryReturnerId', 'displayName'])
    .size()
    .reset_index(name='puntsCaught')
)
# Top 20 returners by number of punts caught.
punt_returners_grouped_df.sort_values(by='puntsCaught', ascending=False).head(20)

In [None]:
# Group by return result (positive, neutral, negative)
# Rows with zero or missing kickReturnYardage are both labelled 'neutral'.
conditions = [
    (punt_returns_df['kickReturnYardage'] > 0),
    (punt_returns_df['kickReturnYardage'] == 0) | (punt_returns_df['kickReturnYardage'].isna()),
    (punt_returns_df['kickReturnYardage'] < 0),
]
result_types = ['positive', 'neutral', 'negative']
# For numeric yardage the three conditions cover every row, so the default is
# not expected to be selected. It is passed explicitly because np.select's
# implicit default is the integer 0, which recent NumPy releases refuse to
# combine with string choices (TypeError: no common dtype).
punt_returns_df['returnResult'] = np.select(conditions, result_types, default='unknown')
# Count of plays per returner and return result.
punt_returns_df.groupby(['primaryReturnerId', 'displayName', 'returnResult']).size()

In [None]:
# Add isTouchdown column
# regex=False matches 'TOUCHDOWN' as a literal substring; na=False maps a
# missing playDescription to False so the column is strictly boolean instead
# of an object column containing NaN.
# NOTE(review): this flags every description containing 'TOUCHDOWN' —
# presumably including touchdowns later nullified or reversed; confirm.
punt_returns_df['isTouchdown'] = punt_returns_df['playDescription'].str.contains(
    'TOUCHDOWN', regex=False, na=False
)
punt_returns_df[['playDescription', 'isTouchdown']].head()