# NFLVerse

To predict future outcomes of head-to-head matchups or individual performance, we need to measure the quality of players.
- Use past performance to summarize players
- Weight their performance by who they played against and the situation
- Build a model that predicts future performance
- Find injury reports to estimate playing time
- Aggregate the performance of the players on each team; include unique schemes, player archetypes, situations, and abilities (e.g., a player's speed, the offensive line's (OL) performance during the two-minute warning, injury history)
- Build a model that predicts outcomes based on the aggregate performance

In [None]:
import sys

# Install nfl_data_py plus numpy, pandas, fastparquet and fsspec.
# Invoking pip as a module of sys.executable uses the same interpreter that
# is running this notebook.
# The leading "!" is an IPython/Jupyter shell escape, so this cell is not
# valid in a plain Python script.
!{sys.executable} -m pip install nfl_data_py numpy pandas fastparquet fsspec

In [3]:
import nfl_data_py as nfl

ModuleNotFoundError: No module named 'nfl_data_py'

In [2]:
import pandas as pd
import numpy as np

def get_data(type, years):
    """Download one nflverse file per season and stack them into one frame.

    For every entry of ``years`` the URL
    ``<base_url><type><year>.csv.gz`` is built, announced on stdout and read
    with ``pandas.read_csv``.

    Args:
        type: File-name prefix, including the trailing underscore (for
            example ``'play_by_play_'``). Must be one of the prefixes in
            ``types`` below. The name shadows the ``type`` builtin but is
            kept so existing positional and keyword callers keep working.
        years: Iterable of seasons; each item is converted with ``str()``
            and inserted into the file name.

    Returns:
        ``None`` (after printing ``'Invalid type'``) when ``type`` is not a
        known prefix; an empty ``DataFrame`` when ``years`` yields nothing;
        otherwise the row-wise concatenation of all downloaded seasons with
        a fresh 0..n-1 index.
    """
    # NOTE(review): every prefix is fetched from the "pbp" release tag.
    # Presumably only 'play_by_play_' files live there - confirm the release
    # tag for the other prefixes before relying on them.
    base_url = 'https://github.com/nflverse/nflverse-data/releases/download/pbp/'
    types = ['play_by_play_', 'player_stats_', 'team_stats_', 'game_summaries_',
        'rosters_', 'injuries_', 'draft_', 'schedule_']
    ext = '.csv.gz'

    if type not in types:
        print('Invalid type')
        return None

    data_list = []

    for year in years:
        url = f'{base_url}{type}{year}{ext}'
        print('Downloading ' + url)
        df = pd.read_csv(url, compression='gzip', low_memory=False)
        data_list.append(df)

    # pd.concat raises ValueError on an empty list, so an empty selection of
    # seasons is answered with an empty frame instead.
    if not data_list:
        return pd.DataFrame()

    # Each season's file is read with its own 0-based index; renumber so row
    # labels stay unique across seasons.
    return pd.concat(data_list, ignore_index=True)

# Fetch the 2019 and 2020 play-by-play files (the range end is exclusive)
# and preview the first five rows of the combined frame.
play_by_play = get_data(type='play_by_play_', years=range(2019, 2021))
play_by_play.head(n=5)

Downloading https://github.com/nflverse/nflverse-data/releases/download/pbp/play_by_play_2019.csv.gz
Downloading https://github.com/nflverse/nflverse-data/releases/download/pbp/play_by_play_2020.csv.gz


Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,out_of_bounds,home_opening_kickoff,qb_epa,xyac_epa,xyac_mean_yardage,xyac_median_yardage,xyac_success,xyac_fd,xpass,pass_oe
0,1,2019_01_ATL_MIN,2019090804,MIN,ATL,REG,1,,,,...,0,0,0.0,,,,,,,
1,36,2019_01_ATL_MIN,2019090804,MIN,ATL,REG,1,ATL,away,MIN,...,0,0,0.0,,,,,,,
2,51,2019_01_ATL_MIN,2019090804,MIN,ATL,REG,1,ATL,away,MIN,...,0,0,-1.658763,,,,,,0.486799,51.320082
3,79,2019_01_ATL_MIN,2019090804,MIN,ATL,REG,1,ATL,away,MIN,...,0,0,-0.538914,,,,,,0.639994,-63.999379
4,100,2019_01_ATL_MIN,2019090804,MIN,ATL,REG,1,ATL,away,MIN,...,0,0,0.142138,,,,,,0.933516,6.648362


In [None]:
# Offensive Plays
# Column names describing the offensive side of a play: situation, play
# call, result flags, and the ids/names of the players involved.
# Presumably used to select columns from the play-by-play frame.
# 'qtr' and 'desc' are the nflverse play-by-play field names for the quarter
# and the play description; the data has no 'quarter' or 'description'
# column, so those names would raise KeyError when used as a selector.
offensive_plays = ['game_id',
    'play_id', 'posteam', 'qtr', 'down', 'ydstogo', 'yardline_100', 'desc', 'play_type',
    'yards_gained', 'shotgun', 'no_huddle', 'qb_dropback', 'qb_kneel', 'qb_spike', 'qb_scramble',
    'pass_length', 'pass_location', 'air_yards', 'yards_after_catch', 'run_location', 'run_gap',
    'field_goal_result', 'kick_distance', 'extra_point_result', 'two_point_conv_result', 'rush_attempt',
    'pass_attempt', 'sack', 'touchdown', 'pass_touchdown', 'rush_touchdown', 'return_touchdown',
    'extra_point_attempt', 'two_point_attempt', 'field_goal_attempt', 'kickoff_attempt', 'punt_attempt',
    'complete_pass', 'lateral_reception', 'lateral_rush', 'lateral_return', 'lateral_recovery',
    'passer_player_id', 'passer_player_name', 'receiver_player_id', 'receiver_player_name',
    'rusher_player_id', 'rusher_player_name', 'lateral_receiver_player_id', 'lateral_receiver_player_name',
    'lateral_rusher_player_id', 'lateral_rusher_player_name', 'lateral_sack_player_id', 'lateral_sack_player_name',
    'interception_player_id', 'interception_player_name', 'lateral_interception_player_id',
    'lateral_interception_player_name', 'punt_returner_player_id', 'punt_returner_player_name',
    'kickoff_returner_player_id', 'kickoff_returner_player_name', 'punter_player_id', 'punter_player_name',
    'kicker_player_id', 'kicker_player_name', 'own_kickoff_recovery_player_id', 'own_kickoff_recovery_player_name',
    'blocked_player_id', 'blocked_player_name'
]

# Defensive Plays
# Column names for the defensive side of a play, grouped below by the name
# prefix they share. Order matches the original listing.
# NOTE(review): unlike offensive_plays, this list has 'game_id' but no
# 'play_id' - confirm that is intended if rows must be matched per play.
defensive_plays = [
    "game_id",
    "defteam",
    # play-level defensive event columns
    "punt_blocked", "interception", "fumble_forced", "fumble_not_forced",
    "fumble_out_of_bounds", "solo_tackle", "safety", "tackled_for_loss",
    "fumble_lost", "own_kickoff_recovery", "own_kickoff_recovery_td",
    "qb_hit", "assist_tackle",
    # forced_fumble_player_*
    "forced_fumble_player_1_team",
    "forced_fumble_player_1_player_id",
    "forced_fumble_player_1_player_name",
    "forced_fumble_player_2_team",
    "forced_fumble_player_2_player_id",
    "forced_fumble_player_2_player_name",
    # solo_tackle_*
    "solo_tackle_1_team", "solo_tackle_2_team",
    "solo_tackle_1_player_id", "solo_tackle_2_player_id",
    "solo_tackle_1_player_name", "solo_tackle_2_player_name",
    # assist_tackle_*
    "assist_tackle_1_player_id", "assist_tackle_1_player_name", "assist_tackle_1_team",
    "assist_tackle_2_player_id", "assist_tackle_2_player_name", "assist_tackle_2_team",
    "assist_tackle_3_player_id", "assist_tackle_3_player_name", "assist_tackle_3_team",
    "assist_tackle_4_player_id", "assist_tackle_4_player_name", "assist_tackle_4_team",
    # pass_defense_*
    "pass_defense_1_player_id", "pass_defense_1_player_name",
    "pass_defense_2_player_id", "pass_defense_2_player_name",
    # fumbled_*
    "fumbled_1_team", "fumbled_1_player_id", "fumbled_1_player_name",
    "fumbled_2_player_id", "fumbled_2_player_name", "fumbled_2_team",
    # fumble_recovery_*
    "fumble_recovery_1_team", "fumble_recovery_1_yards",
    "fumble_recovery_1_player_id", "fumble_recovery_1_player_name",
    "fumble_recovery_2_team", "fumble_recovery_2_yards",
    "fumble_recovery_2_player_id", "fumble_recovery_2_player_name",
    # return_*
    "return_team", "return_yards",
]

# Drive Stats
# 'game_id' followed by every drive_* column name, one per line.
drive_stats = [
    "game_id",
    "drive_real_start_time",
    "drive_play_count",
    "drive_time_of_possession",
    "drive_first_downs",
    "drive_inside20",
    "drive_ended_with_score",
    "drive_quarter_start",
    "drive_quarter_end",
    "drive_yards_penalized",
    "drive_start_transition",
    "drive_end_transition",
    "drive_game_clock_start",
    "drive_game_clock_end",
    "drive_start_yard_line",
    "drive_end_yard_line",
    "drive_play_id_started",
    "drive_play_id_ended",
]

# Weather and Stadium
# 'game_id' plus the venue and weather column names.
weather_stadium = [
    "game_id",
    "stadium",
    "weather",
    "temp",
    "wind",
    "roof",
    "surface",
]

# Game Summary
# Game-state column names: teams, timeouts, scores, the *_prob columns, the
# ep/epa family, the wp/wpa family, and game-level context. Order matches
# the original listing.
# NOTE(review): 'explanation', 'game_location' and 'game_weather' could not
# be matched to documented nflverse play-by-play fields - confirm they exist
# before using this list as a column selector.
game_summary = [
    "game_id",
    # teams
    "home_team", "away_team",
    # timeouts
    "home_timeouts_remaining", "away_timeouts_remaining", "timeout", "timeout_team",
    "td_team",
    # scores before and after the play
    "posteam_score", "defteam_score", "score_differential",
    "posteam_score_post", "defteam_score_post", "score_differential_post",
    # *_prob
    "no_score_prob", "opp_fg_prob", "opp_safety_prob", "opp_td_prob",
    "fg_prob", "safety_prob", "td_prob",
    "extra_point_prob", "two_point_conversion_prob",
    # ep / epa
    "ep", "epa",
    "total_home_epa", "total_away_epa",
    "total_home_rush_epa", "total_away_rush_epa",
    "total_home_pass_epa", "total_away_pass_epa",
    "air_epa", "yac_epa", "comp_air_epa", "comp_yac_epa",
    "total_home_comp_air_epa", "total_away_comp_air_epa",
    "total_home_comp_yac_epa", "total_away_comp_yac_epa",
    "total_home_raw_air_epa", "total_away_raw_air_epa",
    "total_home_raw_yac_epa", "total_away_raw_yac_epa",
    # wp / wpa
    "wp", "def_wp", "home_wp", "away_wp", "wpa",
    "home_wp_post", "away_wp_post",
    "total_home_rush_wpa", "total_away_rush_wpa",
    "total_home_pass_wpa", "total_away_pass_wpa",
    "air_wpa", "yac_wpa", "comp_air_wpa", "comp_yac_wpa",
    "total_home_comp_air_wpa", "total_away_comp_air_wpa",
    "total_home_comp_yac_wpa", "total_away_comp_yac_wpa",
    "total_home_raw_air_wpa", "total_away_raw_air_wpa",
    "total_home_raw_yac_wpa", "total_away_raw_yac_wpa",
    # game-level context
    "away_score", "home_score", "location", "result", "total",
    "spread_line", "total_line", "div_game",
    "home_coach", "away_coach",
    "explanation", "game_stadium", "game_location", "game_weather",
]

# Penalties and Challenges
# 'game_id' plus the penalty, replay/challenge and defensive conversion
# column names, one per line.
penalties_challenges = [
    "game_id",
    "penalty",
    "penalty_team",
    "penalty_player_id",
    "penalty_player_name",
    "penalty_yards",
    "replay_or_challenge",
    "replay_or_challenge_result",
    "penalty_type",
    "defensive_two_point_attempt",
    "defensive_two_point_conv",
    "defensive_extra_point_attempt",
    "defensive_extra_point_conv",
]

# Miscellaneous
# 'game_id' plus the remaining column names that are in none of the other
# groups, one per line.
miscellaneous = [
    "game_id",
    "season",
    "cp",
    "cpoe",
    "series",
    "series_success",
    "series_result",
    "order_sequence",
    "start_time",
    "time_of_day",
    "nfl_api_id",
    "play_clock",
    "play_deleted",
    "play_type_nfl",
    "special_teams_play",
    "st_play_type",
    "end_clock_time",
    "end_yard_line",
    "fixed_drive",
    "fixed_drive_result",
]