Football Injury Datasets

https://nflreadr.nflverse.com/reference/load_injuries.html

https://nflreadr.nflverse.com/reference/load_rosters.html

https://nflreadr.nflverse.com/reference/load_players.html

https://nflreadr.nflverse.com/reference/load_combine.html

https://nflreadr.nflverse.com/articles/dictionary_injuries.html

https://nflreadr.nflverse.com/articles/dictionary_rosters.html

https://nflreadr.nflverse.com/articles/dictionary_combine.html

In [None]:
# import nfl_data_py as nfl

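# NOTE: [2009, 2024] is a two-element list (seasons 2009 and 2024 only), not a range;
# use list(range(2009, 2025)) to request every season from 2009 through 2024.
# The same applies to the roster and injury calls below.
# combined = nfl.import_combine_data([2009, 2024])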
# cleancombined = nfl.clean_nfl_data(combined)
# print("COMBINE DATA")
# print("====================================")
# print(cleancombined.columns.tolist())

# rosters = nfl.import_seasonal_rosters([2009, 2024])
# cleanrosters = nfl.clean_nfl_data(rosters)
# print("ROSTER DATA")
# print("====================================")
# print(cleanrosters.columns.tolist())

# ids = nfl.import_ids()
# cleanids = nfl.clean_nfl_data(ids)
# print("ID DATA")
# print("====================================")
# print(cleanids.columns.tolist())

# injuries = nfl.import_injuries([2009, 2024])
# cleaninjuries = nfl.clean_nfl_data(injuries)
# print("INJURY DATA")
# print("====================================")
# print(cleaninjuries.columns.tolist())

COMBINE DATA
['season', 'draft_year', 'draft_team', 'draft_round', 'draft_ovr', 'pfr_id', 'cfb_id', 'player_name', 'pos', 'school', 'ht', 'wt', 'forty', 'bench', 'vertical', 'broad_jump', 'cone', 'shuttle']
ROSTER DATA
['season', 'team', 'position', 'depth_chart_position', 'jersey_number', 'status', 'player_name', 'first_name', 'last_name', 'birth_date', 'height', 'weight', 'college', 'player_id', 'espn_id', 'sportradar_id', 'yahoo_id', 'rotowire_id', 'pff_id', 'pfr_id', 'fantasy_data_id', 'sleeper_id', 'years_exp', 'headshot_url', 'ngs_position', 'week', 'game_type', 'status_description_abbr', 'football_name', 'esb_id', 'gsis_it_id', 'smart_id', 'entry_year', 'rookie_year', 'draft_club', 'draft_number', 'age']
ID DATA
['merge_name', 'mfl_id', 'rotoworld_id', 'cfbref_id', 'sleeper_id', 'team', 'college', 'yahoo_id', 'cbs_id', 'birthdate', 'pfr_id', 'twitter_username', 'stats_id', 'weight', 'age', 'draft_year', 'fantasypros_id', 'rotowire_id', 'draft_pick', 'draft_round', 'nfl_id', 'db_

In [None]:
import pandas as pd
import nfl_data_py as nfl

def add_player_key(df, name_col):
    """Return a copy of *df* with a normalized ``player_key`` column.

    The key is the text in *name_col*, lower-cased and stripped of
    surrounding whitespace, so the same player name matches across tables.

    NOTE(review): the key is derived from the name only, so two different
    players who share a name end up with the same key and will be merged
    together downstream. The roster table exposes ``player_id`` and the
    injury table ``gsis_id``; joining on an ID may be more reliable --
    confirm that these columns hold the same identifier before switching.
    """
    return df.assign(player_key=df[name_col].str.lower().str.strip())


def prepare_injuries(injuries_df):
    """Rename, order, and number the injury events.

    * ``season`` / ``week`` become ``season_injury`` / ``week_injury`` so they
      do not collide with the roster columns of the same name.
    * Rows are sorted by season and week so earlier events come first.
    * ``prev_injury_count`` is the number of earlier rows (in that order)
      with the same ``player_key``.

    Expects *injuries_df* to already carry a ``player_key`` column.
    """
    ordered = injuries_df.rename(
        columns={'season': 'season_injury', 'week': 'week_injury'}
    ).sort_values(by=['season_injury', 'week_injury'])
    ordered['prev_injury_count'] = ordered.groupby('player_key').cumcount()
    return ordered


def merge_player_data(rosters_df, injuries_df, combine_df):
    """Merge roster, injury, and combine tables on ``player_key``.

    Returns a ``(roster_injury_df, final_df)`` tuple:

    * ``roster_injury_df`` -- outer join of roster rows with injury events
      for the same player and season. Players with injuries appear once per
      injury event; players without injuries still appear from the roster.
    * ``final_df`` -- ``roster_injury_df`` outer-joined with the combine
      table on ``player_key`` and sorted by player, season, and injury week
      (missing values last).

    NOTE(review): pandas matches null join keys against each other, so rows
    whose name (and therefore ``player_key``) is missing in several tables
    will be paired up. Filter such rows beforehand if that is not wanted.
    """
    roster_injury_df = pd.merge(
        rosters_df,
        injuries_df,
        left_on=['player_key', 'season'],
        right_on=['player_key', 'season_injury'],
        how='outer',
        suffixes=('_roster', '_injury'),
    )

    # Drop the combine table's own season column so it cannot clash with the
    # roster season. errors='ignore' keeps this a no-op when the column is
    # absent instead of raising KeyError.
    combine_no_season = combine_df.drop(columns=['season'], errors='ignore')

    final_df = pd.merge(
        roster_injury_df,
        combine_no_season,
        on='player_key',
        how='outer',
        suffixes=('', '_combine'),
    )
    final_df = final_df.sort_values(
        by=['player_key', 'season', 'week_injury'], na_position='last'
    )
    return roster_injury_df, final_df


# The guard is true when this cell runs in a notebook or as a script, so the
# same module-level names are defined there; it only keeps the download and
# CSV export from running when the helpers above are imported.
if __name__ == "__main__":
    # Seasons 2009 through 2024 inclusive.
    years = list(range(2009, 2025))

    # Import and clean each table, then attach the shared name-based key.
    combine_df = add_player_key(
        nfl.clean_nfl_data(nfl.import_combine_data(years=years)), 'player_name'
    )
    rosters_df = add_player_key(
        nfl.clean_nfl_data(nfl.import_seasonal_rosters(years=years)), 'player_name'
    )
    injuries_df = prepare_injuries(
        add_player_key(
            nfl.clean_nfl_data(nfl.import_injuries(years=years)), 'full_name'
        )
    )

    roster_injury_df, final_df = merge_player_data(rosters_df, injuries_df, combine_df)

    # Single source of truth for the output path, so the file written and the
    # message printed cannot drift apart.
    output_path = "final_dataset.csv"
    final_df.to_csv(output_path, index=False)
    print(f"Data exported to {output_path}")

    # Print each column's name and data type.
    print("\nColumn data types:")
    for col in final_df.columns:
        print(f"{col}: {final_df[col].dtype}")


Data exported to final_data_by_injury_event.csv

Column data types:
season: float64
team_roster: object
position_roster: object
depth_chart_position: object
jersey_number: object
status: object
player_name: object
first_name_roster: object
last_name_roster: object
birth_date: datetime64[ns]
height: float64
weight: float64
college: object
player_id: object
espn_id: object
sportradar_id: object
yahoo_id: object
rotowire_id: object
pff_id: object
pfr_id: object
fantasy_data_id: object
sleeper_id: object
years_exp: float64
headshot_url: object
ngs_position: object
week: float64
game_type_roster: object
status_description_abbr: object
football_name: object
esb_id: object
gsis_it_id: object
smart_id: object
entry_year: float64
rookie_year: float64
draft_club: object
draft_number: object
age: float64
player_key: object
season_injury: float64
game_type_injury: object
team_injury: object
week_injury: float64
gsis_id: object
position_injury: object
full_name: object
first_name_injury: object
las