Football Injury Datasets

https://nflreadr.nflverse.com/reference/load_injuries.html

https://nflreadr.nflverse.com/reference/load_rosters.html

https://nflreadr.nflverse.com/reference/load_players.html

https://nflreadr.nflverse.com/reference/load_combine.html

https://nflreadr.nflverse.com/articles/dictionary_injuries.html

https://nflreadr.nflverse.com/articles/dictionary_rosters.html

https://nflreadr.nflverse.com/articles/dictionary_combine.html

In [None]:
import nfl_data_py as nfl

# Every season from 2009 through 2024 inclusive (range() excludes its stop
# value). A two-element list such as [2009, 2024] would request only those
# two seasons rather than the span between them.
seasons = list(range(2009, 2025))

# Combine data: the raw frame is printed; the result of clean_nfl_data is
# kept in cleancombined.
combined = nfl.import_combine_data(seasons)
cleancombined = nfl.clean_nfl_data(combined)
print("COMBINE DATA")
print("====================================")
print(combined)

# Seasonal rosters for the same seasons.
rosters = nfl.import_seasonal_rosters(seasons)
cleanrosters = nfl.clean_nfl_data(rosters)
print("ROSTER DATA")
print("====================================")
print(rosters)

# ID data; import_ids is called without a season argument.
ids = nfl.import_ids()
cleanids = nfl.clean_nfl_data(ids)
print("ID DATA")
print("====================================")
print(ids)

# Injury data for the same seasons.
injuries = nfl.import_injuries(seasons)
cleaninjuries = nfl.clean_nfl_data(injuries)
print("INJURY DATA")
print("====================================")
print(injuries)

In [None]:
import pandas as pd
import nfl_data_py as nfl

# Seasons 2009 through 2024 inclusive (range() excludes its stop value).
years = list(range(2009, 2025))

# Load each dataset for those seasons and pass it through nfl.clean_nfl_data.
combine_df = nfl.clean_nfl_data(nfl.import_combine_data(years=years))
rosters_df = nfl.clean_nfl_data(nfl.import_seasonal_rosters(years=years))
injuries_df = nfl.clean_nfl_data(nfl.import_injuries(years=years))

# Build a lower-cased, whitespace-trimmed name key in every frame so that
# differences in case or surrounding spaces do not prevent a match.
combine_df['player_name_clean'] = combine_df['player_name'].str.lower().str.strip()
rosters_df['player_name_clean'] = rosters_df['player_name'].str.lower().str.strip()
injuries_df['full_name_clean'] = injuries_df['full_name'].str.lower().str.strip()

# Step 1: outer-join combine with injuries on the cleaned names. Overlapping
# non-key columns are disambiguated with the _combine / _injury suffixes.
merged_ci = pd.merge(
    combine_df,
    injuries_df,
    left_on='player_name_clean',
    right_on='full_name_clean',
    how='outer',  # keep rows that appear on only one side
    suffixes=('_combine', '_injury')
)

# Because the two key columns have different names, the outer join keeps both,
# and rows that exist only in injuries_df have a missing 'player_name_clean'.
# Fill that key from 'full_name_clean' so those players can still be matched
# to their roster rows in the next merge.
merged_ci['player_name_clean'] = merged_ci['player_name_clean'].fillna(
    merged_ci['full_name_clean']
)

# Step 2: outer-join the result with rosters on the shared cleaned-name key.
# NOTE(review): the key is the name only, so a name that occurs on several
# rows on both sides yields every pairing of those rows - confirm whether
# season (or a player id) should be part of the join key.
final_df = pd.merge(
    merged_ci,
    rosters_df,
    on='player_name_clean',
    how='outer',  # keep rows that appear on only one side
    suffixes=('', '_roster')
)

# Display all column names in the final merged dataframe
print("All column names:")
print(final_df.columns.tolist())
