In [1]:
import nfl_data_py as nfl
from general_utilities import setup_logging
# NOTE(review): presumably installs the project's logging configuration; the
# helper lives in general_utilities and its behavior is not visible here --
# confirm what handlers/levels it sets up.
setup_logging()


In [2]:
import nfl_data_py as nfl
import pandas as pd
import logging

# Set up logging.
# NOTE: logging.basicConfig() does nothing when the root logger already has
# handlers, so the DEBUG level only applies if logging was not configured
# earlier in the session.
logging.basicConfig(level=logging.DEBUG)
start_year = 2010
final_year = 2024

# NOTE(review): range() excludes its stop value, so final_year itself is not
# fetched -- confirm that this is intended.
years = list(range(start_year, final_year))


def ensure_unique_index(frame: pd.DataFrame) -> pd.DataFrame:
    """Return *frame* with a unique index, keeping every row.

    Duplicate index labels do not mean duplicate rows, so rows are never
    dropped here; the index is simply replaced by a fresh RangeIndex.

    Args:
        frame: Any DataFrame.

    Returns:
        ``frame`` itself when its index is already unique, otherwise a new
        DataFrame with the same rows (same order) and a 0..n-1 index.
    """
    if not frame.index.duplicated().any():
        return frame

    logging.warning(
        "Duplicate indices found in the rosters DataFrame; "
        "resetting to a unique RangeIndex."
    )
    return frame.reset_index(drop=True)


def add_age_column(frame: pd.DataFrame, date_column: str = "gameday") -> pd.DataFrame:
    """Return a copy of *frame* with an ``age`` column in fractional years.

    Age is the number of whole days between ``birth_date`` and
    ``date_column``, divided by 365.25 and rounded to 3 decimals.

    Args:
        frame: DataFrame expected to hold ``birth_date`` and ``date_column``.
        date_column: Name of the column holding the reference date.
            NOTE(review): the default assumes the rosters data has a
            'gameday' column -- confirm against the actual schema.

    Returns:
        A new DataFrame with ``age`` added (or overwritten). When either
        required column is missing, a warning is logged per missing column
        and ``frame`` is returned unchanged.

    Raises:
        Whatever ``pd.to_datetime`` raises for values it cannot parse.
    """
    missing = [
        column for column in ("birth_date", date_column) if column not in frame.columns
    ]
    if missing:
        for column in missing:
            logging.warning("'%s' column not found in the data.", column)
        return frame

    elapsed = pd.to_datetime(frame[date_column]) - pd.to_datetime(frame["birth_date"])
    aged = frame.assign(age=(elapsed.dt.days / 365.25).round(3))
    logging.debug("Age column successfully calculated.")
    return aged


try:
    # Fetch weekly rosters data and make sure row labels are unique.
    data = ensure_unique_index(nfl.import_weekly_rosters(years))
except Exception:
    # Top-level notebook boundary: keep the cell best-effort, but record the
    # full traceback instead of only the exception message.
    logging.exception("An unexpected error occurred")
else:
    # Log the shape and index details
    logging.debug("Data shape: %s", data.shape)
    logging.debug("Data index: %s", data.index)

    try:
        data = add_age_column(data)
    except Exception:
        # A failed age calculation is non-fatal; `data` keeps its prior value.
        logging.exception("Failed to calculate the 'age' column")