<a href="https://colab.research.google.com/github/jasdavis10/Forecasting/blob/main/Forecast%20date%20and%20feature%20generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [64]:
import pandas as pd
from pandas.tseries.holiday import USFederalHolidayCalendar

def create_business_days_dataframe(year):
    """
    Build a DataFrame of every business day in a calendar year.

    A business day is a Monday-Friday date that is not a US federal holiday
    according to ``USFederalHolidayCalendar``.

    Args:
        year: The calendar year to generate (anything that formats into a
            valid ``YYYY-01-01`` date string, e.g. ``2023``).

    Returns:
        A pandas DataFrame with a single datetime column named 'Date' holding
        the business days in ascending order, and a default 0..n-1 index.
        Returns None (after printing the error) if the dates cannot be built.
    """
    try:
        year_start = f"{year}-01-01"
        year_end = f"{year}-12-31"

        # Every calendar day of the year, then the federal holidays that
        # fall inside the same window.
        all_days = pd.date_range(start=year_start, end=year_end)
        holidays = USFederalHolidayCalendar().holidays(start=year_start, end=year_end)

        # weekday(): Monday == 0 ... Sunday == 6, so < 5 keeps Mon-Fri.
        is_business_day = (all_days.weekday < 5) & ~all_days.isin(holidays)

        return pd.DataFrame({'Date': all_days[is_business_day]})

    except Exception as e:
        # Documented best-effort contract: report the problem and return None.
        print(f"An error occurred: {e}")
        return None

def add_date_features(df):
    """
    Add day_of_week, month and is_first_business_day_after_holiday columns.

    The frame is modified in place and also returned. A row is flagged as the
    first business day after a holiday when its date is the next Monday-Friday,
    non-holiday date following a US federal holiday (weekends and back-to-back
    holidays in between are skipped).

    Args:
        df: A DataFrame with a 'Date' column that is datetime-typed or can be
            converted with ``pd.to_datetime``.

    Returns:
        The same DataFrame with the new columns added. If the 'Date' column
        cannot be parsed, a message is printed and the DataFrame is returned
        without the new columns.
    """
    # Ensure the date column is of datetime type
    if not pd.api.types.is_datetime64_any_dtype(df['Date']):
        try:
            df['Date'] = pd.to_datetime(df['Date'])
        except (ValueError, TypeError, OverflowError):
            print("Date column is not in a recognizable format. Please ensure correct date format.")
            return df

    df['day_of_week'] = df['Date'].dt.dayofweek
    df['month'] = df['Date'].dt.month
    df['is_first_business_day_after_holiday'] = False

    # Compare on calendar days so a time-of-day component cannot hide a match.
    days = df['Date'].dt.normalize()
    first_day = days.min()
    last_day = days.max()
    if pd.isna(first_day):
        # Empty or all-NaT input: there is no date range to look holidays up in.
        return df

    # Look back one week before the first row so a holiday that falls just
    # before the data starts still flags the first business day after it.
    lookback = pd.Timedelta(days=7)
    holidays = USFederalHolidayCalendar().holidays(start=first_day - lookback, end=last_day)
    holiday_set = set(holidays)

    # For each holiday, step forward to the next date that is neither a
    # weekend nor another holiday; those dates are the ones to flag.
    one_day = pd.Timedelta(days=1)
    first_days_after = set()
    for holiday in holidays:
        candidate = holiday + one_day
        while candidate.weekday() >= 5 or candidate in holiday_set:
            candidate += one_day
        first_days_after.add(candidate)

    df['is_first_business_day_after_holiday'] = days.isin(list(first_days_after)).to_numpy()

    return df