# Holidays merged with fake calendar data

Double check that observed dates are indeed correct, and need an extra adjustment of weekday to be weekend if it's a holiday on a weekday (Labor Day, Memorial Day).

In [1]:
import pandas as pd

In [2]:
def get_calendar(start_date: str, end_date: str) -> pd.DataFrame:

    df = pd.DataFrame(
        {'service_date': pd.date_range(start = start_date,
                                      end = end_date)})
    
    df = df.assign(
        service_date = pd.to_datetime(df.service_date)
    )
    df = df.assign(
        day_name = df.service_date.dt.day_name(),
        day_of_week = df.service_date.dt.dayofweek,
        month = df.service_date.dt.month, 
        year = df.service_date.dt.year,
        quarter = df.service_date.dt.quarter,
    )
    
    df = df.assign(
        weekday = df.apply(lambda x: 1 if x.day_of_week <= 4 else 0, axis=1)
    )
    
    return df

In [3]:
START = "2021-01-01" 
END = "2022-12-31"
df = get_calendar(START, END)

In [4]:
holidays = pd.read_parquet("./holidays.parquet")

In [5]:
df2 = pd.merge(
    df, 
    holidays.rename(columns = {"observed_date": "service_date"})[["service_date", "holiday"]],
    on = ["service_date"],
    how = "left",
)

# If it's a holiday, even if it's a weekday, it should be flagged as not a weekday
# Labor Day, Memorial Day....all fall on weekdays but should be counted as weekends
df2 = df2.assign(
    weekday = df2.apply(lambda x: 0 if x.holiday==1 else x.weekday, axis=1),
    holiday = df2.holiday.fillna(0).astype(int)
).drop_duplicates()

In [6]:
# Pick New Year's
df2[(df2.service_date >= "2021-12-30") & 
    (df2.service_date <= "2022-01-02")]

Unnamed: 0,service_date,day_name,day_of_week,month,year,quarter,weekday,holiday
365,2021-12-30,Thursday,3,12,2021,4,1,0
366,2021-12-31,Friday,4,12,2021,4,0,1
369,2022-01-01,Saturday,5,1,2022,1,0,0
370,2022-01-02,Sunday,6,1,2022,1,0,0


In [7]:
# https://stackoverflow.com/questions/13145368/how-to-find-the-maximum-value-in-a-list-of-tuples
#from operator import itemgetter

#max_date = max(CA_HOLIDAYS,key=itemgetter(0))[0]   #faster solution
#itemgetter(0)