In [64]:
import pandas as pd
import holidays
from holidays.constants import PUBLIC, UNOFFICIAL
from pathlib import Path

In [None]:
# Inclusive date window covered by the export, and the calendar years it spans.
start_ts, end_ts = pd.Timestamp("1990-01-01"), pd.Timestamp("2024-12-31")
years = range(start_ts.year, end_ts.year + 1)  # +1 so the final year is included

# Output directory, relative to the notebook's working directory; created on demand.
out_dir = Path("..", "data", "holidays", "raw")
out_dir.mkdir(parents=True, exist_ok=True)

In [66]:
# All three calendars are generated for the same range of years.
shared_kwargs = {"years": years}

# US country calendars, one per holiday category (PUBLIC first, then UNOFFICIAL).
us_public_holidays, us_unofficial_holidays = (
    holidays.country_holidays("US", categories=category, **shared_kwargs)
    for category in (PUBLIC, UNOFFICIAL)
)

# Financial-market calendar requested under the "NYSE" code.
nyse_holidays = holidays.financial_holidays("NYSE", **shared_kwargs)

In [72]:
# Every date that appears in at least one of the three calendars, ascending.
all_dates = sorted(
    set().union(
        us_public_holidays.keys(),
        us_unofficial_holidays.keys(),
        nyse_holidays.keys(),
    )
)

# One row per date, restricted to the inclusive [start_ts, end_ts] window.
df = pd.DataFrame({"timestamp": pd.to_datetime(all_dates)})
in_window = df["timestamp"].between(start_ts, end_ts)
df = df.loc[in_window].reset_index(drop=True)

# The calendars are looked up with plain `date` objects, so convert the
# timestamps once and reuse the keys for every column.
date_keys = df["timestamp"].dt.date
for column_name, holiday_lookup in (
    ("publicHolidaysUS", us_public_holidays),
    ("unofficialHolidaysUS", us_unofficial_holidays),
    ("financialHolidaysNYSE", nyse_holidays),
):
    # `.get` yields the holiday name, or None when the date is absent from
    # that calendar, which leaves the cell empty.
    df[column_name] = date_keys.map(holiday_lookup.get)

In [73]:
# Preview the combined table: one row per date, one name column per calendar.
# A cell is empty where the date is not present in that calendar.
df

Unnamed: 0,timestamp,publicHolidaysUS,unofficialHolidaysUS,financialHolidaysNYSE
0,1990-01-01,New Year's Day,,New Year's Day
1,1990-01-15,Martin Luther King Jr. Day,,
2,1990-02-02,,Groundhog Day,
3,1990-02-14,,Valentine's Day,
4,1990-02-19,Washington's Birthday,,Washington's Birthday
...,...,...,...,...
654,2024-10-31,,Halloween,
655,2024-11-05,,Election Day,
656,2024-11-11,Veterans Day,,
657,2024-11-28,Thanksgiving Day,,Thanksgiving Day


In [None]:
# Split the combined table into one frame per exported dataset.
#
# `df` holds the union of dates from all calendars, so a plain column
# selection would leave rows in each frame whose holiday columns are all empty
# (e.g. Groundhog Day would appear in the NYSE frame with no name). Each frame
# therefore keeps only the dates that are a holiday in its own calendar(s).
us_cols = ["publicHolidaysUS", "unofficialHolidaysUS"]
nyse_cols = ["financialHolidaysNYSE"]

us_df = (
    df[["timestamp", *us_cols]]
    .dropna(subset=us_cols, how="all")  # drop dates with neither a public nor an unofficial name
    .reset_index(drop=True)
)
nyse_df = (
    df[["timestamp", *nyse_cols]]
    .dropna(subset=nyse_cols, how="all")  # drop dates with no NYSE holiday name
    .reset_index(drop=True)
)

In [74]:
# Persist each dataset twice, as CSV and as Parquet, without the row index.
# Write order: US CSV, US Parquet, NYSE CSV, NYSE Parquet.
for frame, csv_name, parquet_name in (
    (us_df, "us_holidays.csv", "us_holidays.parquet"),
    (nyse_df, "nyse_holidays.csv", "nyse_holidays.parquet"),
):
    frame.to_csv(out_dir / csv_name, index=False)
    frame.to_parquet(out_dir / parquet_name, index=False)