In [1]:
import holidays
import pandas as pd

In [2]:
# Analysis window for the holiday calendar; both endpoints are inclusive.
start_date = '2015-01-01'
end_date = '2024-12-31'

# One timestamp per calendar day across the whole window.
date_range = pd.date_range(start_date, end_date, freq="D")

In [3]:
# Build the New York holiday calendar for exactly the years covered by
# `date_range`, so the calendar and the DataFrame below always describe the
# same period (no hard-coded year span that can drift out of sync).
holiday_years = range(date_range[0].year, date_range[-1].year + 1)

# `subdiv` selects the state; the older `state` keyword is deprecated in the
# holidays package.
ny_holidays = holidays.UnitedStates(years=holiday_years, subdiv="NY")

# One row per calendar day in the analysis window.
holiday_df = pd.DataFrame({"Date": date_range})

In [4]:
def _holiday_name(timestamp):
    """Return the New York holiday name for the timestamp's calendar day, or None."""
    return ny_holidays.get(timestamp.date())


def _holiday_flag(name):
    """Return 1 when a holiday name is present, otherwise 0."""
    return int(bool(name))


# Label every day with its holiday name (None for ordinary days), then derive
# a 0/1 indicator column from that label.
holiday_df["Holiday Name"] = holiday_df["Date"].apply(_holiday_name)
holiday_df["public_holiday"] = holiday_df["Holiday Name"].apply(_holiday_flag)

In [5]:
# Preview the first five rows to sanity-check the new columns.
holiday_df.head(n=5)

Unnamed: 0,Date,Holiday Name,public_holiday
0,2015-01-01,New Year's Day,1
1,2015-01-02,,0
2,2015-01-03,,0
3,2015-01-04,,0
4,2015-01-05,,0


In [6]:
# Preview the last five rows to confirm the range runs through the end date.
holiday_df.tail(n=5)

Unnamed: 0,Date,Holiday Name,public_holiday
3648,2024-12-27,,0
3649,2024-12-28,,0
3650,2024-12-29,,0
3651,2024-12-30,,0
3652,2024-12-31,,0


In [7]:
# Summarize column dtypes and non-null counts; "Holiday Name" is only
# populated on holiday rows, so its non-null count is the number of holidays.
holiday_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3653 entries, 0 to 3652
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Date            3653 non-null   datetime64[ns]
 1   Holiday Name    149 non-null    object        
 2   public_holiday  3653 non-null   int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 85.7+ KB


In [8]:
# Distinct holiday names in order of first appearance, ignoring ordinary days.
holiday_names = holiday_df["Holiday Name"]
unique_holidays = holiday_names[holiday_names.notna()].unique()
print("\nUnique holidays in the dataset:", unique_holidays, sep="\n")


Unique holidays in the dataset:
["New Year's Day" 'Martin Luther King Jr. Day' "Lincoln's Birthday"
 'Susan B. Anthony Day' "Washington's Birthday" 'Memorial Day'
 'Independence Day (observed)' 'Independence Day' 'Labor Day'
 'Columbus Day' 'Election Day' 'Veterans Day' 'Thanksgiving'
 'Christmas Day' "Susan B. Anthony Day; Washington's Birthday"
 'Christmas Day (observed)' "New Year's Day (observed)"
 "Lincoln's Birthday (observed)" 'Veterans Day (observed)'
 'Juneteenth National Independence Day (observed)'
 'Juneteenth National Independence Day']


In [9]:
# Every holiday row carries a flag of 1, so the column sum is the number of
# holiday dates in the whole window.
holiday_flags = holiday_df["public_holiday"]
total_public_holidays = holiday_flags.sum()
print(f"\nTotal number of public holidays: {total_public_holidays}")


Total number of public holidays: 149


In [10]:
# Tag each row with its calendar year, then total the holiday flags per year.
holiday_df["Year"] = holiday_df["Date"].dt.year
flags_by_year = holiday_df.groupby("Year")["public_holiday"]
yearly_holiday_counts = flags_by_year.sum()
print("\nNumber of public holidays by year:", yearly_holiday_counts, sep="\n")


Number of public holidays by year:
Year
2015    14
2016    13
2017    16
2018    14
2019    13
2020    14
2021    17
2022    17
2023    17
2024    14
Name: public_holiday, dtype: int64


In [11]:
# Persist the holiday table as CSV, leaving out the positional row index.
output_file = "public_holidays_ny.csv"
holiday_df.to_csv(path_or_buf=output_file, index=False)
print(f"The dataset has been saved as '{output_file}'.")

The dataset has been saved as 'public_holidays_ny.csv'.
