# Create holidays calendar data set

In [29]:
from getpass import getpass
import pandas as pd
import numpy as np
import requests

In [6]:
# Calendarific API key, prompted for interactively with getpass so the secret
# is not hardcoded in the notebook source. Read later as a module-level name
# by get_holidays_for_year.
api_key = getpass(prompt='Enter Calendarific API key')

Enter Calendarific API key ········


In [30]:
def get_holidays_for_year(year, country='US', holiday_type='national', timeout=30):
    """Fetch the holidays of one calendar year from the Calendarific API.

    Reads the module-level ``api_key`` for authentication.

    Args:
        year: Year to query; sent as the ``year`` query parameter.
        country: Value of the ``country`` query parameter. Defaults to ``'US'``.
        holiday_type: Value of the ``type`` query parameter. Defaults to
            ``'national'``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The ``['response']['holidays']`` entry of the decoded JSON payload.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = 'https://calendarific.com/api/v2/holidays'
    # Keep every query parameter in one dict so requests handles the encoding,
    # instead of splitting them between the URL string and ``params``.
    params = {
        'api_key': api_key,
        'country': country,
        'type': holiday_type,
        'year': year,
    }

    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()['response']['holidays']

In [47]:
def get_holidays_df_for_year_range(start_year, end_year):
    """Collect the holidays of several consecutive years into one DataFrame.

    Args:
        start_year: First year to fetch (inclusive).
        end_year: Year at which to stop (exclusive), so
            ``(2018, 2040)`` fetches 2018 through 2039.

    Returns:
        A DataFrame with a ``date`` column (the ``date.iso`` field of each
        holiday) and a ``name`` column. The per-year frames are concatenated
        as-is, so the integer index restarts at 0 for every year. When the
        year range is empty, an empty DataFrame with the same two columns is
        returned.
    """
    frames = []
    for year in np.arange(start_year, end_year, 1):
        holidays = get_holidays_for_year(year)
        frames.append(pd.DataFrame({
            'date': [h['date']['iso'] for h in holidays],
            'name': [h['name'] for h in holidays],
        }))

    if not frames:
        # pd.concat raises ValueError on an empty list; return an empty frame
        # with the expected columns instead.
        return pd.DataFrame(columns=['date', 'name'])
    return pd.concat(frames)

In [48]:
# The end year is exclusive, so this fetches 2018 through 2039
# (one API request per year).
df = get_holidays_df_for_year_range(2018, 2040)

In [51]:
# Parse the ISO date strings returned by the API into pandas datetimes.
df['date'] = pd.to_datetime(df['date'])

In [56]:
# Use the holiday date as the index, dropping it from the columns.
# NOTE: this rebinds df, so re-running the cell fails with a KeyError because
# 'date' is no longer a column.
df = df.set_index('date', drop=True)

In [58]:
# Exercise lookup: label-based access on the date index using a date string.
df.loc['2018-01-01']

name    New Year's Day
Name: 2018-01-01 00:00:00, dtype: object

In [59]:
# Persist the date-indexed holidays to holidays.csv (path relative to the
# working directory); the index is written out as the 'date' column.
df.to_csv('holidays.csv')

# Load holidays dataset from CSV

In [64]:
# Reload the saved file, parsing 'date' as datetimes and restoring it as the index.
df = pd.read_csv('holidays.csv', parse_dates=['date'], index_col='date')

In [65]:
# Display the reloaded frame to check the CSV round trip.
df

Unnamed: 0_level_0,name
date,Unnamed: 1_level_1
2018-01-01,New Year's Day
2018-01-15,Martin Luther King Jr. Day
2018-02-19,Presidents' Day
2018-05-28,Memorial Day
2018-07-04,Independence Day
...,...
2039-10-10,Columbus Day
2039-11-11,Veterans Day
2039-11-24,Thanksgiving Day
2039-12-25,Christmas Day


In [69]:
# Test creating an is_holiday column
from core.consts import EIA_EARLIEST_HOUR_UTC
from core.utils import create_timeseries_df_1h

# Span from the project's EIA_EARLIEST_HOUR_UTC constant to the current UTC
# time rounded to the nearest hour.
start_ts = pd.to_datetime(EIA_EARLIEST_HOUR_UTC).to_pydatetime()
# pd.Timestamp.utcnow() is deprecated in recent pandas; now('UTC') is the
# documented replacement and likewise yields a timezone-aware UTC timestamp.
# NOTE(review): round('h') can land up to 30 minutes in the future; use
# floor('h') if the end must not pass "now" - confirm the intent.
end_ts = pd.Timestamp.now('UTC').round('h').to_pydatetime()

# Presumably builds an hourly, UTC-indexed frame between the two timestamps;
# verify against core.utils.
df_h = create_timeseries_df_1h(start_ts, end_ts)

In [73]:
# Break the timestamp index into calendar components, one column each.
for component in ('year', 'month', 'day'):
    df_h[component] = getattr(df_h.index, component)

In [74]:
# Display the hourly frame with its new year/month/day columns.
df_h

Unnamed: 0_level_0,year,month,day
utc_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-07-01 05:00:00+00:00,2015,7,1
2015-07-01 06:00:00+00:00,2015,7,1
2015-07-01 07:00:00+00:00,2015,7,1
2015-07-01 08:00:00+00:00,2015,7,1
2015-07-01 09:00:00+00:00,2015,7,1
...,...,...,...
2024-10-21 16:00:00+00:00,2024,10,21
2024-10-21 17:00:00+00:00,2024,10,21
2024-10-21 18:00:00+00:00,2024,10,21
2024-10-21 19:00:00+00:00,2024,10,21


In [96]:
# Flag every hourly row whose calendar date appears among the holiday dates.
# NOTE(review): the dates are taken from the UTC index, while the holidays
# are presumably US local calendar dates - confirm whether a timezone
# conversion is needed before comparing.
hourly_dates = pd.Series(df_h.index.date, index=df_h.index)
holiday_dates = df.index.date
df_h['is_holiday'] = hourly_dates.isin(holiday_dates)

In [103]:
# Display the hourly frame now that the is_holiday column is present.
df_h

Unnamed: 0_level_0,year,month,day,is_holiday
utc_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-07-01 05:00:00+00:00,2015,7,1,False
2015-07-01 06:00:00+00:00,2015,7,1,False
2015-07-01 07:00:00+00:00,2015,7,1,False
2015-07-01 08:00:00+00:00,2015,7,1,False
2015-07-01 09:00:00+00:00,2015,7,1,False
...,...,...,...,...
2024-10-21 16:00:00+00:00,2024,10,21,False
2024-10-21 17:00:00+00:00,2024,10,21,False
2024-10-21 18:00:00+00:00,2024,10,21,False
2024-10-21 19:00:00+00:00,2024,10,21,False


In [102]:
# Sanity check: list the distinct holiday names matched by the flagged rows.
holiday_mask = df_h['is_holiday']
lookup_dates = df_h[holiday_mask].index.date
np.unique(df.loc[lookup_dates])

array(['Christmas Day', 'Christmas Day (substitute)', 'Christmas Eve',
       'Columbus Day', 'Inauguration Day', 'Independence Day',
       'Independence Day (substitute)', 'Juneteenth',
       'Juneteenth (substitute)', 'Labor Day',
       'Martin Luther King Jr. Day', 'Memorial Day', "New Year's Day",
       "New Year's Day (substitute)", "Presidents' Day",
       'Thanksgiving Day', 'Veterans Day', 'Veterans Day (substitute)'],
      dtype=object)