## Function to extract

In [2]:
def extract_canton_hourly(df_type, canton):
    canton_public = df_type.loc[df_type[canton] == True]
    canton_public = canton_public[["startDate", "endDate", "type", "Name"]]
    
    expanded_df = pd.DataFrame()
    
    for _, row in canton_public.iterrows():
        event_dates = pd.date_range(start=row["startDate"], end=row["endDate"], freq='H')
        event_data = pd.DataFrame({"type": [row["type"]] * len(event_dates),
                                   "Name": [row["Name"]] * len(event_dates)},
                                  index=event_dates)
        expanded_df = pd.concat([expanded_df, event_data])
        return expanded_df
    

# Public Holiday ✅

We extract all the public holidays from the neighbor cantons

In [3]:
# Urlaubstage und Public Holidays

import requests
import pandas as pd

# https://www.openholidaysapi.org/en/#school-holidays

api_url_p = "https://openholidaysapi.org/PublicHolidays"

params = {
    "countryIsoCode": "CH",

    "validFrom": "2021-08-28",
    "validTo": "2023-11-08"
}

headers = {'accept': 'text/json'}

response_p = requests.get(api_url_p, params=params, headers=headers)

holidays_data_p = response_p.json()

holidays_df_p = pd.DataFrame(holidays_data_p)

extract_text = lambda col: col[0]["text"] if col else None
holidays_df_p["Name"] = holidays_df_p["name"].apply(extract_text)

holidays_df_p["startDate"] = pd.to_datetime(holidays_df_p["startDate"], format="%Y-%m-%d")
holidays_df_p["endDate"] = pd.to_datetime(holidays_df_p["endDate"], format="%Y-%m-%d")

holidays_df_p["subdivision_short_names"] = holidays_df_p["subdivisions"].apply(lambda x: [entry["shortName"] for entry in x] if isinstance(x, list) else [])
holidays_df_p


all_unique_short_names = set()
_ = holidays_df_p["subdivision_short_names"].apply(lambda x: all_unique_short_names.update(x))

for short_name in all_unique_short_names:
    holidays_df_p[short_name] = holidays_df_p["subdivision_short_names"].apply(lambda x: short_name in x)

holidays_df_p.drop(columns = ["id", "name", "subdivisions", "subdivision_short_names"], axis = 1, inplace = True)

holidays_df_p.head()

Unnamed: 0,startDate,endDate,type,nationwide,Name,JU,AG,BE,GL,NE,...,TI,ZG,AR,TG,SG,FR,SZ,VS,OW,BL
0,2021-09-09,2021-09-09,Public,False,Genfer Bettag,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,2021-09-13,2021-09-13,Public,False,Knabenschiessen,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,2021-09-19,2021-09-19,Public,False,"Eidgenössischer Dank-, Buss- und Bettag",True,True,True,True,True,...,True,True,True,True,True,True,True,True,True,True
3,2021-09-22,2021-09-22,Public,False,Mauritiustag,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,2021-09-25,2021-09-25,Public,False,Bruder-Klausen-Fest,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False


In [4]:
canton_public = holidays_df_p.loc[holidays_df_p["ZH"] == True]

canton_public = canton_public[["startDate","Name"]]
canton_public.set_index("startDate", inplace = True)
canton_public.rename_axis("Date", inplace = True)
canton_public.reset_index(inplace = True)

canton_public["Date"]  = pd.to_datetime(canton_public["Date"])

canton_public

Unnamed: 0,Date,Name
0,2021-09-13,Knabenschiessen
1,2021-09-19,"Eidgenössischer Dank-, Buss- und Bettag"
2,2021-12-26,Stephanstag
3,2022-01-02,Berchtoldstag
4,2022-04-15,Karfreitag
5,2022-04-18,Ostermontag
6,2022-04-25,Sechseläuten
7,2022-05-01,Tag der Arbeit
8,2022-06-06,Pfingstmontag
9,2022-09-12,Knabenschiessen


In [5]:
timeframe = pd.read_csv("accurate_timeframe.csv")

timeframe["datetime"]  = pd.to_datetime(timeframe["datetime"])

timeframe.head()

Unnamed: 0,datetime
0,2021-09-29 00:00:00
1,2021-09-29 01:00:00
2,2021-09-29 02:00:00
3,2021-09-29 03:00:00
4,2021-09-29 04:00:00


In [6]:
timeframe['date'] = timeframe['datetime'].dt.date
canton_public['date'] = canton_public['Date'].dt.date
timeframe['date']  = pd.to_datetime(timeframe['date'])
canton_public['date']  = pd.to_datetime(canton_public['date'])

merged_df = pd.merge_asof(timeframe, canton_public, left_on=['date'], right_on=['date'], direction='backward', suffixes=('_left', '_right'))

# Filter the merged DataFrame to keep only rows where the 'Date_left' is greater than or equal to the 'Date_right'
merged_df

# Filter the merged DataFrame to keep only rows where the 'Date_left' is greater than or equal to the 'Date_right'
merged_df = merged_df[merged_df['date'] == merged_df['Date']]
merged_df = merged_df[["datetime", "Name"]]
merged_df.set_index("datetime", inplace = True)
merged_df.rename_axis("Date", inplace = True)
merged_df.rename(columns={"Name": "Public Holiday ZH"}, inplace=True)


merged_df.to_csv("zh_public.csv")

display(merged_df)
print(merged_df.shape)

Unnamed: 0_level_0,Public Holiday ZH
Date,Unnamed: 1_level_1
2021-12-26 00:00:00,Stephanstag
2021-12-26 01:00:00,Stephanstag
2021-12-26 02:00:00,Stephanstag
2021-12-26 03:00:00,Stephanstag
2021-12-26 04:00:00,Stephanstag
...,...
2023-09-17 19:00:00,"Eidgenössischer Dank-, Buss- und Bettag"
2023-09-17 20:00:00,"Eidgenössischer Dank-, Buss- und Bettag"
2023-09-17 21:00:00,"Eidgenössischer Dank-, Buss- und Bettag"
2023-09-17 22:00:00,"Eidgenössischer Dank-, Buss- und Bettag"


(432, 1)


# School Holidays

In [119]:
# Urlaubstage und Public Holidays

import requests
import pandas as pd

# https://www.openholidaysapi.org/en/#school-holidays

api_url_s = "https://openholidaysapi.org/SchoolHolidays"

params = {
    "countryIsoCode": "CH",

    "validFrom": "2021-08-28",
    "validTo": "2023-11-08"
}

headers = {'accept': 'text/json'}

response_s = requests.get(api_url_s, params=params, headers=headers)

holidays_data_s = response_s.json()

holidays_df_s = pd.DataFrame(holidays_data_s)

extract_text = lambda col: col[0]["text"] if col else None
holidays_df_s["Name"] = holidays_df_s["name"].apply(extract_text)

holidays_df_s["startDate"] = pd.to_datetime(holidays_df_s["startDate"], format="%Y-%m-%d")
holidays_df_s["endDate"] = pd.to_datetime(holidays_df_s["endDate"], format="%Y-%m-%d")

holidays_df_s["subdivision_short_names"] = holidays_df_s["subdivisions"].apply(lambda x: [entry["shortName"] for entry in x] if isinstance(x, list) else [])
holidays_df_s


all_unique_short_names = set()
_ = holidays_df_s["subdivision_short_names"].apply(lambda x: all_unique_short_names.update(x))

for short_name in all_unique_short_names:
    holidays_df_s[short_name] = holidays_df_s["subdivision_short_names"].apply(lambda x: short_name in x)

holidays_df_s.drop(columns = ["id", "name", "subdivisions", "subdivision_short_names"], axis = 1, inplace = True)

holidays_df_s.drop("comment", axis = 1, inplace = True)

holidays_df_s


Unnamed: 0,startDate,endDate,type,nationwide,Name,JU,AG,BE,GL,NE,...,UR,ZG,AR,TG,SG,FR,SZ,VS,OW,BL
0,2021-06-19,2021-08-29,School,False,Sommer,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,2021-07-03,2021-08-29,School,False,Sommer,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,2021-09-25,2021-10-17,School,False,Herbst,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False
3,2021-09-25,2021-10-10,School,False,Herbst,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,2021-10-02,2021-10-24,School,False,Herbst,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299,2023-10-14,2023-10-29,School,False,Herbst,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
300,2023-10-14,2023-10-29,School,False,Herbst,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
301,2023-10-19,2023-10-29,School,False,Herbst,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
302,2023-10-21,2023-10-29,School,False,Herbst,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [126]:
canton_school = holidays_df_s.loc[holidays_df_s["AG"] == True]
canton_school = canton_school[["startDate", "endDate", "Name"]]

canton_school.set_index("startDate", inplace=True)
canton_school.rename_axis("Date", inplace=True)
canton_school.reset_index(inplace=True)

canton_school["Date"] = pd.to_datetime(canton_school["Date"])

timeframe = pd.read_csv("accurate_timeframe.csv")
timeframe["datetime"] = pd.to_datetime(timeframe["datetime"])

timeframe["date"] = timeframe["datetime"].dt.date
canton_school["date"] = canton_school["Date"].dt.date
timeframe["date"] = pd.to_datetime(timeframe["date"])
canton_school["date"] = pd.to_datetime(canton_school["date"])
display(canton_school)
display(timeframe)

Unnamed: 0,Date,endDate,Name,date
0,2021-10-02,2021-10-17,Herbst,2021-10-02
1,2021-12-24,2022-01-09,Winter,2021-12-24
2,2022-04-09,2022-04-24,Frühling,2022-04-09
3,2022-07-16,2022-08-07,Sommer,2022-07-16
4,2022-10-01,2022-10-16,Herbst,2022-10-01
5,2022-12-24,2023-01-08,Winter,2022-12-24
6,2023-04-07,2023-04-23,Frühling,2023-04-07
7,2023-07-22,2023-08-13,Sommer,2023-07-22
8,2023-09-30,2023-10-15,Herbst,2023-09-30


Unnamed: 0,datetime,date
0,2021-09-29 00:00:00,2021-09-29
1,2021-09-29 01:00:00,2021-09-29
2,2021-09-29 02:00:00,2021-09-29
3,2021-09-29 03:00:00,2021-09-29
4,2021-09-29 04:00:00,2021-09-29
...,...,...
18378,2023-11-07 20:00:00,2023-11-07
18379,2023-11-07 21:00:00,2023-11-07
18380,2023-11-07 22:00:00,2023-11-07
18381,2023-11-07 23:00:00,2023-11-07


In [127]:
merged_df = pd.merge_asof(timeframe, canton_school, on="date", direction="backward", suffixes=("_left", "_right"))

In [128]:
from pandas.tseries.offsets import DateOffset

merged_df = merged_df[merged_df["datetime"] < merged_df["endDate"] + DateOffset(days=1)]

merged_df = merged_df[["datetime", "Name"]]
merged_df.set_index("datetime", inplace = True)
merged_df.rename_axis("Date", inplace = True)
merged_df.rename(columns={"Name": "School Holidays AG"}, inplace=True)

merged_df.to_csv("ag_school_holidays.csv")
merged_df

Unnamed: 0_level_0,School Holidays AG
Date,Unnamed: 1_level_1
2021-10-02 00:00:00,Herbst
2021-10-02 01:00:00,Herbst
2021-10-02 02:00:00,Herbst
2021-10-02 03:00:00,Herbst
2021-10-02 04:00:00,Herbst
...,...
2023-10-15 19:00:00,Herbst
2023-10-15 20:00:00,Herbst
2023-10-15 21:00:00,Herbst
2023-10-15 22:00:00,Herbst
