# Dataset Creation

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Define the list of school-holiday periods.
# Each entry is a pair of 'YYYY-MM-DD' strings: the start date at [0] and the end date at [1].
# Both ends are treated as inclusive by is_school_holiday_in_schleswig_holstein.
# From https://www.kalenderpedia.de/ferien/ferien-schleswig-holstein-2019.html
# NOTE(review): the second and third ranges overlap (2019-04-04 .. 2019-04-13) —
# confirm against the source page that both entries are intended.
holidays = [['2018-12-21', '2019-01-04'],
            ['2019-03-29', '2019-04-13'],
            ['2019-04-04', '2019-04-18'],
            ['2019-05-31', '2019-05-31'],
            ['2019-07-01', '2019-08-10'],
            ['2019-10-04', '2019-10-18'],
            ['2019-12-23', '2020-01-06']]

def is_school_holiday_in_schleswig_holstein(date_strftime, holiday_ranges=None):
    """Return 1 if the given day lies inside a school-holiday period, else 0.

    Parameters
    ----------
    date_strftime : str or date-like
        The day to test, formatted as ``'%Y-%m-%d'``. Objects that expose
        ``strftime`` (e.g. ``pd.Timestamp`` or ``datetime.date``) are also
        accepted and are formatted to that string first.
    holiday_ranges : sequence of [start, end] pairs, optional
        Holiday periods as ``'%Y-%m-%d'`` strings, both ends inclusive.
        Defaults to the module-level ``holidays`` list.

    Returns
    -------
    int
        1 if the day falls within any period, 0 otherwise.
    """
    # Comparing a Timestamp/date directly against the string bounds would raise,
    # so bring every non-string input into the same textual form first.
    if not isinstance(date_strftime, str):
        date_strftime = date_strftime.strftime('%Y-%m-%d')
    if holiday_ranges is None:
        holiday_ranges = holidays
    # Zero-padded 'YYYY-MM-DD' strings sort lexicographically in chronological
    # order, so plain string comparison is sufficient here.
    return int(any(period[0] <= date_strftime <= period[1] for period in holiday_ranges))


In [9]:
# make csv file with columns "Date", "Hour", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December", "SchoolHoliday"

# make new pandas dataframe
def generate_time_flags(start_date='2018-12-21 00:00:00', end_date='2020-01-06 23:00:00'):
    """Build an hourly table of calendar flags.

    One row is produced for every full hour from ``start_date`` to
    ``end_date`` (both inclusive).

    Parameters
    ----------
    start_date, end_date : str or pd.Timestamp, optional
        Bounds of the hourly range. The defaults span
        2018-12-21 00:00 through 2020-01-06 23:00.

    Returns
    -------
    pd.DataFrame
        Columns, in order: ``Date`` ('%Y-%m-%d' string), ``Hour`` (0-23),
        ``Monday`` .. ``Sunday`` and ``January`` .. ``December`` (1 for the
        matching weekday/month, 0 otherwise), and ``SchoolHoliday`` (result of
        ``is_school_holiday_in_schleswig_holstein`` for that day).
    """
    weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                     'Friday', 'Saturday', 'Sunday']
    month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
                   'August', 'September', 'October', 'November', 'December']

    # A Timedelta step is used instead of the 'H' alias, which newer pandas
    # releases deprecate.
    timestamps = pd.date_range(pd.Timestamp(start_date), pd.Timestamp(end_date),
                               freq=pd.Timedelta(hours=1))
    date_strs = list(timestamps.strftime('%Y-%m-%d'))

    # Build every column once and construct the frame in a single step;
    # appending row by row is quadratic and DataFrame.append no longer exists
    # in pandas >= 2.0.
    flags = {'Date': date_strs,
             'Hour': np.asarray(timestamps.hour).astype(int)}

    # DatetimeIndex.dayofweek counts Monday as 0 .. Sunday as 6.
    day_numbers = np.asarray(timestamps.dayofweek)
    for day_number, name in enumerate(weekday_names):
        flags[name] = (day_numbers == day_number).astype(int)

    # Compare numeric month values: the zero-padded '%m' string ('01'..'09')
    # never equals '1'..'9', so a string comparison would leave January to
    # September permanently at 0.
    month_numbers = np.asarray(timestamps.month)
    for month_number, name in enumerate(month_names, start=1):
        flags[name] = (month_numbers == month_number).astype(int)

    flags['SchoolHoliday'] = [is_school_holiday_in_schleswig_holstein(day)
                              for day in date_strs]
    return pd.DataFrame(flags)

# Generate the table and print a preview of its first rows
df = generate_time_flags()
print(df.head())


         Date Hour Monday Tuesday Wednesday Thursday Friday Saturday Sunday  \
0  2018-12-21    0      0       0         0        0      1        0      0   
1  2018-12-21    1      0       0         0        0      1        0      0   
2  2018-12-21    2      0       0         0        0      1        0      0   
3  2018-12-21    3      0       0         0        0      1        0      0   
4  2018-12-21    4      0       0         0        0      1        0      0   

  January  ... April May June July August September October November December  \
0       0  ...     0   0    0    0      0         0       0        0        1   
1       0  ...     0   0    0    0      0         0       0        0        1   
2       0  ...     0   0    0    0      0         0       0        0        1   
3       0  ...     0   0    0    0      0         0       0        0        1   
4       0  ...     0   0    0    0      0         0       0        0        1   

  SchoolHoliday  
0             1  
1 

# Copy Dataset to `datasets` folder


In [10]:
# Write the generated flags to CSV; index=False leaves the DataFrame index out of the file
df.to_csv('../datasets/Timeflags.csv', index=False)