# Time-based Features

- Date
- Hour
- Daylight (y/n)
- Holiday (y/n)
- Rush Hour (y/n)

In [1]:
# package imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Determine Holidays

In [3]:
# Load the raw tab-separated holiday listing; the file has no header row.
holidays = pd.read_csv('../../data/raw/holidays.txt', sep='\t', header=None)

In [4]:
# Name the two unnamed columns: the date as text, then the holiday label.
holidays.columns = pd.Index(['date_text', 'holiday'])

In [5]:
# Parse the textual dates into a proper datetime column.
holidays['date'] = pd.to_datetime(holidays['date_text'])

In [6]:
# Distinct holiday names, in order of first appearance, as a plain list.
holiday_list = holidays['holiday'].unique().tolist()

In [7]:
# Holiday names (exactly as spelled in the raw holiday file) that are kept
# when building the holiday feature.  Order is preserved from the original
# hand-curated list.
relevant_holidays = [
    # January - March
    "New Year's Day",
    "'New Year's Day' observed",
    "Martin Luther King Jr. Day",
    "Valentine's Day",
    "Presidents' Day (Most regions)",
    "Ash Wednesday",
    "St. Patrick's Day",
    # Holy Week / Easter
    "Palm Sunday",
    "Maundy Thursday",
    "Good Friday (Many regions)",
    "Holy Saturday",
    "Easter Sunday",
    "Easter Monday",
    # May - September
    "Cinco de Mayo",
    "Mother's Day",
    "Memorial Day",
    "Father's Day",
    "Independence Day",
    "Labor Day",
    # October - December
    "Columbus Day (Most regions)",
    "Halloween",
    "All Saints' Day",
    "All Souls' Day",
    "Election Day",
    "Veterans Day",
    "Thanksgiving Day",
    "Black Friday",
    "Christmas Eve",
    "Christmas Day",
    "New Year's Eve",
    # Observed / adjacent days
    "'Independence Day' observed",
    "Day After Christmas Day",
    "'Christmas Day' observed",
    "Veterans Day observed",
]

In [8]:
# Keep only the rows whose holiday name is on the curated list.
holidays = holidays.loc[holidays['holiday'].isin(relevant_holidays)]

In [9]:
# Ash Wednesday rows are the anchor for deriving the Mardi Gras days.
ash_wednesdays = holidays.loc[holidays['holiday'].eq('Ash Wednesday')]

In [10]:
# One day before each Ash Wednesday.
mardi_gras = ash_wednesdays['date'] - pd.Timedelta(days=1)

In [11]:
# Two days before each Ash Wednesday.
lundi_gras = ash_wednesdays['date'] - pd.Timedelta(days=2)

In [12]:
# Three days before each Ash Wednesday.
mardi_gras_sunday = ash_wednesdays['date'] - pd.Timedelta(days=3)

In [13]:
# Four days before each Ash Wednesday.
mardi_gras_saturday = ash_wednesdays['date'] - pd.Timedelta(days=4)

In [14]:
# Five days before each Ash Wednesday.
mardi_gras_friday = ash_wednesdays['date'] - pd.Timedelta(days=5)

In [15]:
# Six days before each Ash Wednesday.
mardi_gras_thursday = ash_wednesdays['date'] - pd.Timedelta(days=6)

In [16]:
# Stack the six derived day series (Tuesday back to the preceding Thursday)
# into one series with a fresh 0..n-1 index.
mardi_gras_parades = pd.concat(
    objs=(
        mardi_gras,
        lundi_gras,
        mardi_gras_sunday,
        mardi_gras_saturday,
        mardi_gras_friday,
        mardi_gras_thursday,
    ),
    ignore_index=True,
)

In [17]:
# Combine the listed holiday dates with the derived Mardi Gras dates.
holiday_dates = pd.concat(
    objs=(holidays['date'], mardi_gras_parades),
    ignore_index=True,
)

In [18]:
# Chronological order (earliest first).
holiday_dates = holiday_dates.sort_values(ascending=True)

In [19]:
# A date can carry several holidays; keep only its first occurrence.
holiday_dates = holiday_dates.loc[~holiday_dates.duplicated(keep='first')]

In [20]:
# Discard the scrambled index left by sorting/deduplicating and renumber from 0.
holiday_dates = holiday_dates.reset_index(drop=True)

In [22]:
# Persist the final sorted, de-duplicated holiday dates as an interim feature.
holiday_dates.to_pickle(path='../../data/interim/features/holiday_dates.pickle')

## Daylight

In [23]:
def _clock_time_on_date(hhmm, dates):
    """Combine HHMM clock readings with calendar dates into timestamps.

    Parameters
    ----------
    hhmm : pandas.Series
        Clock readings written as hour+minute digits (e.g. 605 or '1805').
        Values are converted to text and left-padded with zeros to four
        characters, so a reading such as 605 is parsed as 06:05.
    dates : pandas.Series
        Datetime values aligned with ``hhmm``; only their calendar date is used.

    Returns
    -------
    pandas.Series
        Timestamps on each row's date at the given clock time.
    """
    padded = hhmm.astype('str').str.pad(4, fillchar='0')
    clock = pd.to_datetime(padded, format='%H%M')
    # Parsing with only %H%M puts every value on the same placeholder date, so
    # subtracting that day's midnight leaves just the time-of-day offset.
    time_of_day = clock - clock.dt.normalize()
    return dates.dt.normalize() + time_of_day


# Select the three needed columns by name rather than by position so that a
# change in the export's column order cannot silently pick up the wrong data.
weather_data = pd.read_csv(
    '../../data/raw/1399048.csv',
    usecols=['DATE', 'DAILYSunrise', 'DAILYSunset'],
)

weather_data['DATE'] = pd.to_datetime(weather_data['DATE'])

# Sunrise and sunset go through the same zero-padded parse, and each becomes a
# full timestamp on the row's own observation date.
weather_data['DAILYSunrise'] = _clock_time_on_date(
    weather_data['DAILYSunrise'], weather_data['DATE'])
weather_data['DAILYSunset'] = _clock_time_on_date(
    weather_data['DAILYSunset'], weather_data['DATE'])

# Index by observation timestamp so the data can be resampled by day.
weather_data.set_index('DATE', inplace=True)

In [24]:
# Collapse to one row per calendar day, taking the first non-null value of
# each column within the day.
daylight_by_date = weather_data.resample(rule='D').first()

In [25]:
# Keep only the time-of-day part of the sunrise timestamp.
daylight_by_date['sunrise'] = daylight_by_date['DAILYSunrise'].dt.time

In [26]:
# Keep only the time-of-day part of the sunset timestamp.
daylight_by_date['sunset'] = daylight_by_date['DAILYSunset'].dt.time

In [27]:
# The full-timestamp columns are no longer needed once the times are extracted.
daylight_by_date = daylight_by_date.drop(
    ['DAILYSunrise', 'DAILYSunset'],
    axis='columns',
)

In [28]:
# Persist the per-day sunrise/sunset table as an interim feature.
daylight_by_date.to_pickle(path='../../data/interim/features/daylight.pickle')

## Rush Hour

In [29]:
# Empty frame indexed by every business day (Mon-Fri) in the study window.
rush_hour = pd.DataFrame(
    index=pd.bdate_range(start='2012-01-01', end='2018-06-30'),
)

In [30]:
# One column per rush-hour slot; every business day gets the same six
# hour-start times (07-09 in the morning, 16-18 in the evening).
rush_hour = rush_hour.assign(
    rush_hour_am_1=pd.to_datetime('07:00').time(),
    rush_hour_am_2=pd.to_datetime('08:00').time(),
    rush_hour_am_3=pd.to_datetime('09:00').time(),
    rush_hour_pm_1=pd.to_datetime('16:00').time(),
    rush_hour_pm_2=pd.to_datetime('17:00').time(),
    rush_hour_pm_3=pd.to_datetime('18:00').time(),
)

In [31]:
# Go from wide (one column per slot) to long (one row per day/slot pair),
# then turn both index levels into ordinary columns.
rush_hour = rush_hour.stack()
rush_hour = rush_hour.reset_index()

In [32]:
# Label the long-format columns; the middle one holds the former slot column
# names and is named 'drop_me'.
rush_hour.columns = pd.Index(['date', 'drop_me', 'time'])

In [33]:
# Join each date with its slot time as "<date> <time>" text and parse the
# result into a single timestamp per row.
rush_hour_datetime = pd.to_datetime(
    rush_hour['date'].astype('str').str.cat(rush_hour['time'].astype('str'), sep=' ')
)

In [34]:
# Re-wrap the timestamps as the index of an otherwise empty frame.
rush_hour_datetime = pd.DataFrame(data=None, index=rush_hour_datetime)

In [35]:
# Every timestamp in this frame is a rush-hour slot, so the flag is always 1.
rush_hour_datetime = rush_hour_datetime.assign(rush_hour=1)

In [36]:
# Persist the rush-hour indicator table as an interim feature.
rush_hour_datetime.to_pickle(path='../../data/interim/features/rush_hour.pickle')