In [1]:
import pandas as pd
import numpy as np
import math

In [2]:
# Load the hourly weather summary. Its `timestamp` column is already in
# date/hour form, so it only has to be parsed into pandas Timestamps.
weather = pd.read_csv('../data/raw/hourlyWeatherSummary_2015_2016.csv')
raw_timestamps = weather.pop('timestamp')  # takes the raw column out of the frame
weather['date_hour'] = raw_timestamps.apply(pd.Timestamp)

# Derive the time features. Each name below is both the `.dt` attribute that is
# read from `date_hour` and the column that is created from it.
# NOTE(review): `.dt` needs a datetime-typed column; if the file mixes UTC
# offsets this may not hold on every pandas version - confirm.
date_hour_parts = weather['date_hour'].dt
for part in ('date', 'hour', 'weekday', 'month', 'year'):
    weather[part] = getattr(date_hour_parts, part)
weather.head()

Unnamed: 0,apparentTemperature,cloudCover,dewPoint,humidity,icon,precipAccumulation,precipIntensity,precipProbability,precipType,pressure,...,uvIndex,visibility,windBearing,windSpeed,date_hour,date,hour,weekday,month,year
0,-10.83,0.22,-11.59,0.63,clear-night,,0.0,0.0,,1019.96,...,0.0,15.4,229,3.42,2015-01-01 00:00:00-05:00,2015-01-01,0,3,1,2015
1,-11.83,0.17,-12.09,0.62,clear-night,,0.0,0.0,,1019.03,...,0.0,15.26,226,4.31,2015-01-01 01:00:00-05:00,2015-01-01,1,3,1,2015
2,-12.07,0.06,-12.44,0.6,clear-night,,0.0,0.0,,1018.15,...,0.0,15.48,232,4.47,2015-01-01 02:00:00-05:00,2015-01-01,2,3,1,2015
3,-11.53,0.07,-12.67,0.57,clear-night,,0.0,0.0,,1017.48,...,0.0,15.42,236,4.74,2015-01-01 03:00:00-05:00,2015-01-01,3,3,1,2015
4,-11.01,0.34,-12.96,0.53,partly-cloudy-night,,0.0,0.0,,1016.75,...,0.0,15.51,242,5.17,2015-01-01 04:00:00-05:00,2015-01-01,4,3,1,2015


In [3]:
# Read the public-holiday table and convert its `date` column to plain
# `datetime.date` objects.
holidays = pd.read_csv('../data/raw/publicHolidays_2015_2016.csv')
# pd.to_datetime parses the whole column in one vectorised call instead of
# constructing a Timestamp per row. An empty column still comes back as
# datetime64, so the `.dt` accessor keeps working when the file has no rows.
holidays['date'] = pd.to_datetime(holidays['date']).dt.date
holidays.head()

Unnamed: 0,description,date
0,New Year's Day,2015-01-01
1,"Martin Luther King, Jr. Day",2015-01-19
2,Presidents' Day,2015-02-16
3,Evacuation Day,2015-03-17
4,Patriots' Day,2015-04-20


In [4]:
# Merge weather and holidays.
# Left join on the calendar date: every hourly weather row is kept, and hours
# whose date is not in the holiday table get a missing description.
additional_features = weather.merge(holidays, how='left', on='date')

# A left join repeats a weather row once per matching holiday row, so two
# holiday entries sharing a date would silently duplicate every hour of that day
# (and produce repeated `date_hour` index values in the result). Fail loudly.
assert len(additional_features) == len(weather), (
    'holidays contains repeated dates; the merge duplicated hourly weather rows'
)

# Add features to mark holidays and weekends (stored as 0/1 integers).
additional_features = additional_features.rename(columns={'description': 'holiday_description'})
additional_features['is_holiday'] = 1 * additional_features['holiday_description'].notnull()
# pandas numbers weekdays Monday=0 ... Sunday=6, so 5 and 6 are Saturday and Sunday.
additional_features['is_weekend'] = 1 * ((additional_features['weekday'] == 5) | (additional_features['weekday'] == 6))
additional_features['is_weekend_or_holiday'] = 1 * (additional_features['is_weekend'] | additional_features['is_holiday'])
additional_features = additional_features.set_index('date_hour')
additional_features.head()

Unnamed: 0_level_0,apparentTemperature,cloudCover,dewPoint,humidity,icon,precipAccumulation,precipIntensity,precipProbability,precipType,pressure,...,windSpeed,date,hour,weekday,month,year,holiday_description,is_holiday,is_weekend,is_weekend_or_holiday
date_hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-01 00:00:00-05:00,-10.83,0.22,-11.59,0.63,clear-night,,0.0,0.0,,1019.96,...,3.42,2015-01-01,0,3,1,2015,New Year's Day,1,0,1
2015-01-01 01:00:00-05:00,-11.83,0.17,-12.09,0.62,clear-night,,0.0,0.0,,1019.03,...,4.31,2015-01-01,1,3,1,2015,New Year's Day,1,0,1
2015-01-01 02:00:00-05:00,-12.07,0.06,-12.44,0.6,clear-night,,0.0,0.0,,1018.15,...,4.47,2015-01-01,2,3,1,2015,New Year's Day,1,0,1
2015-01-01 03:00:00-05:00,-11.53,0.07,-12.67,0.57,clear-night,,0.0,0.0,,1017.48,...,4.74,2015-01-01,3,3,1,2015,New Year's Day,1,0,1
2015-01-01 04:00:00-05:00,-11.01,0.34,-12.96,0.53,partly-cloudy-night,,0.0,0.0,,1016.75,...,5.17,2015-01-01,4,3,1,2015,New Year's Day,1,0,1


In [5]:
# Write the merged feature table to the modelInput directory. The frame's index
# is included in the file (to_csv's default).
output_path = '../data/modelInput/additionalFeatures.csv'
additional_features.to_csv(output_path)