In [356]:
import pandas as pd
import sklearn 
import scipy
import datetime
from datetime import timedelta, time
from sklearn import linear_model as lm
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression, Lasso, Ridge , ElasticNet, LogisticRegression, lars_path
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cross_validation import KFold, train_test_split, cross_val_score, StratifiedKFold, LabelKFold, ShuffleSplit
from sklearn.metrics import mean_absolute_error
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
import matplotlib.pyplot as plt 
% matplotlib inline
import numpy as np
import math 
import seaborn as sns
import statsmodels.api as sm
from keras import models, layers, optimizers
from keras.optimizers import Adam 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Reshape, Input, merge, GlobalAveragePooling1D, Convolution1D, AveragePooling1D, Activation, Flatten
from keras.preprocessing import sequence
from keras.models import Model
from keras.engine import topology
from keras.layers.core import Lambda
from keras import backend as K

In [357]:
# Load the yearly sunrise/sunset tables: one row per day-of-month and a
# Rise_<month>/Set_<month> column pair per month.
# Everything is read as text so the clock-time values are not turned into numbers.
# `sep=r"\s+"` is the documented equivalent of the deprecated `delim_whitespace=True`.
df_2011 = pd.read_table("sunrise_sunset_2011.txt", sep=r"\s+", dtype = 'str')
df_2012 = pd.read_table("sunrise_sunset_2012.txt", sep=r"\s+", dtype = 'str')

In [358]:
# Preview the first rows of the 2012 sunrise/sunset table.
df_2012.head()

Unnamed: 0,Day,Rise_1,Set_1,Rise_2,Set_2,Rise_3,Set_3,Rise_4,Set_4,Rise_5,...,Rise_8,Set_8,Rise_9,Set_9,Rise_10,Set_10,Rise_11,Set_11,Rise_12,Set_12
0,1,727,1657,715,1729,640,1801,552,1832,510,...,510,1919,537,1838,604,1750,636,1707,708,1646
1,2,727,1657,714,1730,638,1802,551,1833,509,...,510,1918,538,1836,605,1749,637,1706,709,1646
2,3,727,1658,713,1731,637,1803,549,1834,507,...,511,1917,539,1835,606,1747,638,1705,710,1646
3,4,727,1659,712,1732,635,1805,547,1835,506,...,512,1915,540,1833,607,1745,639,1704,711,1646
4,5,727,1700,711,1734,634,1806,546,1836,505,...,513,1914,541,1831,608,1744,640,1703,712,1646


# A - Augmenting the daily dataset

In [359]:
# Calendar span used for the daily table: start is inclusive, end is exclusive.
start_date = datetime.date(year=2011, month=1, day=1)
end_date = datetime.date(year=2013, month=1, day=1)

# Walk forward one day at a time, collecting every date in [start_date, end_date).
dates_2011_2012 = []
current_date = start_date
while current_date < end_date:
    dates_2011_2012.append(current_date)
    current_date += datetime.timedelta(days=1)

In [360]:
# Daily table: one row per calendar date, starting with a single 'date' column.
df = pd.DataFrame({'date': dates_2011_2012})

In [361]:
def get_day(date):
    """Return the ``day`` attribute (day of month) of ``date``."""
    day_of_month = date.day
    return day_of_month
def get_month(date):
    """Return the ``month`` attribute (1-12) of ``date``."""
    month_number = date.month
    return month_number
def get_year(date):
    """Return the ``year`` attribute of ``date``."""
    calendar_year = date.year
    return calendar_year
def get_isoformat(date):
    """Return the ISO 8601 string produced by ``date.isoformat()``."""
    iso_text = date.isoformat()
    return iso_text

In [362]:
# Derive the calendar columns from 'date'; tuple order fixes the column order.
for column_name, extractor in (
    ('day', get_day),
    ('mnth', get_month),
    ('year', get_year),
    ('dteday', get_isoformat),
):
    df[column_name] = df['date'].apply(extractor)

In [363]:
# Preview the daily table after adding the day/mnth/year/dteday columns.
df.head()

Unnamed: 0,date,day,mnth,year,dteday
0,2011-01-01,1,1,2011,2011-01-01
1,2011-01-02,2,1,2011,2011-01-02
2,2011-01-03,3,1,2011,2011-01-03
3,2011-01-04,4,1,2011,2011-01-04
4,2011-01-05,5,1,2011,2011-01-05


## Bringing in the sunrise and sunset data

In [364]:
# Attach each calendar day's sunrise/sunset strings to `df`.
# The yearly tables hold one row per day-of-month and a Rise_<month>/Set_<month>
# column pair per month, so the value for a date sits at row (day - 1) of the
# column named after its month.
# Building each column in one pass avoids the removed `Index.get_values()` and
# per-cell `.loc` writes.
# NOTE(review): this assumes the whitespace-delimited files keep their columns
# aligned on rows 29-31, where short months have no entry - verify the parse.
sun_tables = {2011: df_2011, 2012: df_2012}
for column in ('Rise', 'Set'):
    df[column] = [
        sun_tables[year][column + '_' + str(month)].iloc[day - 1]
        for year, month, day in zip(df['year'], df['mnth'], df['day'])
    ]

In [365]:
# Preview the daily table with the new Rise/Set columns.
df.head()

Unnamed: 0,date,day,mnth,year,dteday,Rise,Set
0,2011-01-01,1,1,2011,2011-01-01,727,1657
1,2011-01-02,2,1,2011,2011-01-02,727,1658
2,2011-01-03,3,1,2011,2011-01-03,727,1658
3,2011-01-04,4,1,2011,2011-01-04,727,1659
4,2011-01-05,5,1,2011,2011-01-05,727,1700


In [366]:
def str_to_datetime(time):
    """Convert an ``HHMM`` clock string into a ``datetime.time``.

    The value is zero-padded to four digits first, so an un-padded time such
    as ``"727"`` (or the integer ``727``) is read as 07:27 instead of being
    sliced into hour ``72``.

    Parameters
    ----------
    time : str or int
        Clock time as hour and minute digits, e.g. ``"0727"`` or ``"1657"``.

    Returns
    -------
    datetime.time

    Raises
    ------
    ValueError
        If the digits are not a valid hour (0-23) / minute (0-59) pair.
    """
    digits = str(time).strip().zfill(4)
    return datetime.time(hour=int(digits[:2]), minute=int(digits[2:]))
def convert_to_minutes(time):
    """Convert an ``HHMM`` clock string into minutes since midnight.

    The value is zero-padded to four digits first, so an un-padded time such
    as ``"727"`` (or the integer ``727``) is read as 07:27.

    Parameters
    ----------
    time : str or int
        Clock time as hour and minute digits, e.g. ``"0727"`` or ``"1657"``.

    Returns
    -------
    int
        ``hour * 60 + minute``.

    Raises
    ------
    ValueError
        If the digits are not a valid hour (0-23) / minute (0-59) pair.
    """
    digits = str(time).strip().zfill(4)
    # Going through datetime.time keeps the hour/minute range validation.
    t = datetime.time(hour=int(digits[:2]), minute=int(digits[2:]))
    return t.hour * 60 + t.minute

In [367]:
# Parse the raw Rise/Set strings into time objects, then compute the day
# length as sunset minus sunrise, both in minutes since midnight.
rise_raw = df['Rise']
set_raw = df['Set']
df['Rise datetime'] = rise_raw.apply(str_to_datetime)
df['Set datetime'] = set_raw.apply(str_to_datetime)
sunrise_minutes = rise_raw.apply(convert_to_minutes)
sunset_minutes = set_raw.apply(convert_to_minutes)
df['daylight exposure'] = sunset_minutes - sunrise_minutes

In [368]:
# Preview the daily table with the parsed times and the daylight exposure column.
df.head()

Unnamed: 0,date,day,mnth,year,dteday,Rise,Set,Rise datetime,Set datetime,daylight exposure
0,2011-01-01,1,1,2011,2011-01-01,727,1657,07:27:00,16:57:00,570
1,2011-01-02,2,1,2011,2011-01-02,727,1658,07:27:00,16:58:00,571
2,2011-01-03,3,1,2011,2011-01-03,727,1658,07:27:00,16:58:00,571
3,2011-01-04,4,1,2011,2011-01-04,727,1659,07:27:00,16:59:00,572
4,2011-01-05,5,1,2011,2011-01-05,727,1700,07:27:00,17:00:00,573


In [369]:
# Report the largest and smallest values of the 'daylight exposure' column.
exposure = df['daylight exposure']
longest_day = exposure.max()
shortest_day = exposure.min()
print("max exposure:", longest_day, "- min exposure:", shortest_day)

max exposure: 894 - min exposure: 566


# B - Augmenting the hourly dataset

In [370]:
# Bounds of the hourly table: start is the first hour, end is used as an exclusive limit.
start_date_hour = datetime.datetime(year=2011, month=1, day=1, hour=0)
end_date_hour = datetime.datetime(year=2013, month=1, day=1, hour=0)

In [371]:
# Number of whole hours in [start_date_hour, end_date_hour).
# It is taken from the real timedelta so the leap day 2012-02-29 is counted.
# Approximating with 365-day years and 30-day months comes up 24 hours short
# for 2011-2012 and drops all of 2012-12-31.
elapsed = end_date_hour - start_date_hour
m = elapsed.days * 24 + elapsed.seconds // 3600
dates_2011_2012_hour = [start_date_hour + datetime.timedelta(hours=n) for n in range(m)]
df_hour = pd.DataFrame(dates_2011_2012_hour, columns=['date'])

In [372]:
# Preview the first rows of the hourly table.
df_hour.head()

Unnamed: 0,date
0,2011-01-01 00:00:00
1,2011-01-01 01:00:00
2,2011-01-01 02:00:00
3,2011-01-01 03:00:00
4,2011-01-01 04:00:00


In [373]:
def get_hour(date):
    """Return the ``hour`` attribute (0-23) of the datetime ``date``."""
    hour_of_day = date.hour
    return hour_of_day
def get_day(date):
    """Return the ``day`` attribute (day of month) of ``date``.

    Same definition as the ``get_day`` in the daily-dataset section above.
    """
    day_of_month = date.day
    return day_of_month
def get_month(date):
    """Return the ``month`` attribute (1-12) of ``date``.

    Same definition as the ``get_month`` in the daily-dataset section above.
    """
    month_number = date.month
    return month_number
def get_year(date):
    """Return the ``year`` attribute of ``date``.

    Same definition as the ``get_year`` in the daily-dataset section above.
    """
    calendar_year = date.year
    return calendar_year
def get_isoformat(date):
    """Return the ISO 8601 string produced by ``date.isoformat()``.

    Same definition as the ``get_isoformat`` in the daily-dataset section above.
    """
    iso_text = date.isoformat()
    return iso_text

In [374]:
# Derive the calendar columns from 'date'; tuple order fixes the column order.
for column_name, extractor in (
    ('hour', get_hour),
    ('day', get_day),
    ('mnth', get_month),
    ('year', get_year),
    ('dteday', get_isoformat),
):
    df_hour[column_name] = df_hour['date'].apply(extractor)

In [375]:
# Preview the hourly table after adding the hour/day/mnth/year/dteday columns.
df_hour.head()

Unnamed: 0,date,hour,day,mnth,year,dteday
0,2011-01-01 00:00:00,0,1,1,2011,2011-01-01T00:00:00
1,2011-01-01 01:00:00,1,1,1,2011,2011-01-01T01:00:00
2,2011-01-01 02:00:00,2,1,1,2011,2011-01-01T02:00:00
3,2011-01-01 03:00:00,3,1,1,2011,2011-01-01T03:00:00
4,2011-01-01 04:00:00,4,1,1,2011,2011-01-01T04:00:00


In [376]:
def brightness(date_time):
    """Return the fraction of the hour starting at ``date_time`` that is daylight.

    Sunrise and sunset for the day are read from the notebook-level daily
    table ``df`` (columns 'date', 'Rise datetime', 'Set datetime').

    Parameters
    ----------
    date_time : datetime.datetime or pandas.Timestamp
        Start of the hour; only its date and ``hour`` are used.

    Returns
    -------
    int or float
        * ``1 - rise.minute / 60`` in the hour that contains sunrise,
        * ``set.minute / 60`` in the hour that contains sunset,
        * ``1`` for hours strictly between those two,
        * ``0`` otherwise.

    Raises
    ------
    ValueError
        If ``df`` does not hold exactly one row for ``date_time``'s date.
    """
    # Look the day up once; both times come from the same row.
    same_day = df.loc[df['date'] == date_time.date()]
    if len(same_day) != 1:
        raise ValueError(
            "expected exactly one daily row for %s, found %d"
            % (date_time.date(), len(same_day))
        )
    sun = same_day.iloc[0]
    r = sun['Rise datetime']
    s = sun['Set datetime']

    hour = date_time.hour
    # The sunrise hour is checked first, matching the original precedence.
    if hour == r.hour:
        return 1 - (r.minute / 60.0)
    if hour == s.hour:
        return s.minute / 60.0
    if r.hour < hour < s.hour:
        return 1
    return 0

In [377]:
# Compute the brightness value for every hourly timestamp.
hourly_timestamps = df_hour['date']
df_hour['brightness'] = hourly_timestamps.map(brightness)

In [378]:
# Show the first 60 hourly rows, including the new brightness column.
df_hour.head(60)

Unnamed: 0,date,hour,day,mnth,year,dteday,brightness
0,2011-01-01 00:00:00,0,1,1,2011,2011-01-01T00:00:00,0.0
1,2011-01-01 01:00:00,1,1,1,2011,2011-01-01T01:00:00,0.0
2,2011-01-01 02:00:00,2,1,1,2011,2011-01-01T02:00:00,0.0
3,2011-01-01 03:00:00,3,1,1,2011,2011-01-01T03:00:00,0.0
4,2011-01-01 04:00:00,4,1,1,2011,2011-01-01T04:00:00,0.0
5,2011-01-01 05:00:00,5,1,1,2011,2011-01-01T05:00:00,0.0
6,2011-01-01 06:00:00,6,1,1,2011,2011-01-01T06:00:00,0.0
7,2011-01-01 07:00:00,7,1,1,2011,2011-01-01T07:00:00,0.55
8,2011-01-01 08:00:00,8,1,1,2011,2011-01-01T08:00:00,1.0
9,2011-01-01 09:00:00,9,1,1,2011,2011-01-01T09:00:00,1.0


In [381]:
# Re-display the first rows of the daily table.
df.head()

Unnamed: 0,date,day,mnth,year,dteday,Rise,Set,Rise datetime,Set datetime,daylight exposure
0,2011-01-01,1,1,2011,2011-01-01,727,1657,07:27:00,16:57:00,570
1,2011-01-02,2,1,2011,2011-01-02,727,1658,07:27:00,16:58:00,571
2,2011-01-03,3,1,2011,2011-01-03,727,1658,07:27:00,16:58:00,571
3,2011-01-04,4,1,2011,2011-01-04,727,1659,07:27:00,16:59:00,572
4,2011-01-05,5,1,2011,2011-01-05,727,1700,07:27:00,17:00:00,573
