In [1]:
import pandas as pd
import numpy as np

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout

import glob
import os

from scipy.stats import zscore

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [10]:
weather_path = os.path.join('..','data','weather','precip_temp.csv')
weather_df = pd.read_csv(weather_path)
clients_path = os.path.join('..','data','wifi','**','Clients per day.csv')
clients_df = pd.concat(map(lambda csv: pd.read_csv(csv, parse_dates=[0]),
                           glob.glob(clients_path)), ignore_index=True)
weekday_labels = []
for d in range(7):
    label = 'weekday-%i' % d
    weekday_labels.append(label)
    clients_df[label] = [int(dt.weekday()==d) for dt in clients_df['Time']]
# Put clients date in the same format as it is in the weather data
clients_df['Time'] = [dt.strftime('%Y-%m-%d') for dt in clients_df['Time']]

# This merge ignores the missing data point in the weather data
all_data = clients_df.merge(weather_df, left_on='Time', right_on='DATE')

prop_to_set_aside_as_test_data = 0.1
xy_test = all_data.sample(frac=prop_to_set_aside_as_test_data)
xy_train = all_data.drop(xy_test.index)

cols_to_norm = ['TMIN', 'TMAX', 'PRCP', '# Clients']
for col in cols_to_norm:
    xy_test[col] = zscore(xy_test[col])
    xy_train[col] = zscore(xy_train[col])

x_labels = weekday_labels + ['PRCP', 'TMAX', 'TMIN']
y_labels = ['# Clients']
x_train = xy_train[x_labels].values
y_train = xy_train[y_labels].values
x_test = xy_test[x_labels].values
y_test = xy_test[y_labels].values

xy_test

Unnamed: 0,Time,# Clients,weekday-0,weekday-1,weekday-2,weekday-3,weekday-4,weekday-5,weekday-6,DATE,PRCP,TMAX,TMIN
44,2017-08-14,0.526935,1,0,0,0,0,0,0,2017-08-14,-0.415505,0.662649,1.17964
332,2017-06-10,0.480981,0,0,0,0,0,1,0,2017-06-10,-0.415505,1.117409,1.102731
127,2017-11-05,-1.495025,0,0,0,0,0,0,1,2017-11-05,-0.415505,-0.531094,-0.294452
108,2017-10-17,-0.116416,0,1,0,0,0,0,0,2017-10-17,-0.415505,-0.315083,-0.461089
109,2017-10-18,-0.484045,0,0,1,0,0,0,0,2017-10-18,-0.415505,0.116938,-0.153452
224,2017-02-21,-0.208323,0,1,0,0,0,0,0,2017-02-21,-0.415505,-0.406035,-0.499543
341,2017-06-19,-0.131734,1,0,0,0,0,0,0,2017-06-19,3.631153,0.958243,1.282186
144,2017-11-22,-1.540978,0,0,1,0,0,0,0,2017-11-22,-0.415505,-0.519725,-1.294272
269,2017-04-07,-0.30023,0,0,0,0,1,0,0,2017-04-07,-0.415505,-0.872163,-0.80718
161,2017-12-09,-1.004853,0,0,0,0,0,1,0,2017-12-09,2.935634,-2.145489,-1.460908


In [11]:
epochs = 100

model = Sequential()
model.add(Dense(30, activation='relu', input_shape=(10,)))
model.add(Dropout(0.3))
model.add(Dense(30, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='linear'))

model.summary()

model.compile(loss='mean_absolute_percentage_error',
              optimizer='Adagrad')

history = model.fit(x_train, y_train,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', score)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 30)                330       
_________________________________________________________________
dropout_5 (Dropout)          (None, 30)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 30)                930       
_________________________________________________________________
dropout_6 (Dropout)          (None, 30)                0         
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 31        
Total params: 1,291
Trainable params: 1,291
Non-trainable params: 0
_________________________________________________________________
Train on 317 samples, validate on 35 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100

Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Test loss: 90.09158412388393
