In [35]:
import pandas as pd
import numpy as np

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout

import glob
import os

from scipy.stats import zscore

In [36]:
weather_path = os.path.join('..','data','weather','precip_temp.csv')
weather_df = pd.read_csv(weather_path)
clients_path = os.path.join('..','data','wifi','**','Clients per day.csv')
clients_df = pd.concat(map(lambda csv: pd.read_csv(csv, parse_dates=[0]),
                           glob.glob(clients_path)), ignore_index=True)
weekday_labels = []
for d in range(7):
    label = 'weekday-%i' % d
    weekday_labels.append(label)
    clients_df[label] = [int(dt.weekday()==d) for dt in clients_df['Time']]
# Put clients date in the same format as it is in the weather data
clients_df['Time'] = [dt.strftime('%Y-%m-%d') for dt in clients_df['Time']]

all_data = clients_df.merge(weather_df, left_on='Time',
                            right_on='DATE', how='left')
# all_data.drop(columns=['Time', 'DATE'])

prop_to_set_aside_as_test_data = 0.1
xy_test = all_data.sample(frac=prop_to_set_aside_as_test_data)
xy_train = all_data.drop(xy_test.index)

print(xy_train[xy_train['TMIN'].isnull()])

cols_to_norm = ['TMIN']
for col in cols_to_norm:
    xy_test[col] = zscore(xy_test[col])
    xy_train[col] = zscore(xy_train[col])

x_labels = weekday_labels + ['PRCP', 'TMAX', 'TMIN']
y_labels = ['# Clients']
x_train = xy_train[x_labels].values
y_train = xy_train[y_labels].values
x_test = xy_test[x_labels].values
y_test = xy_test[y_labels].values

           Time  # Clients  weekday-0  weekday-1  weekday-2  weekday-3  \
311  2017-05-19        245          0          0          0          0   

     weekday-4  weekday-5  weekday-6 DATE  PRCP  TMAX  TMIN  
311          1          0          0  NaN   NaN   NaN   NaN  


In [15]:
epochs = 10

model = Sequential()
model.add(Dense(10, activation='relu', input_shape=(10,)))
model.add(Dropout(0.3))
model.add(Dense(10, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='linear'))

model.summary()

model.compile(loss='mean_absolute_percentage_error',
              optimizer='Adagrad')

history = model.fit(x_train, y_train,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', score)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 10)                110       
_________________________________________________________________
dropout_11 (Dropout)         (None, 10)                0         
_________________________________________________________________
dense_17 (Dense)             (None, 10)                110       
_________________________________________________________________
dropout_12 (Dropout)         (None, 10)                0         
_________________________________________________________________
dense_18 (Dense)             (None, 1)                 11        
Total params: 231
Trainable params: 231
Non-trainable params: 0
_________________________________________________________________
Train on 318 samples, validate on 35 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
