In [5]:
import pandas as pd
import numpy as np

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, LeakyReLU
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam


import glob
import os

from scipy.stats import zscore

In [7]:
# --- Load raw inputs ---------------------------------------------------------
weather_path = os.path.join('..', 'data', 'weather', 'precip_temp.csv')
weather_df = pd.read_csv(weather_path)

# One 'Clients per day.csv' per sub-directory of data/wifi. glob is called
# without recursive=True, so the '**' component behaves like '*' and matches
# exactly one directory level.
clients_path = os.path.join('..', 'data', 'wifi', '**', 'Clients per day.csv')

# glob returns matches in arbitrary, filesystem-dependent order. Sort them so
# the row order of the concatenated frame is reproducible; this matters
# because the training cell's validation_split holds out the *last* rows.
client_csvs = sorted(glob.glob(clients_path))
if not client_csvs:
    # pd.concat would raise the same exception type with the much less
    # helpful message "No objects to concatenate".
    raise ValueError('No client CSV files match %r' % clients_path)

# Column 0 of every client CSV is parsed as a datetime; it is accessed below
# under the name 'Time'.
clients_df = pd.concat(
    [pd.read_csv(csv_path, parse_dates=[0]) for csv_path in client_csvs],
    ignore_index=True)

# --- Feature engineering -----------------------------------------------------
# One-hot encode the day of the week (Monday == 0 ... Sunday == 6) as seven
# 0/1 integer columns named 'weekday-0' ... 'weekday-6'.
weekday_labels = ['weekday-%i' % d for d in range(7)]
for d, label in enumerate(weekday_labels):
    clients_df[label] = clients_df['Time'].dt.weekday.eq(d).astype('int64')

# Put clients date in the same format as it is in the weather data
clients_df['Time'] = clients_df['Time'].dt.strftime('%Y-%m-%d')

# Inner join on the date string: client rows whose date has no matching
# weather row are dropped, which is how the missing data point in the
# weather data is ignored.
all_data = clients_df.merge(weather_df, left_on='Time', right_on='DATE')

# Standardise the weather columns to zero mean / unit variance.
# NOTE(review): zscore's default nan_policy propagates NaN, so a single NaN
# in a column would turn the whole column into NaN -- confirm the weather
# data has no missing values in these columns.
cols_to_norm = ['TMIN', 'TMAX', 'PRCP']
for col in cols_to_norm:
    all_data[col] = zscore(all_data[col])

# --- Model inputs / target ---------------------------------------------------
x_labels = weekday_labels + ['PRCP', 'TMAX', 'TMIN']
y_labels = ['# Clients']
x = all_data[x_labels].values   # shape: (n_rows, len(x_labels))
y = all_data[y_labels].values   # shape: (n_rows, 1)

# Preview only the first rows instead of dumping the whole frame.
all_data.head(10)

Unnamed: 0,Time,# Clients,weekday-0,weekday-1,weekday-2,weekday-3,weekday-4,weekday-5,weekday-6,DATE,PRCP,TMAX,TMIN
0,2017-01-01,93,0,0,0,0,0,0,1,2017-01-01,-0.343154,-0.820006,-0.792066
1,2017-01-02,73,1,0,0,0,0,0,0,2017-01-02,0.579624,-1.667506,-0.792066
2,2017-01-03,87,0,1,0,0,0,0,0,2017-01-03,1.589692,-1.067194,-0.792066
3,2017-01-04,97,0,0,1,0,0,0,0,2017-01-04,-0.343154,-0.725840,-1.262831
4,2017-01-05,92,0,0,0,1,0,0,0,2017-01-05,-0.343154,-2.138339,-1.427599
5,2017-01-06,88,0,0,0,0,1,0,0,2017-01-06,-0.218454,-2.208964,-1.757135
6,2017-01-07,39,0,0,0,0,0,1,0,2017-01-07,0.592094,-2.832818,-2.580974
7,2017-01-08,40,0,0,0,0,0,0,1,2017-01-08,-0.318214,-2.997609,-2.828126
8,2017-01-09,74,1,0,0,0,0,0,0,2017-01-09,-0.343154,-2.950526,-2.710435
9,2017-01-10,67,0,1,0,0,0,0,0,2017-01-10,-0.343154,-2.020631,-2.322053


In [8]:
# --- Training configuration --------------------------------------------------
epochs = 1000            # upper bound; early stopping normally ends training sooner
learning_rate = 0.0001
dropout_rate = 0.5
patience = 50            # epochs without val_loss improvement before stopping

# Derive the input width from the feature matrix instead of hard-coding 10,
# so the network stays in sync with x_labels if features are added/removed.
n_features = x.shape[1]

# --- Model: fully connected regressor with dropout after each hidden layer ---
# NOTE(review): the printed summary reports roughly 12M trainable parameters;
# consider whether layers this wide are warranted for the amount of data.
model = Sequential()
model.add(Dense(1000, activation='relu', input_shape=(n_features,)))
model.add(Dropout(dropout_rate))
model.add(Dense(4000, activation='relu'))
model.add(Dropout(dropout_rate))
model.add(Dense(2000, activation='relu'))
model.add(Dropout(dropout_rate))
# Single linear unit: the model predicts one unbounded real value per row.
model.add(Dense(1, activation='linear'))
model.summary()

model.compile(loss='mean_absolute_percentage_error',
              optimizer=Adam(learning_rate))

# Stop once the validation loss has not improved for `patience` epochs.
# 'val_loss' is the Keras default monitor; it is spelled out for readability.
early_stopping = EarlyStopping(monitor='val_loss', patience=patience, verbose=1)

# validation_split holds out the last 10% of the rows of x/y (taken before
# any shuffling), so the held-out set depends on the row order of the data.
history = model.fit(x, y,
                    epochs=epochs,
                    verbose=1,
                    validation_split=0.1,
                    callbacks=[early_stopping])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_14 (Dense)             (None, 1000)              11000     
_________________________________________________________________
dropout_10 (Dropout)         (None, 1000)              0         
_________________________________________________________________
dense_15 (Dense)             (None, 4000)              4004000   
_________________________________________________________________
dropout_11 (Dropout)         (None, 4000)              0         
_________________________________________________________________
dense_16 (Dense)             (None, 2000)              8002000   
_________________________________________________________________
dropout_12 (Dropout)         (None, 2000)              0         
_________________________________________________________________
dense_17 (Dense)             (None, 1)                 2001      
Total para

KeyboardInterrupt: 

array([[-0.26335078]], dtype=float32)