In [None]:
import pandas as pd
import numpy as np

import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, LeakyReLU, Input, concatenate
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam, RMSprop
from keras.regularizers import l1_l2


import glob
import os

from scipy.stats import zscore
from functools import reduce

In [None]:
def _read_wifi_csvs(pattern):
    """Load every CSV matching ``pattern`` and stack them into one DataFrame.

    Parameters
    ----------
    pattern : str
        Glob pattern for the CSV exports. Column 0 of each file is parsed as
        a date.

    Returns
    -------
    pandas.DataFrame
        All matching files concatenated row-wise with a fresh 0..n-1 index.

    Raises
    ------
    FileNotFoundError
        If no file matches ``pattern``.
    """
    # glob.glob returns matches in arbitrary (filesystem) order. Sort them so
    # the row order of the concatenated frame is reproducible between runs;
    # later cells treat row order as meaningful (date_x, validation_split).
    # NOTE(review): sorted path order is assumed to be acceptable as row order
    # -- confirm against the directory naming scheme.
    csv_paths = sorted(glob.glob(pattern))
    if not csv_paths:
        raise FileNotFoundError('No CSV files match %r' % pattern)
    frames = [pd.read_csv(csv_path, parse_dates=[0]) for csv_path in csv_paths]
    return pd.concat(frames, ignore_index=True)


# Daily weather observations (single file).
weather_path = os.path.join('..','data','weather','precip_temp.csv')
weather_df = pd.read_csv(weather_path)

# WiFi exports: one sub-directory level under data/wifi, each holding the same
# three files. (Without recursive=True, '**' in glob behaves like '*'.)
clients_path = os.path.join('..','data','wifi','**','Clients per day.csv')
clients_df = _read_wifi_csvs(clients_path)
sessions_path = os.path.join('..','data','wifi','**','Number of sessions over time.csv')
sessions_df = _read_wifi_csvs(sessions_path)
usage_path = os.path.join('..','data','wifi','**','Usage over time.csv')
usage_df = _read_wifi_csvs(usage_path)

In [None]:
# NOTE: this cell replaces the parsed 'Time' columns of clients_df and
# sessions_df with strings, so it can only run once per load. Re-run the
# loading cell before executing it again.

# Add day of the week as a one-hot (datetime.weekday(): Monday == 0)
weekday_labels = ['weekday-%i' % d for d in range(7)]
weekday_of_row = clients_df['Time'].map(lambda dt: dt.weekday())
for d, label in enumerate(weekday_labels):
    clients_df[label] = (weekday_of_row == d).astype(int)

# Put wifi date in the same format as it is in the weather data
clients_df['Time'] = [dt.strftime('%Y-%m-%d') for dt in clients_df['Time']]
sessions_df['Time'] = [dt.strftime('%Y-%m-%d') for dt in sessions_df['Time']]
usage_df['Date'] = [dt.strftime('%Y-%m-%d') for dt in usage_df['Time']]
usage_df['Hour'] = [dt.hour for dt in usage_df['Time']]


all_data = clients_df.merge(weather_df, left_on='Time', right_on='DATE') \
    .merge(sessions_df, left_on='Time', right_on='Time')

# Put 4-hour chunks together into rows by day: reshape usage_df from one row
# per (Date, Hour) to one row per Date with a column per (measure, hour).
# For duplicated (Date, Hour) pairs the last row wins, as it did when the
# columns were filled one usage row at a time.
usage_measures = [('Download (B)', 'download'), ('Total (B)', 'total')]
usage_prefix = dict(usage_measures)
usage_wide = (
    usage_df
    .drop_duplicates(subset=['Date', 'Hour'], keep='last')
    .set_index(['Date', 'Hour'])[[source for source, _ in usage_measures]]
    .unstack('Hour')
)
usage_wide.columns = ['%s-%i' % (usage_prefix[source], hour)
                      for source, hour in usage_wide.columns]
# Sorted so the order of the target columns is identical on every run
# (a set of strings has no stable iteration order across interpreters).
usage_labels = sorted(usage_wide.columns)
# Left join on the date string: keeps every row of all_data in its current
# order; days without usage data get NaN in the usage columns.
all_data = all_data.join(usage_wide[usage_labels], on='DATE')

# Normalize some inputs using z-scores
cols_to_norm = ['TMIN', 'TMAX', 'PRCP']
for col in cols_to_norm:
    all_data[col] = zscore(all_data[col])

# Separate inputs into categories for encoding
day_x = all_data[weekday_labels].values
weather_x = all_data[['PRCP', 'TMAX', 'TMIN']].values
# Row position expressed as a fraction of a year, shape (n_rows, 1).
# Built from an integer range so the length always equals len(all_data);
# a float-step np.arange with a hard-coded stop does not guarantee that.
# NOTE(review): assumes all_data rows are consecutive days in chronological
# order -- confirm.
date_x = (np.arange(len(all_data)) / 365).astype('float32').reshape(-1, 1)

y_labels = ['# Clients'] + usage_labels
y = all_data[y_labels].values
all_data.head()

In [None]:
# Maximum number of training epochs passed to model.fit below; the
# EarlyStopping callback configured there may end training sooner.
epochs = 1000
# Kernel regularizer handed to the hidden Dense layers below. None disables
# regularization; the trailing comment keeps a commented-out l1_l2 alternative.
reg = None # l1_l2(0.01,0.02)

def stack_layers(layers):
    """Chain ``layers`` into a single output.

    The first element is the starting value; every following element is
    called with the result produced so far, and the final result is returned.
    """
    def _feed_forward(so_far, next_layer):
        # Apply the next callable to whatever the chain has produced so far.
        return next_layer(so_far)

    return reduce(_feed_forward, layers)

def _hidden_stack(widths, regularizer=None, dropout_rate=0.5):
    """Build the repeated hidden part of a branch.

    Parameters
    ----------
    widths : iterable of int
        Number of units of each hidden Dense layer, in order.
    regularizer : optional
        Passed as ``kernel_regularizer`` to every Dense layer.
    dropout_rate : float
        Rate of the Dropout layer placed after every Dense layer.

    Returns
    -------
    list
        ``[Dense, Dropout, Dense, Dropout, ...]`` -- one ReLU Dense layer
        followed by one Dropout layer per entry of ``widths``.
    """
    layers = []
    for width in widths:
        layers.append(Dense(width, activation='relu', kernel_regularizer=regularizer))
        layers.append(Dropout(dropout_rate))
    return layers


# Each branch is a list whose first element is its Input tensor, so that
# <branch>_layers[0] can be handed to Model(inputs=...) below. Input widths
# are read from the feature arrays instead of being hard-coded.

# Weather branch: 3 hidden layers, encoded to 4 units.
weather_layers = \
    [Input((weather_x.shape[1],))] \
  + _hidden_stack([10, 10, 5], regularizer=reg) \
  + [Dense(4, activation='relu',kernel_regularizer=reg)]

weather_out = stack_layers(weather_layers)

# Day-of-week branch: 3 hidden layers, encoded to 5 units.
day_layers = \
    [Input((day_x.shape[1],))] \
  + _hidden_stack([10, 10, 10], regularizer=reg) \
  + [Dense(5, activation='relu',kernel_regularizer=reg)]

day_out = stack_layers(day_layers)

# Date branch: 3 hidden layers, encoded to 5 units.
date_layers = \
    [Input((date_x.shape[1],))] \
  + _hidden_stack([10, 10, 10], regularizer=reg) \
  + [Dense(5, activation='relu',kernel_regularizer=reg)]

date_out = stack_layers(date_layers)

# Main trunk: concatenated branch encodings -> 3 hidden layers -> one linear
# output per target column. The output width follows y so the network stays
# in sync with the targets assembled during preprocessing.
main_layers = \
    [concatenate([weather_out, day_out, date_out])] \
  + _hidden_stack([10, 10, 10], regularizer=reg) \
  + [Dense(y.shape[1], activation='linear')]

main_out = stack_layers(main_layers)

model = Model(inputs=[weather_layers[0], day_layers[0], date_layers[0]], outputs=[main_out])
model.compile(loss='mean_absolute_percentage_error',
              optimizer=Adam(0.0001))

model.summary()

# Stop once the monitored quantity (Keras default: val_loss) has not improved
# for 50 consecutive epochs.
early_stopping = EarlyStopping(patience=50, verbose=1)
history = model.fit([weather_x,day_x,date_x], [y],
                    epochs=epochs,
                    verbose=1,
                    validation_split=0.1,
                    callbacks=[early_stopping])

In [None]:
# First row of the hold-out segment. Keras' validation_split uses the *last*
# fraction of the samples, starting at int(n_samples * (1 - validation_split)).
# Recomputing it from the data keeps this cell aligned with
# model.fit(..., validation_split=0.1); for 353 rows it gives 317, the value
# previously hard-coded here.
evalSeg = int(len(y) * (1. - 0.1))
print (weather_x[evalSeg:].shape)
model.evaluate(x=[weather_x[evalSeg:],day_x[evalSeg:],date_x[evalSeg:]],y=[y[evalSeg:]])