# Notebook Overview

## Inputs
The input variables for all trials in this notebook are the following:
```py
include_fields = ['day_of_week','hours_l1','hours_l2','hours_l3','hours_l4',
                  'hours_l5','hours_l6','hours_l7','hours_l8','hours_l14','avg_employees',
                  'perc_hours_today_before', 'perc_hours_yesterday_before', 'perc_hours_tomorrow_before']
```

## Models and Hyperparameters
The following hyperparameters are explored using a grid search. 
```py
Num_Layers = [2,4,6,8]
Num_Units = [8,16,24,32]
Dropout_Rate = [0.25,0.5,0.75]
```
Each combination of num_layers, num_units, and dropout_rate is trained on the train data and validated on the crossvalidation data. The outputted numbers in the cells below correspond to the crossvalidation loss and metric scores. 

For a given permuation {l,u,r} of {num_layers,num_units,dropout_rate}, a model contains (l-1) layers of width u, each with dropout rate r applied to its output, connected to a final layer that outputs a prediction. 

In [1]:
import pandas as pd
import time
import tensorflow as tf


include_fields = ['hours','day_of_week','hours_l1','hours_l2','hours_l3','hours_l4',
                  'hours_l5','hours_l6','hours_l7','hours_l8','hours_l14','avg_employees',
                  'perc_hours_today_before', 'perc_hours_yesterday_before', 'perc_hours_tomorrow_before']

startTime = time.time()
train = pd.read_csv("/export/storage_adgandhi/PBJhours_ML/Data/Intermediate/train_test_validation/training_set.csv",usecols=include_fields).dropna()
val = pd.read_csv("/export/storage_adgandhi/PBJhours_ML/Data/Intermediate/train_test_validation/crossvalidation_set.csv",usecols=include_fields).dropna()
print(f"Loaded Train and Validation sets. Time taken: {time.time()-startTime}")
print(train.head)

Loaded Train and Validation sets. Time taken: 243.99558639526367
<bound method NDFrame.head of            hours  day_of_week  hours_l1  hours_l2  hours_l3  hours_l4  \
0            0.0            4       0.0      0.00       0.0      7.50   
1            7.5            3       0.0      0.00       7.5      0.00   
2            0.0            2       0.0      7.50       0.0      0.00   
3            0.0            1       7.5      0.00       0.0      0.00   
4            7.5            0       0.0      0.00       0.0      0.00   
...          ...          ...       ...       ...       ...       ...   
146318634    7.0            5       0.0      6.75       7.5      7.75   
146318636    0.0            0       0.0     12.00       0.0      0.00   
146318638    0.0            2       0.0      0.00       0.0     12.00   
146318639    0.0            1       0.0      0.00      12.0      0.00   
146318641    0.0            4       0.0      0.00       0.0      0.00   

           hours_l5  hours_l

In [2]:
train_inputs, train_labels = train.drop(['hours'], axis=1), train.filter(['hours'])
val_inputs, val_labels = val.drop(['hours'], axis=1), val.filter(['hours'])

In [3]:
#appends one hot expansion of selected labels to end of dataframe (axis 1)
def expand_one_hot(labels,dataset):
    outList = []
    for label in labels:  
        col = dataset[label]
        ###Generate a dict for all unique values (Don't waste space encoding non important job id's)
        map = {}
        index = 0
        for element in col.unique():
            map[element] = index
            index += 1
        col = col.map(map)
        tensor = tf.one_hot(col,len(col.unique()))
        outList.append(tensor)
        dataset = dataset.drop(columns=[label])
    
    outList.append(dataset)
    output = tf.concat(outList,1)
    return output

train_inputs = expand_one_hot(['day_of_week'],train_inputs)
val_inputs = expand_one_hot(['day_of_week'],val_inputs)

print(train_inputs.shape)

(137722552, 20)


In [4]:
strategy = tf.distribute.MirroredStrategy()
BUFFER_SIZE = 10000
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
BATCH_SIZE_PER_REPLICA = 512
BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
trainSet = tf.data.Dataset.from_tensor_slices((train_inputs,train_labels)).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
valSet = tf.data.Dataset.from_tensor_slices((val_inputs,val_labels)).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

print(trainSet)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Number of devices: 1
<BatchDataset shapes: ((None, 20), (None, 1)), types: (tf.float32, tf.float64)>


In [5]:
def train_test_model(hparams):
    with strategy.scope():
        model = tf.keras.models.Sequential()

        #Build model to depth specificed in hparams[HP_NUM_LAYERS]
        for i in range(hparams[HP_NUM_LAYERS]):
            if i+1 == hparams[HP_NUM_LAYERS]:
                model.add(tf.keras.layers.Dense(1))
            elif i == 0:
                model.add(tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu))
            else:
                model.add(tf.keras.layers.Dropout(hparams[HP_DROPOUT]))
                model.add(tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu))           
    
        model.compile(
            loss=tf.keras.losses.MeanSquaredError(),
            optimizer=tf.keras.optimizers.Adam(),
            metrics=[tf.keras.metrics.MeanAbsoluteError()]
        )
    
    callback = tf.keras.callbacks.EarlyStopping(
                    monitor='val_loss', min_delta=0.01, patience=1, verbose=0,
                    mode='auto', baseline=None, restore_best_weights=False
                )
    
    startTime = time.time()
    model.fit(trainSet, epochs=10, verbose=0, validation_data=valSet, callbacks=[callback]) 
    _, accuracy = model.evaluate(valSet)
    return accuracy, time.time()-startTime

In [6]:
def run(run_dir, hparams):
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        accuracy, time = train_test_model(hparams)
        tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)
        print(f"MAE: {accuracy}    Time Taken: {time} seconds")

In [7]:
from tensorboard.plugins.hparams import api as hp

HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([8,16,24,32]))
HP_DROPOUT = hp.HParam('dropout', hp.Discrete([0.25, 0.5, 0.75]))
HP_NUM_LAYERS = hp.HParam('num_layers', hp.Discrete([2,4,6,8]))

METRIC_ACCURACY = 'Mean Squared Error'

with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_NUM_LAYERS],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Mean Squared Error')],
    )

In [None]:
session_num = 0
for num_layers in HP_NUM_LAYERS.domain.values:
    for dropout_rate in HP_DROPOUT.domain.values:
        for num_units in HP_NUM_UNITS.domain.values:
            if int(num_layers)==2 and float(dropout_rate)!=0.25:
                continue
            hparams = {
                HP_NUM_UNITS: num_units,
                HP_DROPOUT: dropout_rate,
                HP_NUM_LAYERS: num_layers,
            }
            run_name = "run-%d" % session_num
            print('--- Starting trial: %s' % run_name)
            print({h.name: hparams[h] for h in hparams})
            run('logs/hparam_tuning/' + run_name, hparams)
            session_num += 1

--- Starting trial: run-0
{'num_units': 8, 'dropout': 0.25, 'num_layers': 2}
MAE: 2.971541166305542    Time Taken: 1590.6408619880676 seconds
--- Starting trial: run-1
{'num_units': 16, 'dropout': 0.25, 'num_layers': 2}
MAE: 2.9142937660217285    Time Taken: 2027.4406867027283 seconds
--- Starting trial: run-2
{'num_units': 24, 'dropout': 0.25, 'num_layers': 2}
MAE: 2.9398937225341797    Time Taken: 1107.9467117786407 seconds
--- Starting trial: run-3
{'num_units': 32, 'dropout': 0.25, 'num_layers': 2}
MAE: 2.9138858318328857    Time Taken: 1067.9047322273254 seconds
--- Starting trial: run-4
{'num_units': 8, 'dropout': 0.25, 'num_layers': 4}
MAE: 3.6050760746002197    Time Taken: 1216.3100955486298 seconds
--- Starting trial: run-5
{'num_units': 16, 'dropout': 0.25, 'num_layers': 4}
MAE: 3.320221185684204    Time Taken: 1765.823754310608 seconds
--- Starting trial: run-6
{'num_units': 24, 'dropout': 0.25, 'num_layers': 4}
MAE: 3.166147470474243    Time Taken: 1221.4553463459015 second

In [8]:
from tensorboard.plugins.hparams import api as hp

HP_NUM_UNITS = hp.HParam('num_units', hp.Discrete([8,16,24,32,64]))
HP_DROPOUT = hp.HParam('dropout', hp.Discrete([0.2, 0.4, 0.6]))
HP_NUM_LAYERS = hp.HParam('num_layers', hp.Discrete([3]))

METRIC_ACCURACY = 'Mean Squared Error'

with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_NUM_UNITS, HP_DROPOUT, HP_NUM_LAYERS],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Mean Squared Error')],
    )
    
session_num = 16
for num_layers in HP_NUM_LAYERS.domain.values:
    for dropout_rate in HP_DROPOUT.domain.values:
        for num_units in HP_NUM_UNITS.domain.values:
            if int(num_layers)==2 and float(dropout_rate)!=0.25:
                continue
            hparams = {
                HP_NUM_UNITS: num_units,
                HP_DROPOUT: dropout_rate,
                HP_NUM_LAYERS: num_layers,
            }
            run_name = "run-%d" % session_num
            print('--- Starting trial: %s' % run_name)
            print({h.name: hparams[h] for h in hparams})
            run('logs/hparam_tuning/' + run_name, hparams)
            session_num += 1

--- Starting trial: run-16
{'num_units': 8, 'dropout': 0.2, 'num_layers': 3}
MAE: 3.144009590148926    Time Taken: 3140.2818489074707 seconds
--- Starting trial: run-17
{'num_units': 16, 'dropout': 0.2, 'num_layers': 3}
MAE: 3.0855648517608643    Time Taken: 1805.8771080970764 seconds
--- Starting trial: run-18
{'num_units': 24, 'dropout': 0.2, 'num_layers': 3}
MAE: 3.0200936794281006    Time Taken: 1800.3471038341522 seconds
--- Starting trial: run-19
{'num_units': 32, 'dropout': 0.2, 'num_layers': 3}
MAE: 3.009599208831787    Time Taken: 1763.9263377189636 seconds
--- Starting trial: run-20
{'num_units': 64, 'dropout': 0.2, 'num_layers': 3}
MAE: 2.895702838897705    Time Taken: 1243.4794852733612 seconds
--- Starting trial: run-21
{'num_units': 8, 'dropout': 0.4, 'num_layers': 3}
MAE: 3.492199182510376    Time Taken: 1226.787061214447 seconds
--- Starting trial: run-22
{'num_units': 16, 'dropout': 0.4, 'num_layers': 3}
MAE: 3.3123090267181396    Time Taken: 1732.9405970573425 seconds