# Train ANN with DSM2 data

In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras import layers
#import keras

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

import hvplot.pandas
import panel as pn
import holoviews as hv
hv.extension('bokeh')

import os

In [None]:
# Load the first eight worksheets of the baseline workbook in a single pass.
# Passing a list of sheet positions makes pandas open and parse the file once
# (returning a dict keyed by sheet position) instead of once per sheet.
# The first column of every sheet becomes a parsed date index.
_sheets = pd.read_excel('./dsm2_ann_BaselineData_20220120.xlsx',
                        sheet_name=list(range(8)),
                        index_col=0,
                        parse_dates=True)
dflist = [_sheets[i] for i in range(8)]

In [None]:
# Join the first seven sheets column-wise into one table (the ANN inputs),
# then preview the first rows.
dfinps = pd.concat(dflist[:7], axis='columns')
dfinps.head()

In [None]:
# The eighth sheet holds the outputs (labels); its columns are later used as
# the list of output locations.
dfouts = dflist[7]

In [None]:
# Display the output (labels) table for a quick visual check.
dfouts

## TensorBoard Setup
A log directory is used to keep the training logs.

TensorBoard runs as a separate process and is best started from the command line. Open a command window, activate this environment (i.e. keras), and go to the current directory. Then type:
```
tensorboard --logdir=./tf_training_logs/ --port=6006
```

In [None]:
from tensorflow import keras

In [None]:
# To run TensorBoard inside the notebook instead of from a terminal,
# uncomment the two magics below:
# %load_ext tensorboard
# %tensorboard --logdir=./tf_training_logs/ --port=6006

# Training logs are written under ./tf_training_logs; the callback is handed
# to model.fit() in the training cell.
root_logdir = os.path.join(os.curdir, "tf_training_logs")
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=root_logdir)

# Calibration and Validation Periods
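Calibration is from 1940 - 2015 and validation from 1923 - 1939, as per the Calsim 3 ANN paper. Note that the slices defined in the cell below currently use 1990 - 2020 for calibration and 1990 - 2000 for validation.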

The output locations are the names of the columns in the output (labels) sheet of the Excel workbook. For each location, an ANN is trained on all the specified data sets.

In [None]:
# Every column of the output table is one location an ANN can be trained for.
output_locations = [*dfouts.columns]

# Date windows handed to annutils.create_training_sets as calib_slice/valid_slice.
# NOTE(review): the validation window (1990-2000) lies inside the calibration
# window (1990-2020) -- confirm that this overlap is intended.
calib_slice, valid_slice = slice('1990', '2020'), slice('1990', '2000')

In [None]:
# Define Sequential model with 3 layers
NFEATURES = 126  # (8 + 10)*7


def build_model(nhidden1=8, nhidden2=2, act_func='sigmoid'):
    """Build and compile a small fully connected regression network.

    The network takes an input of width ``NFEATURES``, passes it through two
    hidden ``Dense`` layers and ends in a single linear output unit. It is
    compiled with the Adam optimizer (learning rate 0.001) and
    mean-squared-error loss.

    Args:
        nhidden1: Number of units in the first hidden layer.
        nhidden2: Number of units in the second hidden layer.
        act_func: Activation used by both hidden layers.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    model = keras.Sequential(
        [
            # ``shape`` must be a tuple: ``(NFEATURES)`` is just the int 126,
            # so the trailing comma is required to make a 1-tuple.
            layers.Input(shape=(NFEATURES,)),
            layers.Dense(nhidden1, activation=act_func),
            layers.Dense(nhidden2, activation=act_func),
            layers.Dense(1, activation=keras.activations.linear),
        ])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss="mse",
    )
    # Alternative optimizer that was tried here: keras.optimizers.RMSprop()
    return model

In [None]:
import annutils

In [None]:
# Train one ANN per output location. The [0:1] slice limits the run to the
# first location; widen it to train more.
for location in output_locations[0:1]:
    # str() also copes with non-string column labels (e.g. tuples), which
    # '%s' % location would not.
    output_location = str(location)
    # Build the calibration and validation sets, plus the x and y scalers
    # fitted on the combined inputs.
    (xallc, yallc), (xallv, yallv), xscaler, yscaler = \
        annutils.create_training_sets([dfinps],
                                      [dfouts[[output_location]]],
                                      calib_slice=calib_slice,
                                      valid_slice=valid_slice)
    model = build_model(8, 2, act_func='sigmoid')
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in display() (that would show a stray "None" and ties the cell
    # to IPython).
    model.summary()
    history = model.fit(
        xallc,
        yallc,
        epochs=5000,
        batch_size=128,
        validation_data=(xallv, yallv),
        callbacks=[
            # Stop once val_loss has not improved for 50 epochs and roll the
            # model back to the best weights seen.
            keras.callbacks.EarlyStopping(
                monitor="val_loss", patience=50, mode="min", restore_best_weights=True),
            tensorboard_cb
        ],
    )
    # pd.DataFrame(history.history).hvplot(logy=True) # if you want to view the graph for calibration/validation training
    # Store the trained model together with its scalers under the location name.
    annutils.save_model(location, model, xscaler, yscaler)

# Show the performance on the data sets visually

Change the location to one of the locations for which an ANN has been trained, then run the cells below to see its performance on one or more of the data sets.

In [None]:
# Show the name of the location that was trained last in the loop above.
output_location

In [None]:
# Choose the location whose saved ANN should be inspected.
location = 'Antioch'
output_location = f'{location}'
print('Location: ', location)

# Load what annutils stored under this location name; the cells below read
# its .model, .xscaler and .yscaler attributes.
annmodel = annutils.load_model(location)

In [None]:
# Visualise how the loaded model performs for the selected location, using
# the full input table and that location's output column.
annutils.show_performance(
    annmodel.model,
    dfinps,
    dfouts[output_location],
    annmodel.xscaler,
    annmodel.yscaler,
)

# Display weights and x and y scaling parameters


In [None]:
# Show the weight arrays of the loaded model.
# NOTE(review): presumably a Keras model, whose get_weights() returns a list
# of NumPy arrays (kernels and biases per layer) -- confirm in annutils.
annmodel.model.get_weights()

In [None]:
# Show the data_min_ / data_max_ attributes of the x scaler.
# NOTE(review): these names match sklearn's MinMaxScaler (per-feature minimum
# and maximum seen during fitting) -- confirm the scaler type in annutils.
annmodel.xscaler.data_min_, annmodel.xscaler.data_max_

In [None]:
# Show the feature_range attribute of the x scaler.
# NOTE(review): for a sklearn MinMaxScaler this is the target (min, max) the
# data is scaled onto -- confirm the scaler type in annutils.
annmodel.xscaler.feature_range

In [None]:
# Show the min_ attribute of the x scaler.
# NOTE(review): for a sklearn MinMaxScaler this is the per-feature offset in
# X_scaled = X * scale_ + min_ -- confirm the scaler type in annutils.
annmodel.xscaler.min_

In [None]:
# Show the scale_ attribute of the x scaler.
# NOTE(review): for a sklearn MinMaxScaler this is the per-feature multiplier
# in X_scaled = X * scale_ + min_ -- confirm the scaler type in annutils.
annmodel.xscaler.scale_

# Calibration and Validation Periods
Calibration is from 1940 - 2015 and Validation from 1923 - 1939 as per the Calsim 3 ANN paper

The output locations are names of the columns in the output(labels) csv files. For each location, an ANN is trained on all the specified data sets