# Description
I will use several deep learning models for my time series predictions. 
* LSTM
* Transformer
* dialated CNN

In all cases I will include daily snowfall as an exogenous variable.

## Environment
For $ reasons I will use Colab

In [1]:
# get colab status
try:
    import google.colab
    IN_COLAB = True
    %tensorflow_version 2.x
except:
    IN_COLAB = False

In [2]:
# data wrangling
import numpy as np
import pandas as pd
import os.path

# viz
import altair as alt
import matplotlib.pyplot as plt
%matplotlib inline
from vapeplot import vapeplot
import seaborn as sns
from scipy import stats
from sklearn.preprocessing import MinMaxScaler

In [3]:
# local code with hack to avoid cloing full repo each time colab is run
if IN_COLAB:
    projectcode = r"https://github.com/chrisoyer/ski-snow-modeling/blob/master/src/analysis/project_utils/project_utils.py"
    ! wget $projectcode
from project_utils.project_utils import *

In [4]:
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model

# Parameters

In [5]:
alt.renderers.enable(embed_options={'theme': 'vox'})
alt.data_transformers.disable_max_rows()
#plt_style = r'https://github.com/dhaitz/matplotlib-stylesheets/blob/master/pitayasmoothie-light.mplstyle'
#plt.style.use(plt_style)
plt.rc('figure', figsize=(11.0, 7.0))
batch_size = 15
logs_path = "./logs/visualize_graph"

lookback = 30  # days prior to use for prediction

# Plotting Functions

In [6]:
def error_plotter(history):
    """plots train and validation scores by epoch"""
    pal =  sns.blend_palette(vapeplot.palette('macplus'))
    sns.set_palette(pal)
    
    loss_train = history.history['train_loss']
    loss_val = history.history['val_loss']
    plt.plot(epochs, loss_train, 'g', label='Training loss')
    plt.plot(epochs, loss_val, 'b', label='Validation loss')
    plt.title('Training and Validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

# Load Data

In [7]:
if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/gdrive')
    os.chdir(r'/content/gdrive/My Drive/data_sci/colab_datasets/ski/')
    all_data_path = r'./data/snow_data_clean.parquet'
    mirrored_strategy = tf.distribute.MirroredStrategy()
else:
    all_data_path = r'../../data/snow_data_clean.parquet'
!pwd

/c/Users/User/Documents/GitHub/ski-snow-modeling/src/analysis


In [8]:
snow_df = pd.read_parquet(all_data_path)

### Reshape for TF input
Shape should match (__samples__, __time steps__, __features__)

In [9]:
def data_slim(source=snow_df, station=None, region=None):
    """filters data for station OR region, relevant features, and returns np"""
    if station:
        source = source.query('station==@station')
    if region:
        source = source.query('region==@region')

    data_arr = (source
                .reset_index()
                [['base', 'dayofyr', 'snowfall']]
                .to_numpy()
                )
    return data_arr

In [10]:
# scale data
def scaler(X):
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(X)
    scaled_X = scaler.transform(X)

    # invert transform
    inverted_X = scaler.inverse_transform(scaled_X)
    return scaled_X#, inverted_X

In [11]:
copper = data_slim(station="Copper Mountain")

copper_scaled = scaler(copper)

In [12]:
def data_split(data=None, test_frac=.2, lookback=lookback, batch_size=batch_size):
    """split into train and test sets
    Params:
        data: endogenous should be first col
        test_size: fraction of data for test
    returns: (training data generator, test data generator)
    """
    data_rows = data.shape[0]
    test_size = int(data_rows * test_frac)
    train_size = data_rows - test_size
    train, test = data[:train_size, :], data[train_size:, :]
    gen_params = {'length': lookback,  # prior samples used for prediction
                  'sampling_rate': 1, 'stride': 1,
                  'batch_size': batch_size}
    train_data_gen = sequence.TimeseriesGenerator(data=train,
                                                  targets=train[:, 0],
                                                  **gen_params)
    test_data_gen = sequence.TimeseriesGenerator(data=test,
                                                 targets=test[:, 0],
                                                 **gen_params)
    return train_data_gen, test_data_gen


Xy_train, Xy_test = data_split(data=copper_scaled)

# Timeseries Modeling

The evolution of snow base depth over time depends (not 1:1; a foot of powder is only a few inches of packed powder) on new snowfall and melting of old snow. I will start by modeling as a simple timeseries, and then include new snowfall as a predictor variable.

## Modeling Setup
I will use supersetting crossvalidation (walk-forward CV) since this is a time series problem.

# TF LSTM models
 

In [29]:
def make_lstm(neurons=None, layers=None, batch_size=None, x_shape=None, lookback=lookback):
    """
    Parameters:
        neurons: width of layers, eg (4,5,6) implies first hidden layer has 4
            neuron, 2nd layer has 5, third layer has 6
        batch size: ...
        x_shape: (rows, features)
    Returns: unfitted model
    """
    input_shape = (lookback, x_shape[1])
    xlayer = inputs = Input(shape=input_shape, batch_size=batch_size)
    for layer in range(layers):
        xlayer = LSTM(units=neurons, batch_input_shape=input_shape, 
                   stateful=True, dropout=0.2, recurrent_dropout=0.2,)(xlayer)
    outputs = Dense(1)(xlayer)
    model = Model(inputs=inputs, outputs=outputs)
    metrics = ['mean_absolute_error', 'root_mean_squared_error']  # TODO: custom r2 func
    model.compile(loss="mse", metrics=metrics,
                  optimizer='adam')
    return model

def fit_model(model, X, batch_size, n_epoch):
    """runs the training; returns model and history"""
    for i in range(n_epoch):
        history = model.fit(X, epochs=n_epoch, batch_size=batch_size, 
                            shuffle=False, callbacks=[], 
                            verbose=1)
        model.reset_states()
    return model, history

# Vanilla LSTM Model

In [30]:
lstm_100x1 = make_lstm(neurons=100, layers=1,
                       batch_size=batch_size, x_shape=copper_scaled.shape)
lstm_100x1.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(15, 30, 3)]             0         
_________________________________________________________________
lstm_3 (LSTM)                (15, 100)                 41600     
_________________________________________________________________
dense_3 (Dense)              (15, 1)                   101       
Total params: 41,701
Trainable params: 41,701
Non-trainable params: 0
_________________________________________________________________


In [31]:
lstm_100x1, lstm_100x1_hst = fit_model(model=lstm_100x1, X=Xy_train,
                                       batch_size=batch_size, n_epoch=10)

Epoch 1/10


ValueError: in user code:

    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:543 train_step  **
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:391 update_state
        self._build(y_pred, y_true)
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:322 _build
        self._metrics, y_true, y_pred)
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\util\nest.py:1118 map_structure_up_to
        **kwargs)
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\util\nest.py:1214 map_structure_with_tuple_paths_up_to
        *flat_value_lists)]
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\util\nest.py:1213 <listcomp>
        results = [func(*args, **kwargs) for args in zip(flat_path_list,
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\util\nest.py:1116 <lambda>
        lambda _, *values: func(*values),  # Discards the path arg.
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:421 _get_metric_objects
        return [self._get_metric_object(m, y_t, y_p) for m in metrics]
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:421 <listcomp>
        return [self._get_metric_object(m, y_t, y_p) for m in metrics]
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\keras\engine\compile_utils.py:440 _get_metric_object
        metric_obj = metrics_mod.get(metric)
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\keras\metrics.py:3358 get
        return deserialize(str(identifier))
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\keras\metrics.py:3349 deserialize
        printable_module_name='metric function')
    c:\users\user\documents\github\ski-snow-modeling\venv\lib\site-packages\tensorflow\python\keras\utils\generic_utils.py:392 deserialize_keras_object
        raise ValueError('Unknown ' + printable_module_name + ':' + object_name)

    ValueError: Unknown metric function:root_mean_squared_error


In [19]:
error_plotter(lstm_100x1)

TypeError: 'History' object is not subscriptable

In [20]:
lstm_100x1_hst

<tensorflow.python.keras.callbacks.History at 0x1ea8e8cd3c8>

Batch Size: 1
Epochs: 3000
Neurons: 4
--------------
or 1 neuron

# Compare Models