# DL w/CV
* [Source for this notebook](https://www.machinecurve.com/index.php/2020/02/18/how-to-use-k-fold-cross-validation-with-keras/)
* [TIME SERIES FORECASTING WITH TENSORFLOW FROM THE AUTHORS!](https://www.tensorflow.org/tutorials/structured_data/time_series)

In [1]:
from copy import deepcopy
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.model_selection import KFold, TimeSeriesSplit, train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
#from sklearn.metrics import mean_squared_error as mse
import time
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Bidirectional, LSTM, Dropout, ConvLSTM2D, Flatten, TimeDistributed, RepeatVector
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.utils.generic_utils import get_custom_objects
from keras import backend as K
from keras.layers import Activation
from keras.optimizers import Adam
from helper import ( prepare_data, train_test_shuffle_split, train_test_seq_split, print_folds_stats )

from matplotlib import pyplot as plt
plt.style.use('ggplot')     # 'fivethirtyeight'
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# Shared loss object; used as the default `loss` argument of the model builders defined below.
mse = tf.keras.losses.MeanSquaredError()
# Alternative losses, currently disabled:
#mae = tf.keras.losses.MeanAbsoluteError()
#mape = tf.keras.losses.MeanAbsolutePercentageError()
#msle = tf.keras.losses.MeanSquaredLogarithmicError()        # square(log(y_true + 1.) - log(y_pred + 1.))
#cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)      # reduction=tf.keras.losses.Reduction.SUM ?

## Prepare
Data preparation for the case where the number of features is more than one.

In [3]:
# --- Settings for this run ---------------------------------------------------
n_steps      = 4                                   # passed to prepare_data and used as the models' input length
random_state = 34                                  # seed for the shuffle and for KFold
features     = ['num1', 'num2', 'num3', 'num4']    # columns handed to prepare_data
n_features   = len(features)

# --- Load --------------------------------------------------------------------
df = pd.read_csv( 'data/2step_20210329.csv', encoding='utf-8' )
print('Shape of loaded data:', df.shape, '\n')

# --- Build model inputs / targets --------------------------------------------
# prepare_data comes from helper.py; goldenx / goldeny are presumably a held-out
# final data point (TODO confirm against helper.prepare_data).
X, y, goldenx, goldeny = prepare_data( df, features, n_steps,
                                       with_intersection=True,
                                       flatten=False )

# --- Splitters ---------------------------------------------------------------
# Order-preserving splits, with the training window capped at 80% of the samples.
max_train_size = int( len(X)*0.8 )
tscv = TimeSeriesSplit( n_splits=5, max_train_size=max_train_size )                       # no random_state

# Shuffled copies of the data for plain K-fold; X and y themselves stay in their original order.
X_sh, y_sh = shuffle( deepcopy(X), deepcopy(y), random_state=random_state, n_samples=None )
kf = KFold( n_splits=5, shuffle=True, random_state=random_state )

print_folds_stats( X, X_sh, tscv, kf )

Shape of loaded data: (2073, 8) 

Dropping 1 last row for golden data point
Data prepared:
	Size of X = (2068 by 4)
	Size of y = (2068 by 4)

TimeSeriesSplit indices:
(Train, test): (348, 344)
(Train, test): (692, 344)
(Train, test): (1036, 344)
(Train, test): (1380, 344)
(Train, test): (1654, 344)

KFold indices:
(Train, test): (1654, 414)
(Train, test): (1654, 414)
(Train, test): (1654, 414)
(Train, test): (1655, 413)
(Train, test): (1655, 413)


## One Run

**TODO:** try a single BiLSTM layer, with `Dropout()` as a separate layer.  
Candidate metrics: `metrics = [ 'mse', 'mae', 'mape', 'msle', 'cosine_similarity' ]`  
Do not use `sigmoid` or `tanh` as the activation!

In [6]:
def stacked_BiLSTM( n_steps, n_features,
                    learning_rate=0.005,
                    units=750,
                    activation='relu',
                    dropout1=0,
                    dropout2=0,
                    dropout3=0,
                    optimizer='adam',
                    loss=mse,
                    print_architecture=False ):
    """Build and compile a two-layer stacked bidirectional LSTM model.

    Layers, in order: Bidirectional(LSTM, return_sequences=True) ->
    Bidirectional(LSTM) -> Dropout -> Dense(n_features).

    Parameters
    ----------
    n_steps : int
        First dimension of the input shape `(n_steps, n_features)`.
    n_features : int
        Second dimension of the input shape; also the number of units in
        the Dense output layer.
    learning_rate : float
        Learning rate given to the Adam optimizer.
    units : int
        Number of units in each LSTM layer.
    activation : str
        Activation passed to both LSTM layers.
    dropout1 : float
        `dropout` of the first LSTM layer.
    dropout2 : float
        `recurrent_dropout` of the first LSTM layer.
    dropout3 : float
        Rate of the Dropout layer placed before the Dense output.
    optimizer : str
        Currently ignored: the model is always compiled with Adam. The
        parameter is kept so that existing calls keep working.
    loss
        Loss passed to `model.compile`.
    print_architecture : bool
        If True, print the model summary.

    Returns
    -------
    The compiled `Sequential` model.
    """

    model = Sequential()
    model.add( Bidirectional( LSTM( units, activation=activation,
                                    dropout=dropout1,
                                    recurrent_dropout=dropout2,
                                    return_sequences=True ),      # the next recurrent layer needs the full sequence
                              input_shape=(n_steps, n_features)
                            )
             )

    model.add( Bidirectional( LSTM( units, activation=activation ) ) )
    model.add( Dropout( dropout3 ) )
    model.add( Dense( n_features ) )

    # Candidate losses/metrics: 'mse', 'mae', 'mape', 'msle', 'cosine_similarity'
    # `learning_rate` replaces the deprecated `lr` keyword of the optimizer.
    model.compile( optimizer=Adam( learning_rate=learning_rate ),
                   loss=loss,
                   metrics=[tf.keras.metrics.LogCoshError()] )

    if print_architecture:
        # summary() prints the table itself and returns None, so it must not be wrapped in print().
        model.summary()

    return model

In [10]:
def stacked_LSTM(   n_steps, n_features,
                    learning_rate=0.005,
                    units=750,
                    activation='relu',
                    dropout1=0,
                    dropout2=0,
                    dropout3=0,
                    optimizer='adam',
                    loss=mse,
                    print_architecture=False ):
    """Build and compile a two-layer stacked (unidirectional) LSTM model.

    Layers, in order: LSTM(return_sequences=True) -> LSTM -> Dropout ->
    Dense(n_features).

    Parameters
    ----------
    n_steps : int
        First dimension of the input shape `(n_steps, n_features)`.
    n_features : int
        Second dimension of the input shape; also the number of units in
        the Dense output layer.
    learning_rate : float
        Learning rate given to the Adam optimizer.
    units : int
        Number of units in each LSTM layer.
    activation : str
        Activation passed to both LSTM layers.
    dropout1 : float
        `dropout` of the first LSTM layer.
    dropout2 : float
        `recurrent_dropout` of the first LSTM layer.
    dropout3 : float
        Rate of the Dropout layer placed before the Dense output.
    optimizer : str
        Currently ignored: the model is always compiled with Adam. The
        parameter is kept so that existing calls keep working.
    loss
        Loss passed to `model.compile`.
    print_architecture : bool
        If True, print the model summary.

    Returns
    -------
    The compiled `Sequential` model.
    """

    model = Sequential()
    model.add( LSTM( units, activation=activation,
                     dropout=dropout1,
                     recurrent_dropout=dropout2,
                     return_sequences=True,                       # the next recurrent layer needs the full sequence
                     input_shape=(n_steps, n_features)
             ))

    model.add( LSTM( units, activation=activation ) )
    model.add( Dropout( dropout3 ) )
    model.add( Dense( n_features ) )

    # Candidate losses/metrics: 'mse', 'mae', 'mape', 'msle', 'cosine_similarity'
    # `learning_rate` replaces the deprecated `lr` keyword of the optimizer.
    model.compile( optimizer=Adam( learning_rate=learning_rate ),
                   loss=loss,
                   metrics=[tf.keras.metrics.LogCoshError()] )

    if print_architecture:
        # summary() prints the table itself and returns None, so it must not be wrapped in print().
        model.summary()

    return model

In [11]:
# Example only: build one stacked LSTM with the default hyper-parameters to print its architecture.
# `model` is reassigned inside the cross-validation loop further down.
model = stacked_LSTM( n_steps, n_features, print_architecture=True )

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 4, 750)            2265000   
_________________________________________________________________
lstm_6 (LSTM)                (None, 750)               4503000   
_________________________________________________________________
dropout_2 (Dropout)          (None, 750)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 3004      
Total params: 6,771,004
Trainable params: 6,771,004
Non-trainable params: 0
_________________________________________________________________
None


In [13]:
# K-fold cross-validation of the stacked BiLSTM on the shuffled data.
batch_size = 32
epochs     = 6
verbose    = 2
score_per_fold, loss_per_fold = [], []     # per-fold metric (scores[1]) and loss (scores[0])

# Cast once, outside the loop. `np.float` was only an alias of the builtin float
# (deprecated in NumPy 1.20, removed in 1.24); np.float64 is the same dtype.
X_cv = X_sh.astype(np.float64)
y_cv = y_sh.astype(np.float64)

for fold_no, (train_idx, test_idx) in enumerate( kf.split( X_sh ), start=1 ):

    # Define and compile a fresh model so that no weights carry over between folds
    model = stacked_BiLSTM( n_steps, n_features )

    print('\n', '='*100, '\n', sep='')
    print('Training in fold {} ...'.format( fold_no ))

    # Fit data to model
    history = model.fit( X_cv[train_idx], y_cv[train_idx],
                         batch_size=batch_size,
                         epochs=epochs,
                         #shuffle=False,
                         verbose=verbose )

    # Generate metrics on the held-out fold: scores[0] is the loss, scores[1] the compiled metric
    scores = model.evaluate( X_cv[test_idx], y_cv[test_idx], verbose=verbose )
    print(f'Score in fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]}')
    score_per_fold.append( scores[1] )
    loss_per_fold.append(  scores[0] )



Training in fold 1 ...
Epoch 1/6
52/52 - 10s - loss: 48740.1094 - logcosh: 39.7523
Epoch 2/6
52/52 - 10s - loss: 108.4679 - logcosh: 8.2609
Epoch 3/6
52/52 - 10s - loss: 105.0282 - logcosh: 8.1604
Epoch 4/6
52/52 - 10s - loss: 104.3610 - logcosh: 8.1390
Epoch 5/6
52/52 - 10s - loss: 104.4013 - logcosh: 8.1488
Epoch 6/6
52/52 - 10s - loss: 104.3441 - logcosh: 8.1158
13/13 - 1s - loss: 107.9290 - logcosh: 8.3244
Score in fold 1: loss of 107.92896270751953; logcosh of 8.324374198913574


Training in fold 2 ...
Epoch 1/6
52/52 - 10s - loss: 77835.2656 - logcosh: 48.8347
Epoch 2/6
52/52 - 11s - loss: 108.4825 - logcosh: 8.3046
Epoch 3/6
52/52 - 11s - loss: 106.3604 - logcosh: 8.2258
Epoch 4/6
52/52 - 10s - loss: 106.0916 - logcosh: 8.2426
Epoch 5/6
52/52 - 11s - loss: 106.8620 - logcosh: 8.2608
Epoch 6/6
52/52 - 10s - loss: 105.4269 - logcosh: 8.2126
13/13 - 1s - loss: 103.1002 - logcosh: 8.0581
Score in fold 2: loss of 103.10015869140625; logcosh of 8.058141708374023


Training in fold 3

In [14]:
# Per-fold and average scores.
# The table is sized from the collected results instead of assuming exactly five folds,
# so it also works when the number of splits changes or the loop was stopped early.
n_folds = len(loss_per_fold)

print( '\n', '='*100, sep='' )
print('Per fold:')
print( ( '{:>10}' + '{:^10}'*n_folds ).format( '', *range(1, n_folds + 1) ) )
for row_label, row_values in ( ('Loss', loss_per_fold), ('Score', score_per_fold) ):
    row_format = '{:<10}' + '{:^10}'*len(row_values)
    print( row_format.format( row_label, *[ round(i, 4) for i in row_values ] ) )
print( '='*100 )
print(f'Average loss:  {round( np.mean(loss_per_fold), 4 )}  +-{round( np.std(loss_per_fold), 4 )}')
print(f'Average score: {round( np.mean(score_per_fold), 4)}')
print( '='*100 )


Per fold:
              1         2         3         4         5     
Loss       107.929   103.1002  103.5389  106.6171  103.2017 
Score       8.3244    8.0581    8.1629    8.2585    8.1058  
Average loss:  104.8774  +-2.0048
Average score: 8.182


## Visualizations

In [15]:
# NOTE(review): `metrics` is not defined in any cell visible in this notebook, so this
# cell raises NameError on a fresh kernel. The indexing below implies a sequence of
# (MSE, log-cosh, tick label) entries - presumably results of a hyper-parameter search;
# TODO restore the cell that builds it.
metrics_shown = metrics[:-12]                  # everything except the last 12 entries

y1 = [entry[0] for entry in metrics_shown]
y2 = [entry[1] for entry in metrics_shown]
print('MSE scores:', y1)
print('Log cosh scores:', y2)
x = range(len(y1))
xtick_labels = [entry[2] for entry in metrics_shown]

plt.figure(figsize=(15,5))

# Two side-by-side panels sharing the same ticks and title; only the series and y-label differ.
for panel_no, (series, y_label) in enumerate( ( (y1, 'MSE'), (y2, 'Logcosh') ), start=1 ):
    plt.subplot(1, 2, panel_no)
    plt.plot(series)
    plt.xticks(x, xtick_labels)
    plt.title('HP Gridsearch: Epochs at Batch Size = 8 (TimeSeries)')
    plt.ylabel(y_label)
    plt.xlabel('Epochs')

plt.tight_layout()
plt.show()

NameError: name 'metrics' is not defined