# About
General template for univariate data analysis with deep learning (DL) methods.

# Libraries

In [1]:
# Executes the shared project notebook in this kernel's namespace.
# NOTE(review): presumably defines `np`, `qdata` and `univariate_samples`, which are used
# below but never imported in this notebook — confirm.
# NOTE(review): absolute, user-specific path; the notebook cannot run on another machine
# without editing this line.
%run "/home/cesar/Python_NBs/HDL_Project/HDL_Project/global_fv.ipynb"

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from keras.models import Sequential

# Layers
from keras.layers import LSTM, Dense, Bidirectional

# Optimizer
from tensorflow.keras.optimizers import Adam

import matplotlib.pyplot as plt

# Early stopping
from tensorflow.keras.callbacks import EarlyStopping

# Checkpoint
from keras.callbacks import ModelCheckpoint

# Global parameters

In [None]:
# Gate for the manual hyper-parameter sweeps: the learning-rate and batch-size sweep
# cells only train and plot when this is True; otherwise they just set their fixed parameters.
manual_tuning = False

# User-Defined Functions

In [None]:
# Example for Bidirectional-LSTM

def design_model(n_steps, n_features, lr, round_to_power_of_two=False):
    """
    Build and compile a single-layer Bidirectional-LSTM regression model.

    n_steps: Number of time steps per input sample
    n_features: Number of features per time step
    lr: Learning rate passed to the Adam optimizer
    round_to_power_of_two: When True, round the LSTM unit count up to the next
        power of two. Defaults to False, which keeps the plain 2/3 rule-of-thumb size.

    Returns the compiled model (loss: MSE, extra metric: MAE).
    """
    # Rule of thumb: size the hidden layer at about 2/3 of the number of inputs.
    num_neurons = int(np.ceil(n_features * 2 / 3))

    # Optionally snap the unit count to 2^n for computational purposes.
    if round_to_power_of_two:
        exponent = 0
        while num_neurons > 2 ** exponent:
            exponent += 1
        num_neurons = 2 ** exponent

    model = Sequential(name="Bidirectional-LSTM-model")

    # Bidirectional LSTM layer
    model.add(Bidirectional(LSTM(num_neurons, activation='relu'), input_shape=(n_steps, n_features)))

    # Output layer with a single linear neuron (one predicted value per sample)
    model.add(Dense(1, kernel_initializer='normal', activation='linear'))

    # Regularization (most commonly dropout) helps prevent overfitting to the
    # training data; no dropout layer is added here.

    # Optimizer
    opt = Adam(learning_rate=lr)
    model.compile(optimizer=opt, metrics=['mae'], loss='mse')

    return model

In [None]:
def manual_tuning_lr(X, y, learning_rate, num_epochs, bs, model):
    """
    Fit `model` and draw its training/validation loss curves on the current axes.

    X, y: Training inputs and targets; 20% is held out via validation_split
    learning_rate: Used only for the plot title. The optimizer's rate is whatever
        `model` was compiled with, so the caller must build the model with this value.
    num_epochs: Number of epochs to train
    bs: Batch size
    model: Compiled Keras model to train

    Returns the Keras History object produced by `model.fit`.
    """
    # Train the model on the training data
    history = model.fit(X, y, epochs=num_epochs, batch_size=bs, verbose=0, validation_split=0.2)

    # Plot learning curves
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='validation')
    plt.title("Learning Rate: {}".format(str(learning_rate)))
    plt.ylabel('Loss')
    plt.legend(loc="upper right")
    return history

In [None]:
def manual_tuning_batches(X, y, learning_rate, num_epochs, bs, model):
    """
    Fit `model` with batch size `bs` and draw its train/validation MAE curves
    on the current axes.

    X, y: Training inputs and targets; 20% is held out via validation_split
    learning_rate: Not used inside this function (kept for a uniform signature)
    num_epochs: Number of epochs to train
    bs: Batch size, also shown in the plot title
    model: Compiled Keras model that reports the 'mae' metric

    Returns the Keras History object produced by `model.fit`.
    """
    fit_options = dict(epochs=num_epochs, batch_size=bs, verbose=0, validation_split=0.2)
    history = model.fit(X, y, **fit_options)

    # One curve per data split, drawn in train -> validation order
    for metric_key, legend_label in (('mae', 'train'), ('val_mae', 'validation')):
        plt.plot(history.history[metric_key], label=legend_label)

    plt.title('Batches = ' + str(bs))
    plt.ylabel('mae')
    plt.legend(loc="upper right")
    return history

In [None]:
def fit_model_es_checkpoint(X, y, learning_rate, num_epochs, bs, model):
    """
    Train `model` with early stopping and best-model checkpointing.

    X, y: Training inputs and targets; 20% is held out via validation_split
    learning_rate: Not used inside this function; the optimizer's rate is whatever
        `model` was compiled with
    num_epochs: Maximum number of epochs (early stopping may end training sooner)
    bs: Batch size
    model: Compiled Keras model to train (trained in place)

    Returns the Keras History object produced by `model.fit`.
    """
    # File name pattern embeds the epoch number and that epoch's validation loss
    checkpoint_name = 'U_Weights_-{epoch:03d}--{val_loss:.5f}.hdf5'

    # Stop once validation loss has not improved for 30 consecutive epochs
    early_stopping_callback = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=30)
    # Monitor val_loss so "best" matches both the early-stopping criterion and the
    # value written into the checkpoint file name (previously training 'mae').
    checkpoint_callback = ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose=0, save_best_only=True, mode='min')

    # Train the model that was passed in, not a notebook-level global
    history = model.fit(
        X, y,
        epochs=num_epochs,
        batch_size=bs,
        validation_split=0.20,
        verbose=0,
        callbacks=[early_stopping_callback, checkpoint_callback],
    )
    return history

# Data

## Parameters

In [None]:
# Station and target variable to model
station_number = 'SE'
target_name = 'pm25'

# Number of time steps per sample
n_steps = 24

# Meteorological parameter codes (first field of every row returned by the query)
col_names = [row[0] for row in qdata("select meteorological_code from cat_meteorological_params")]

# Column list: the two default columns followed by every meteorological code
cols = ", ".join(["datetime", target_name] + [str(code) for code in col_names])

print(cols)

# Where filter:
where_txt = "where datetime >= '2021-01-01'"

## Creating samples

In [None]:
# Sample builder for the chosen station, column list and date filter
main_processed_df = univariate_samples(station_number, cols, where_txt)

# Execution of processing functions
#initial_df = main_processed_df.initial_df()

# Build the (inputs, targets) pair and convert both to numpy.ndarray
X, y = map(np.asarray, main_processed_df.samples_creation(n_steps, target_name))

# The third axis of X is read as the number of features per time step
n_features = X.shape[2]

In [None]:
# Walk through the three-dimensional structure of the input samples
sample_shape = np.shape(X)

print(sample_shape)
print()
print("* The 1st dimension is the number of samples, in this case: {}".format(sample_shape[0]))
print("* The 2nd dimension is the number of time steps per sample, in this case {}, the value specified to the function.".format(sample_shape[1]))
print("* The 3rd dimension specifies the number of parallel time series —or the number of variables— in this case {}.".format(sample_shape[2]))


# Data model

In [None]:
# Learning rate used to compile the reference model
learning_rate = 0.01

# Build the network for the sample geometry computed above and show its layers
NN_model = design_model(n_steps=n_steps, n_features=n_features, lr=learning_rate)
NN_model.summary()

In [None]:
# Manual learning-rate sweep: trains one fresh model per candidate rate and
# stacks the resulting loss curves in a single figure.

#fixed number of epochs
num_epochs = 20

#fixed batch size
batch_size = 8

if manual_tuning:
    # List of learning rates for testing
    learning_rates = [1, 1E-1, 1E-2, 1E-3, 1E-4, 1E-5]

    plt.figure(figsize=(14,12))
    # NOTE(review): these three calls run before any subplot exists, so they may not
    # affect the panels created in the loop below — confirm the intended layout/limits.
    plt.ylim([150, 200])
    plt.subplots_adjust(bottom=0.1, top=1.4)
    plt.tight_layout()

    for i, candidate_lr in enumerate(learning_rates):
        plt.subplot(len(learning_rates), 1, (i+1))
        # Compile each model with the rate under test; previously every model was
        # built with the fixed `learning_rate`, so all panels showed the same setting.
        manual_tuning_lr(X, y, candidate_lr, num_epochs, batch_size, design_model(n_steps, n_features, candidate_lr))

    plt.show()

    plt.cla()
    plt.clf()
    plt.close('all')

In [None]:
# Manual batch-size sweep: one freshly built model per candidate batch size,
# with the MAE curves stacked in a single figure.

# Fixed number of epochs and fixed learning rate for every run
num_epochs = 20
learning_rate = 0.01

if manual_tuning:
    # Candidate batch sizes to compare
    batch_size = [8, 16, 32, 64, 128]
    n_panels = len(batch_size)

    plt.figure(figsize=(14,12))
    plt.subplots_adjust(bottom=0.1, top=1.4)

    for i, bs_candidate in enumerate(batch_size):
        plt.subplot(n_panels, 1, i + 1)
        manual_tuning_batches(
            X, y, learning_rate, num_epochs, bs_candidate,
            design_model(n_steps, n_features, learning_rate),
        )

    plt.show()

    # Release the figure once it has been rendered
    plt.cla()
    plt.clf()
    plt.close('all')

In [None]:
#Training with early stopping

# Maximum number of epochs
num_epochs = 300

#fixed learning rate value
learning_rate = 0.01

#fixed batch size
batch_size = 4

# Train the notebook-level NN_model so the fitted network stays addressable after
# this cell. Passing a freshly built, unnamed model would leave no handle on it.
# Re-running this cell continues training NN_model from its current weights.
history = fit_model_es_checkpoint(X, y, learning_rate, num_epochs, batch_size, NN_model)

#plotting
plt.figure(figsize=(8,5))
plt.subplots_adjust(bottom=0.1, top=1.4)
plt.tight_layout()

# Left panel: loss (MSE) per epoch
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.title("Loss")
plt.xlabel('Epochs')
plt.ylabel('Loss (MSE)')
plt.legend(loc="upper right")


# Right panel: MAE per epoch
plt.subplot(1, 2, 2)
plt.plot(history.history['mae'], label='train')
plt.plot(history.history['val_mae'], label='validation')
plt.title("MAE")
plt.xlabel('Epochs')
plt.ylabel('mae')
plt.legend(loc="upper right")

plt.show()

plt.cla()
plt.clf()
plt.close('all')

# Summary: best loss over all epochs, and MAE at the last epoch that ran
print("Min training Loss:", min(history.history["loss"]))
print("Min validation Loss: ", min(history.history["val_loss"]))
print("")
print("Final training MAE:", history.history['mae'][-1])
print("Final validation MAE:", history.history['val_mae'][-1])

# Validation

In [None]:
# Demonstrate prediction on the last sample of X.
# The previous hard-coded 3x2 array could not be reshaped to (1, n_steps, n_features),
# and `model` is not defined at notebook scope; the network lives in `NN_model`.
x_input = X[-1].reshape((1, n_steps, n_features))
yhat = NN_model.predict(x_input, verbose=0)
print(yhat)