In [16]:
#!pip install pandas
#!pip install seaborn
#!pip install openpyxl

In [17]:
import pandas as pd
import numpy as np
import time
import csv
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import model_from_json
from tensorflow import data
from matplotlib import pyplot
from sklearn.metrics import mean_squared_error
from sklearn.metrics import median_absolute_error
from sklearn.preprocessing import MinMaxScaler

In [18]:
# #from google.colab import drive
#drive.mount('/content/drive')
file_data = 'lynx.csv'
path_name='../datasets/'
path_name_results='../results/'
file_result = 'Result_LSTM_canadian_lynx.csv'

In [19]:
dataset = pd.read_csv(f'{path_name}{file_data}', sep =',', encoding = 'latin1', decimal='.',usecols=[2])
dataset.columns = ['value']
#dataset.columns = ['month','year', 'value']
#dataset.values

In [20]:
#checks if there are null variables
dataset.isna().sum()

value    0
dtype: int64

In [21]:
def salvar_resultado(nm_dataset, ds_best_param, n_time_steps, MSE, RMSE, MAE, MAPE, sMAPE, Duration):
  #Script to write training cycle results
  data = [nm_dataset, ds_best_param, n_time_steps, MSE, RMSE, MAE, MAPE, sMAPE, Duration]
  fields = ['Dataset','Best Params','n_time_steps','MSE', 'RMSE', 'MAE', 'MAPE','sMAPE','Duration']
  with open(f'{path_name_results}{file_result}', "a",newline='') as csv_file:
    writer = csv.writer(csv_file,delimiter=';')
    writer.writerow(data)  
  print(fields)
  print(data)
    
#Script to create the results file
def criar_arquivo_resultado():
  fields = ['Dataset','Best Params','n_time_steps','MSE', 'RMSE', 'MAE','MAPE','sMAPE','Duration']
  with open(f'{path_name_results}{file_result}', "w",newline='') as csv_file:
    writer = csv.writer(csv_file,delimiter=';')
    writer.writerow(fields)    

In [22]:
# convert an array of values into a dataset matrix
def create_matrix_dataset(dataset, n_time_steps=1):
 dX, dY = [], []
 for i in range(len(dataset)-n_time_steps-1):
  a = dataset[i:(i+n_time_steps), 0]
  dX.append(a)
  dY.append(dataset[i + n_time_steps, 0])
 return np.array(dX), np.array(dY)

In [23]:
  
def save_model(model,n_time_steps):
  # serialize model to JSON
  model_json = model.to_json()
  with open(f'{path_name_results}model_{n_time_steps}.json', "w") as json_file:
    json_file.write(model_json)

  # serialize weights to HDF5
  model.save_weights(f'{path_name_results}model_{n_time_steps}.h5')
  print("Saved model to disk")


In [24]:
def gera_resultado(y_test, predict,nm_dataset, resultado, n_time_steps, Duracao):
 #Mean Squared Error (Mean Squared Difference Between Estimated Values and Actual Values) - MSE
 MSE = mean_squared_error(y_test, predict)    
 #Square Root of Mean Error - RMSE
 RMSE = np.sqrt(mean_squared_error(y_test, predict))    
 #Mean Absolute Distance or Mean Absolute Error - MAE
 MAE= median_absolute_error(y_pred=predict, y_true = y_test) 
  
 #Calculate the MAPE (Mean Absolute Percentage Error)
 MAPE = ((np.mean(np.abs(y_test -predict) / (y_test)))) * 100   
  
 sMAPE = round(
 	np.mean(
 		np.abs(predict - y_test) /
 		((np.abs(predict) + np.abs(y_test)))
 	)*100, 2
 ) 
 salvar_resultado(nm_dataset, resultado, n_time_steps, MSE, RMSE, MAE, MAPE, sMAPE, Duracao)

In [25]:
def previsao_LSTM(nm_dataset, dataset, n_time_steps, l1, l2, l3,num_epochs, batch_size ): 
 #num_epochs = 100 # number of epochs for train
 #n_time_steps = 6
 #l1, l2, l3 = 32 , 64, 32  # 4.61 smape   
 #l1, l2, l3 = 64 , 64, 64  # 4.61 smape
 #nm_dataset ='sunspot'
 #Split dataset in treinam /  80% treinam  20% test
 dataset=np.array(dataset) 
 nlinhas = int(len(dataset) * 0.80)
 test = dataset[nlinhas:len(dataset),:]  
 train = dataset[0:nlinhas,:] 
 #  reshape into X=t and Y=t+1 ot n_time_steps by steps
 #n_time_steps = 5
 X_train, Y_train = create_matrix_dataset(train, n_time_steps)
 X_test, Y_test = create_matrix_dataset(test, n_time_steps) 
 #X_train.shape , Y_train.shape , X_test.shape , Y_test.shape  
 #reshape input to be [samples, time steps, features]
 X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
 X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1)) 
 # Stores the training execution start time
 Hora_Inicio = time.time()
   
 resultado = "LSTM (" + str(l1) + "," + str(l2) + "," + str(l3) + ") n_time_steps=" + str(n_time_steps) + str(" epochs=") + str(num_epochs)
 print(resultado)
 # create and fit the LSTM network

 steps_per_epoch = len(X_train) 
 model = Sequential()
 model.add(LSTM(l1, batch_input_shape=(batch_size, n_time_steps, 1 ), stateful=True, return_sequences=True))
 model.add(LSTM(l2, batch_input_shape=(batch_size, n_time_steps, 1 ), stateful=True, return_sequences=True))
 model.add(LSTM(l3, batch_input_shape=(batch_size, n_time_steps, 1 ), stateful=True))
 model.add(Dense(1))
 model.compile(loss='mean_squared_error', optimizer='adam',run_eagerly=True)
 
 #equalize train data to be multiple of batch_size
 train_size = int(np.trunc(len(X_train) / batch_size))
 X_train = X_train[0:(train_size * batch_size),:]
 Y_train = Y_train[0:(train_size * batch_size)]
 
 #equalize test data to be multiple of batch_size
 test_size = int(np.trunc(len(X_test) / batch_size))
 X_test = X_test[0:(test_size * batch_size),:]
 Y_test = Y_test[0:(test_size * batch_size)]
 
 for i in range(num_epochs):	
   model.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=1, shuffle=False)
   model.reset_states()
 
 #predict values to X_test with size multiple of batch_size
 predict = model.predict(X_test, batch_size=batch_size)   
 
 Y_test = [Y_test]
 predict = predict.reshape(-1,1)[0:batch_size]   
 Y_test=np.array(Y_test[0][0:batch_size]).reshape(batch_size,1) 
 
 Hora_Fim = time.time()   
 #Calculate the duration of the training execution
 Duracao = Hora_Fim - Hora_Inicio   
 
 #calc metrics of error and save in file
 gera_resultado(Y_test, predict,nm_dataset, resultado, n_time_steps, Duracao)



In [26]:
#create file to results
criar_arquivo_resultado()

print('forecast for canadian lynx')
num_epochs = 100 # number of epochs for train
batch_size = 6

#n_time_steps, l1, l2, l3 = 1, 32, 64, 32  #  83.40388528527126, 79.44, 24.19486141204834
#n_time_steps, l1, l2, l3 = 1, 300, 300, 300  #  146.14184445158827, 64.25, 57.395512104034424
#n_time_steps, l1, l2, l3 = 1, 90, 90, 90    # 94.31879577140427, 76.1, 23.693809986114502
#n_time_steps, l1, l2, l3 = 1, 60, 60, 60    # 85.54492505725425, 76.97, 23.741946697235107
l1, l2, l3 = 88, 88, 88
for n_time_steps in range(1,13): #predict with 1 to 12 past values of medition 
  previsao_LSTM('c.lynx', dataset, n_time_steps, l1, l2, l3,num_epochs, batch_size)

def random_model():
  for n_time_steps in range(1,2): #predict with 1 to 12 past values of medition    
    for l1 in range(8,101,40): # chose layer 1 nodes - min 2 and max 4
        for l2 in range(8,101,20): # chose layer 2 nodes - min 4 and max 12
            for l3 in range(8,101,10): # chose layer 3 nodes - min 6 and max 8
                previsao_LSTM('c.lynx', dataset, n_time_steps, l1, l2, l3,num_epochs, batch_size)    
                
#random_model()

forecast for canadian lynx
LSTM (88,88,88) n_time_steps=1 epochs=100


['Dataset', 'Best Params', 'n_time_steps', 'MSE', 'RMSE', 'MAE', 'MAPE', 'sMAPE', 'Duration']
['c.lynx', 'LSTM (88,88,88) n_time_steps=1 epochs=100', 1, 7525965.919087246, 2743.349397923503, 2947.046432495117, 81.3351426945413, 76.84, 110.84691071510315]
LSTM (88,88,88) n_time_steps=2 epochs=100
['Dataset', 'Best Params', 'n_time_steps', 'MSE', 'RMSE', 'MAE', 'MAPE', 'sMAPE', 'Duration']
['c.lynx', 'LSTM (88,88,88) n_time_steps=2 epochs=100', 2, 5152045.899627118, 2269.8118643683047, 1714.4157104492188, 76.3741283575502, 64.28, 101.43463253974915]
LSTM (88,88,88) n_time_steps=3 epochs=100
['Dataset', 'Best Params', 'n_time_steps', 'MSE', 'RMSE', 'MAE', 'MAPE', 'sMAPE', 'Duration']
['c.lynx', 'LSTM (88,88,88) n_time_steps=3 epochs=100', 3, 3671025.557062572, 1915.9920555844099, 297.0, 62.565198464926155, 49.68, 112.23868155479431]
LSTM (88,88,88) n_time_steps=4 epochs=100
['Dataset', 'Best Params', 'n_time_steps', 'MSE', 'RMSE', 'MAE', 'MAPE', 'sMAPE', 'Duration']
['c.lynx', 'LSTM (88,8