In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from pandas import DataFrame
import numpy as np

from math import sqrt
from numpy import concatenate
from pandas import concat
from numpy import ndarray
import csv

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import Adam


In [2]:
# Helper that reframes a time series as a supervised-learning problem
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.

    Arguments:
        data: Sequence of observations as a list, NumPy array, pandas Series
            or DataFrame. One-dimensional input is treated as one variable.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning. Columns
        are named ``var<j>(t-<i>)`` for lags, ``var<j>(t)`` for the current
        step and ``var<j>(t+<i>)`` for further forecast steps.
    """
    df = DataFrame(data)
    # Count the variables on the frame itself rather than on the raw input:
    # this also works for 1-D arrays / Series (which have no shape[1]) and
    # for lists of multi-column rows.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # shifting leaves NaN at the edges; drop those incomplete rows on request
    if dropnan:
        agg = agg.dropna()
    return agg

In [3]:
def ubah_data(the_data, lag):
    """Reframe ``the_data`` as a supervised table with ``lag`` lagged inputs and one output step."""
    return series_to_supervised(the_data, n_in=lag, n_out=1)

In [4]:
def ubah_LSTM(data_framed, size):
    """Split a framed dataset into train/test parts shaped for an LSTM.

    Arguments:
        data_framed: Object exposing ``.values`` as a 2-D array whose last
            column is the target and whose other columns are the inputs.
        size: Number of trailing rows assigned to the test part.
    Returns:
        Tuple ``(train_X, train_y, test_X, test_y)``; the X arrays are
        reshaped to ``[samples, 1, features]``.
    """
    def _as_lstm_input(features):
        # insert a single-timestep axis: [samples, timesteps=1, features]
        n_rows, n_cols = features.shape[0], features.shape[1]
        return features.reshape((n_rows, 1, n_cols))

    matrix = data_framed.values

    # everything before the cut is train, the last `size` rows are test
    cut = len(matrix) - size
    head, tail = matrix[:cut], matrix[cut:]

    # last column is the output, the remaining columns are the inputs
    train_y = head[:, -1]
    test_y = tail[:, -1]
    train_X = _as_lstm_input(head[:, :-1])
    test_X = _as_lstm_input(tail[:, :-1])

    return train_X, train_y, test_X, test_y

In [5]:
def definisi_LSTM(hidden_layer, max_epoch, batch_size, train_X, train_y, test_X, test_y):
    """Build, compile and fit an LSTM model, print its settings and return it.

    Arguments:
        hidden_layer: Architecture scenario. 1 -> a single LSTM layer;
            2 -> two stacked LSTM layers; any other value -> one LSTM layer
            followed by a ReLU Dense layer. A Dense(1) output layer is
            always appended.
        max_epoch: Number of training epochs.
        batch_size: Batch size, or the string "N" to use the whole training
            set as one batch.
        train_X, train_y: Training inputs ([samples, timesteps, features])
            and targets.
        test_X, test_y: Data passed to ``fit`` as ``validation_data``.
    Returns:
        The fitted Keras ``Sequential`` model.
    """
    n_samples = len(train_X)

    # "N" is a sentinel meaning "one batch containing every training sample"
    if batch_size == "N":
        batch_size = n_samples

    # layer width is derived from the number of training samples
    neu = int(2/3*(n_samples+1))

    model = Sequential()
    # every scenario starts with the same sequence-returning LSTM layer
    # NOTE(review): because return_sequences=True keeps the timestep axis,
    # scenarios other than 2 appear to yield 3-D predictions — confirm the
    # downstream evaluation expects that shape.
    model.add(LSTM(neu, return_sequences=True, input_shape=(train_X.shape[1], train_X.shape[2])))
    if hidden_layer == 2:
        model.add(LSTM(neu))
    elif hidden_layer != 1:
        model.add(Dense(neu, activation='relu'))
    model.add(Dense(1))

    model.compile(loss='mae', optimizer='adam')

    # fit network (order preserved: time-series data must not be shuffled)
    model.fit(
        train_X,
        train_y,
        epochs=max_epoch,
        batch_size=batch_size,
        validation_data=(test_X, test_y),
        verbose=0,
        shuffle=False,
    )

    # report the configuration that was trained
    print("============== Model LSTM ================")
    print("Jumlah Neuron : ", neu)
    print("Max epoch : ", max_epoch)
    print("Skenario Hidden Layer : ", hidden_layer)
    print("Batch size : ", batch_size)

    return model

In [6]:
def prediksi_evaluasi(history, test_X, data_evaluasi):
    """Predict with a fitted model and score the predictions.

    Arguments:
        history: Object exposing ``predict`` (despite the name, this is the
            fitted model, not a Keras training history).
        test_X: Inputs passed to ``history.predict``.
        data_evaluasi: Table with a ``'case'`` column holding the reference
            values; its length decides how many predictions are scored.
    Returns:
        Tuple ``(mse, rmse, mae)``.
    Raises:
        IndexError: if there are fewer predictions than reference values.
    """
    yhat = history.predict(test_X)

    # turn the predictions into plain Python lists
    hasil_lstm = yhat.tolist()
    list_evaluasi = data_evaluasi['case'].tolist()

    # keep the first element of each prediction, one per reference value
    hasil_final = [hasil_lstm[i][0] for i in range(len(list_evaluasi))]

    print("============= Hasil Evaluasi LSTM ==============")
    mse = mean_squared_error(list_evaluasi, hasil_final)
    print("Hasil MSE :", mse)

    # RMSE is computed from the MSE directly: the `squared=False` keyword of
    # mean_squared_error is deprecated in scikit-learn 1.4 and removed in 1.6.
    rmse = sqrt(mse)
    print("Hasil RMSE :", rmse)

    mae = mean_absolute_error(list_evaluasi, hasil_final)
    print("Hasil MAE :", mae)

    return mse, rmse, mae

In [7]:
case = pd.read_csv('data/new/case_maret_july.csv')
case.set_index('date', inplace=True)

# Parameter settings
# size_all trailing rows of `case` form the validation slice; the same count
# is reused inside ubah_LSTM as the test size carved out of `train`.
size_all = 10
validation_size = len(case) - size_all
train, validation = case[0:validation_size], case[validation_size:len(case)]

# Hyper-parameter grid ("N" = one batch with the whole training set)
lag_sliding_window = [1, 2, 3]
hidden_layer = [1, 2, 3]
max_epoch = [100, 200, 300]
batch_size = [1, 2, "N"]

# NOTE(review): prediksi_evaluasi scores predictions made from test_X (the
# last size_all framed rows of `train`) against the `validation` slice (the
# size_all rows of `case` that follow `train`) — confirm this offset between
# inputs and reference values is intended.
hasil_grid = []
count = 1
for i in lag_sliding_window:
    # Reframing and splitting depend only on the lag, so do them once per lag
    # instead of once per (hidden layer, epoch, batch size) combination.
    ubah = ubah_data(train, i)
    train_X, train_y, test_X, test_y = ubah_LSTM(ubah, size_all)
    for j in hidden_layer:
        for k in max_epoch:
            for l in batch_size:
                print("===== Grid Search Iterasi ke : ", count, " =====")
                print("Lag sliding window : ", i)
                training_LSTM = definisi_LSTM(j, k, l, train_X, train_y, test_X, test_y)
                mse, rmse, mae = prediksi_evaluasi(training_LSTM, test_X, validation)
                hasil_grid.append((i, j, k, l, mse, rmse, mae))
                count = count + 1

# Rows mix ints, floats and the "N" string; an object array keeps every cell's
# original type instead of coercing each row to a single dtype.
data_grid = np.array(hasil_grid, dtype=object)

===== Grid Search Iterasi ke :  1  =====
Lag sliding window :  1
Jumlah Neuron :  268
Max epoch :  100
Skenario Hidden Layer :  1
Batch size :  1
Hasil MSE : 245508.8416416849
Hasil RMSE : 495.4884879002588
Hasil MAE : 477.29312591552736
===== Grid Search Iterasi ke :  2  =====
Lag sliding window :  1
Jumlah Neuron :  268
Max epoch :  100
Skenario Hidden Layer :  1
Batch size :  2
Hasil MSE : 314450.7286769524
Hasil RMSE : 560.7590647300785
Hasil MAE : 544.7483032226562
===== Grid Search Iterasi ke :  3  =====
Lag sliding window :  1
Jumlah Neuron :  268
Max epoch :  100
Skenario Hidden Layer :  1
Batch size :  402
Hasil MSE : 504594.9011183016
Hasil RMSE : 710.3484364157506
Hasil MAE : 697.7785625457764
===== Grid Search Iterasi ke :  4  =====
Lag sliding window :  1
Jumlah Neuron :  268
Max epoch :  200
Skenario Hidden Layer :  1
Batch size :  1
Hasil MSE : 343233.5463121945
Hasil RMSE : 585.8613712408376
Hasil MAE : 570.5554626464843
===== Grid Search Iterasi ke :  5  =====
Lag slid

KeyboardInterrupt: 

In [None]:
# Persist the grid-search results: one header row, then one row per
# hyper-parameter combination held in `data_grid`.
# The 'batch size' column name previously carried a stray leading space,
# which ended up verbatim in the CSV header.
header = ['lag sliding window', 'hidden layer', 'max epoch', 'batch size', 'mse', 'rmse', 'mae']
# newline='' lets the csv module control line endings itself
with open('hasil_grid.csv', 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(data_grid)

print("Selesai bosqueeeee")