In [1]:
# grid search lstm for airline passengers
from math import sqrt
from numpy import array
from numpy import mean
from pandas import DataFrame
from pandas import concat
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

In [2]:
def model_configs():
    """Build the full hyper-parameter grid to search.

    Every configuration is a five-element list
    ``[n_input, n_nodes, n_epochs, n_batch, n_diff]``. The grid is the
    Cartesian product of the candidate values below, with ``n_input``
    varying slowest and ``n_diff`` varying fastest.

    Returns:
        list: all configurations in product order. The total count is
        printed as a side effect.
    """
    lag_windows = [1, 3, 7, 14]  # sliding-window (lag) lengths
    layer_widths = [103, 68, 51, 41, 34, 29, 25, 22, 20, 17, 12, 11, 10, 8, 7, 6, 5, 13, 9, 4, 3, 2]  # nodes per layer
    epoch_counts = [100]  # number of training epochs
    batch_sizes = [1, 4, 8, 16]  # training batch sizes
    diff_orders = [0]  # differencing order, fixed at 0

    # One flat comprehension; clause order reproduces the nesting order.
    grid = [
        [lag, width, epochs, batch, diff]
        for lag in lag_windows
        for width in layer_widths
        for epochs in epoch_counts
        for batch in batch_sizes
        for diff in diff_orders
    ]
    print('Total configs: %d' % len(grid))
    return grid

In [3]:
def grid_search(data, cfg_list, n_test):
    """Evaluate every configuration and rank the results by error.

    Args:
        data: the series, passed through to ``repeat_evaluate``.
        cfg_list: iterable of configurations to evaluate.
        n_test: number of held-out observations, passed through to
            ``repeat_evaluate``.

    Returns:
        list: the ``(key, score)`` tuples returned by ``repeat_evaluate``,
        sorted by score in ascending order.
    """
    results = []
    for cfg in cfg_list:
        results.append(repeat_evaluate(data, cfg, n_test))
    # sorted() is stable, so equal scores keep their evaluation order.
    return sorted(results, key=lambda entry: entry[1])

In [4]:
def repeat_evaluate(data, config, n_test, n_repeats=10):
    """Score one configuration by averaging repeated walk-forward runs.

    No exception is caught here: if an evaluation raises, the error
    propagates to the caller.

    Args:
        data: the series, passed through to ``walk_forward_validation``.
        config: the configuration to evaluate; ``str(config)`` is the key.
        n_test: number of held-out observations.
        n_repeats: how many times to fit and evaluate the model.

    Returns:
        tuple: ``(key, mean_error)`` where ``mean_error`` is the numpy
        mean of the per-run errors. The summary line is also printed.
    """
    label = str(config)
    # Each repeat fits and evaluates again via walk_forward_validation.
    errors = []
    for _ in range(n_repeats):
        errors.append(walk_forward_validation(data, n_test, config))
    avg_error = mean(errors)
    print('> Model[%s] %.3f' % (label, avg_error))
    return (label, avg_error)

In [5]:
def model_fit(train, config):
    """Fit an LSTM network on the training series for one configuration.

    Args:
        train: training observations.
        config: ``[n_input, n_nodes, n_epochs, n_batch, n_diff]``.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    lag, units, epochs, batch_size, diff_order = config
    # Differencing is applied only for a positive order.
    series = difference(train, diff_order) if diff_order > 0 else train
    # Supervised layout: the last column is the target, the rest are lags.
    supervised = series_to_supervised(series, n_in=lag)
    inputs = supervised[:, :-1]
    targets = supervised[:, -1]
    # The LSTM layer expects [samples, timesteps, features].
    n_features = 1
    inputs = inputs.reshape((inputs.shape[0], inputs.shape[1], n_features))
    # LSTM -> Dense(relu) -> single linear output.
    layers = [
        LSTM(units, activation='relu', input_shape=(lag, n_features)),
        Dense(units, activation='relu'),
        Dense(1),
    ]
    network = Sequential()
    for layer in layers:
        network.add(layer)
    network.compile(loss='mse', optimizer='adam')
    network.fit(inputs, targets, epochs=epochs, batch_size=batch_size, verbose=0)
    return network

In [6]:
def walk_forward_validation(data, n_test, cfg):
    """Run walk-forward validation for one configuration.

    The model is fitted once on the training portion. Each test step is
    then forecast from the history so far, after which the true
    observation is appended to that history.

    Args:
        data: the full series.
        n_test: number of trailing observations held out for testing.
        cfg: the configuration passed to ``model_fit`` and ``model_predict``.

    Returns:
        The error from ``measure_rmse`` over the test portion. It is also
        printed.
    """
    train, test = train_test_split(data, n_test)
    model = model_fit(train, cfg)
    # History starts as a copy of the training data and grows each step.
    history = list(train)
    forecasts = []
    for step in range(len(test)):
        forecasts.append(model_predict(model, history, cfg))
        # Append the actual value, not the forecast, for the next step.
        history.append(test[step])
    rmse = measure_rmse(test, forecasts)
    print(' > %.3f' % rmse)
    return rmse

In [7]:
def train_test_split(data, n_test):
    """Split a sequence into a leading train part and a trailing test part.

    Args:
        data: a sliceable sequence with a length (list, numpy array, ...).
        n_test: number of trailing observations to hold out; must be >= 0.

    Returns:
        tuple: ``(train, test)`` where ``test`` is the last ``n_test``
        items and ``train`` is everything before them. If ``n_test``
        exceeds the length, ``train`` is empty and ``test`` is all of
        ``data``.

    Raises:
        ValueError: if ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError('n_test must be non-negative, got %r' % (n_test,))
    # Slice at an explicit index rather than with -n_test: data[:-0] is
    # empty and data[-0:] is everything, which would swap train and test
    # when n_test is 0.
    split_at = max(len(data) - n_test, 0)
    return data[:split_at], data[split_at:]

In [8]:
def difference(data, order):
    """Return the lag-``order`` differences of a sequence.

    Element ``k`` of the result is ``data[order + k] - data[k]``, so the
    result is ``order`` items shorter than ``data``.

    Args:
        data: an indexable sequence of values that support subtraction.
        order: the lag between the two values being subtracted.

    Returns:
        list: the differenced values.
    """
    deltas = []
    for idx in range(order, len(data)):
        deltas.append(data[idx] - data[idx - order])
    return deltas

In [9]:
def series_to_supervised(data, n_in=1, n_out=1):
    """Reframe a series as a supervised-learning table.

    Each row holds ``n_in`` lagged values (oldest first, t-n_in ... t-1)
    followed by ``n_out`` values starting at the current step
    (t ... t+n_out-1). Rows made incomplete by the shifting are dropped.

    Args:
        data: anything accepted by the pandas ``DataFrame`` constructor.
        n_in: number of lagged input steps.
        n_out: number of output steps.

    Returns:
        numpy.ndarray: the table as an array, one row per sample.
    """
    frame = DataFrame(data)
    # Positive shifts move values down, exposing past observations.
    lagged = [frame.shift(lag) for lag in range(n_in, 0, -1)]
    # Zero or negative shifts expose the current and future observations.
    leads = [frame.shift(-step) for step in range(n_out)]
    table = concat(lagged + leads, axis=1)
    # Shifting leaves NaN at the edges; drop those incomplete rows.
    return table.dropna().values

In [10]:
def model_predict(model, history, config):
    """Forecast the next step from the most recent observations.

    Args:
        model: a fitted model exposing ``predict(x, verbose=0)``.
        history: observations seen so far, oldest first.
        config: ``[n_input, n_nodes, n_epochs, n_batch, n_diff]``; only
            ``n_input`` and ``n_diff`` are used here.

    Returns:
        The first row of the model output, plus the undifferencing
        correction when ``n_diff`` is positive.
    """
    lag, _nodes, _epochs, _batch, diff_order = config
    if diff_order > 0:
        # Keep the raw value that gets added back to undo differencing.
        offset = history[-diff_order]
        series = difference(history, diff_order)
    else:
        offset = 0.0
        series = history
    # Last `lag` values shaped as [samples, timesteps, features].
    window = array(series[-lag:])
    sample = window.reshape((1, lag, 1))
    forecast = model.predict(sample, verbose=0)
    return offset + forecast[0]

In [11]:
def measure_rmse(actual, predicted):
    """Return the root mean squared error between two sequences.

    Args:
        actual: the observed values.
        predicted: the forecast values.

    Returns:
        float: square root of ``mean_squared_error(actual, predicted)``.
    """
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

In [12]:
# Driver cell: load the series, build the config grid, run the search.
# NOTE: the grid has 352 configs and each one is evaluated 10 times
# (repeat_evaluate's default), fitting a fresh model for 100 epochs per
# run; the run recorded below was stopped with KeyboardInterrupt.

# define dataset: first CSV row is the header, first column is the index
series = read_csv('data/new/case_maret_july.csv', header=0, index_col=0)
# underlying values of the remaining column(s) as an array
data = series.values

# data split: hold out the last 10 observations for testing
n_test = 10

# model configs
cfg_list = model_configs()

# grid search (returns (key, error) tuples sorted by ascending error)
scores = grid_search(data, cfg_list, n_test)
print('done')
# list the top 3 configs (lowest error first)
for cfg, error in scores[:3]:
    print(cfg, error)

Total configs: 352
 > 211.104
 > 196.958
 > 196.446
 > 200.363
 > 199.626
 > 198.583
 > 237.746
 > 198.065
 > 198.361
 > 199.194
> Model[[1, 103, 100, 1, 0]] 203.645
 > 199.119
 > 213.470
 > 205.630
 > 205.081
 > 200.749
 > 200.784
 > 206.305
 > 198.716
 > 199.168
 > 208.345
> Model[[1, 103, 100, 4, 0]] 203.737
 > 200.719
 > 203.026
 > 202.211
 > 202.079
 > 199.563
 > 199.888
 > 199.882
 > 200.018
 > 201.560
 > 199.665
> Model[[1, 103, 100, 8, 0]] 200.861
 > 200.542
 > 202.085
 > 201.451
 > 204.088
 > 201.242
 > 201.933
 > 200.332
 > 202.416
 > 200.538
 > 202.567
> Model[[1, 103, 100, 16, 0]] 201.719
 > 197.981
 > 198.230
 > 196.651
 > 198.949
 > 197.111
 > 199.618
 > 207.210
 > 198.611
 > 198.737
 > 198.996
> Model[[1, 68, 100, 1, 0]] 199.209
 > 200.458
 > 198.832
 > 199.730
 > 203.582
 > 199.077
 > 199.165
 > 200.211
 > 199.368
 > 199.227
 > 200.536
> Model[[1, 68, 100, 4, 0]] 200.019
 > 201.670
 > 201.697
 > 199.999
 > 203.534
 > 200.015
 > 204.335
 > 200.825
 > 201.623
 > 204.186
 

KeyboardInterrupt: 