In [None]:
# Google Colab setup: import the Drive helper from the google.colab package.
from google.colab import drive

# Mount Google Drive at /content/drive. This will prompt for authorization.
# NOTE(review): the training section of this notebook builds its paths from "./"
# rather than from /content/drive -- confirm the working directory before running.
drive.mount('/content/drive')

In [1]:
import math
import numpy as np
import pandas as pd

class DataLoader:
    '''Load paired input/target tables from text files and serve them to an LSTM model.

    Attributes:
        x: values read from ``filename_x`` (one row per file line).
        y: values read from ``filename_y`` (one row per file line).
        len: number of rows in ``x``.
    '''

    def __init__(self, filename_x, filename_y):
        '''Read both files as header-less, whitespace-separated tables.'''
        # Raw string literals: a bare '\s' in a normal literal is an invalid escape
        # sequence that newer Python versions warn about. The regex is unchanged.
        dataframe_x = pd.read_csv(filename_x, sep=r'\s+', header=None)
        dataframe_y = pd.read_csv(filename_y, sep=r'\s+', header=None)
        self.x = dataframe_x.values[:]
        self.y = dataframe_y.values[:]
        self.len = len(self.x)

    def get_data(self):
        '''Return copies of the full input and target arrays as ``(x, y)``.'''
        return np.array(self.x), np.array(self.y)

    def _next_window(self, i, seq_len, normalise):
        '''Build one training sample from the ``seq_len`` rows of ``self.x`` starting at ``i``.

        Returns:
            ``(x, y)`` where ``x`` is every row of the window except the last and
            ``y`` is the first column of the window's last row (length-1 array).

        Only ``self.x`` is read here; ``self.y`` is not used by this method.
        '''
        window = self.x[i:i + seq_len]
        if normalise:
            window = self.normalise_windows(window, single_window=True)[0]
        x = window[:-1]
        y = window[-1, [0]]
        return x, y

    def normalise_windows(self, window_data, single_window=False):
        '''Normalise each window relative to its first row: ``value / first_row_value - 1``.

        Args:
            window_data: a sequence of 2-D windows, or one 2-D window when
                ``single_window`` is true.
            single_window: treat ``window_data`` as a single window.

        Returns:
            Array of normalised windows (a leading window axis is always present).

        Raises:
            ZeroDivisionError: if any value in a window's first row is zero.
        '''
        windows = [window_data] if single_window else window_data
        normalised_data = []
        for window in windows:
            window = np.asarray(window, dtype=float)
            base_row = window[0]
            # Keep the explicit error of the element-wise version instead of
            # letting numpy silently produce inf/nan.
            if np.any(base_row == 0):
                raise ZeroDivisionError('float division by zero')
            # Broadcasting divides every row by the first row, column by column.
            normalised_data.append(window / base_row - 1)
        return np.array(normalised_data)

    def generate_train_batch(self, seq_len, batch_size, normalise):
        '''Yield ``(x_batch, y_batch)`` arrays built from consecutive windows of ``self.x``.

        Window start indices run from 0 up to (but excluding) ``self.len - seq_len``.
        If the windows run out in the middle of a batch, the short batch is yielded
        and iteration restarts from the first window with a fresh, empty batch.
        If the number of windows is an exact multiple of ``batch_size`` the
        generator ends after one pass (behaviour kept from the original code).

        Raises:
            ValueError: if ``batch_size`` is smaller than 1 (raised on first iteration).
        '''
        if batch_size < 1:
            # A non-positive batch size would yield empty batches forever.
            raise ValueError('batch_size must be >= 1')

        n_windows = self.len - seq_len
        i = 0
        while i < n_windows:
            x_batch = []
            y_batch = []
            for _ in range(batch_size):
                if i >= n_windows:
                    # Data exhausted mid-batch: stop filling and emit a short batch.
                    break
                x, y = self._next_window(i, seq_len, normalise)
                x_batch.append(x)
                y_batch.append(y)
                i += 1
            yield np.array(x_batch), np.array(y_batch)
            if len(x_batch) < batch_size:
                # Wrap around after a short final batch. The next batch starts from
                # scratch, so no windows from the previous pass leak into it.
                i = 0
        

In [3]:
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
import matplotlib.pyplot as plt
import numpy as np
import json
%matplotlib inline  

 
# date-time parsing function for loading the dataset
def parser(x):
    '''Parse a year-digit/month string such as ``'1-01'`` into a datetime.

    The literal prefix ``'190'`` is prepended before parsing with ``'%Y-%m'``,
    so ``'1-01'`` becomes 1901-01-01.

    Args:
        x: string appended to ``'190'``; the result must match ``'%Y-%m'``.

    Returns:
        ``datetime.datetime`` for the parsed year and month.

    Raises:
        ValueError: if the prefixed string does not match ``'%Y-%m'``.
    '''
    # pandas.datetime was only an alias of datetime.datetime; it was deprecated in
    # pandas 1.0 and removed in 2.0, so use the standard-library class directly.
    from datetime import datetime
    return datetime.strptime('190' + x, '%Y-%m')
 
# frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag=1):
    '''Place lagged copies of ``data`` next to the original values.

    The result holds the frame shifted by 1, 2, ..., ``lag`` rows (in that column
    order) followed by the unshifted frame; gaps created by shifting are set to 0.
    '''
    frame = DataFrame(data)
    lagged = [frame.shift(step) for step in range(1, lag + 1)]
    combined = concat(lagged + [frame], axis=1)
    return combined.fillna(0)
 
# create a differenced series
def difference(dataset, interval=1):
    '''Return a Series of ``dataset[k] - dataset[k - interval]`` for every k >= interval.'''
    deltas = [
        dataset[pos] - dataset[pos - interval]
        for pos in range(interval, len(dataset))
    ]
    return Series(deltas)
 
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    '''Undo differencing by adding the value ``interval`` steps from the end of ``history``.'''
    reference = history[-interval]
    return yhat + reference
 
# scale train and test data to [0, 1]
def scale(train, test):
    '''Fit a MinMaxScaler with feature range (0, 1) on ``train`` and apply it to both sets.

    Returns:
        ``(scaler, train_scaled, test_scaled)``; the scaler is fitted on ``train`` only.
    '''
    # fit on the training data only
    scaler = MinMaxScaler(feature_range=(0, 1)).fit(train)

    # reshape each set to (rows, columns) and transform it with the fitted scaler
    train_rows, train_cols = train.shape[0], train.shape[1]
    train_scaled = scaler.transform(train.reshape(train_rows, train_cols))

    test_rows, test_cols = test.shape[0], test.shape[1]
    test_scaled = scaler.transform(test.reshape(test_rows, test_cols))

    return scaler, train_scaled, test_scaled
 
# inverse scaling for a forecasted value
def invert_scale(scaler, X, value):
    '''Map a scaled forecast back through ``scaler.inverse_transform``.

    ``value`` is appended to the items of ``X`` to form a single row, the row is
    inverse-transformed, and the last element of the result is returned.
    '''
    row = np.array(list(X) + [value])
    as_matrix = row.reshape(1, row.shape[0])
    restored = scaler.inverse_transform(as_matrix)
    return restored[0, -1]
 
# fit an LSTM network to training data
def fit_lstm(train, batch_size, nb_epoch, neurons, optimizer):
    '''Train a stateful single-layer LSTM followed by a one-unit Dense layer.

    Args:
        train: 2-D array; every column but the last is an input feature and the
            last column is the target.
        batch_size: batch size used for the fixed batch input shape and for fitting.
        nb_epoch: number of single-epoch ``fit`` calls to run.
        neurons: number of LSTM units.
        optimizer: optimizer passed to ``model.compile``.

    Returns:
        ``(model, history)`` where ``history`` is the list of per-epoch losses.
    '''
    features = train[:, 0:-1]
    targets = train[:, -1]
    n_samples, n_features = features.shape[0], features.shape[1]
    # one timestep per sample: (samples, 1, features)
    features = features.reshape(n_samples, 1, n_features)

    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape=(batch_size, 1, n_features), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer=optimizer)

    losses = []
    for epoch in range(1, nb_epoch + 1):
        # Fit one epoch at a time without shuffling, then reset the LSTM state
        # before the next pass over the data.
        result = model.fit(features, targets, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        epoch_loss = result.history['loss'][0]
        print("Epoch: {}, ".format(epoch), "loss: {}".format(epoch_loss))
        losses.append(epoch_loss)
        model.reset_states()
    return model, losses
 
# make a one-step forecast
def forecast_lstm(model, batch_size, X):
    '''Predict a single value for one sample.

    ``X`` is reshaped to ``(1, 1, len(X))`` and passed to ``model.predict``; the
    element at position ``[0, 0]`` of the prediction is returned.
    '''
    sample = X.reshape(1, 1, len(X))
    prediction = model.predict(sample, batch_size=batch_size)
    return prediction[0, 0]

# ---- Experiment configuration and training run ----
import os  # used only to make sure the output folder exists

project_path = "./"
save_dir = project_path + "saved_models/"
data_path = project_path + "C1-6/C1-6_CanTho/"
filenames = ["C1"]
# filenames = ["C1", "C2"]
# optimizers = ['rmsprop', 'adagrad', 'adadelta', 'adam']
optimizers = ['rmsprop']

# model.save() and open(..., 'w') below both fail if the folder is missing.
os.makedirs(save_dir, exist_ok=True)

i = 0  # not read anywhere in this section; kept in case a later cell relies on it
for filename in filenames:
    # Each dataset folder holds whitespace-separated input/target files for train and test.
    training_data = DataLoader(data_path + filename + "/Training_Input.txt", data_path + filename + "/Training_Target.txt")
    test_data = DataLoader(data_path + filename + "/Testing_Input.txt", data_path + filename + "/Testing_Target.txt")

    x_train, y_train = training_data.get_data()
    x_test, y_test = test_data.get_data()

    # Append the target column(s) after the inputs so fit_lstm can split on the last column.
    train = np.concatenate((x_train, y_train), axis=1)
    test = np.concatenate((x_test, y_test), axis=1)

    # Scaling does not depend on the optimizer, so do it once per dataset.
    scaler, train_scaled, test_scaled = scale(train, test)

    for optimizer in optimizers:
        print("[Model] data: {}, optimizer: {}".format(filename, optimizer))
        lstm_model, history = fit_lstm(train_scaled, batch_size=1, nb_epoch=3, neurons=4, optimizer=optimizer)

        # Persist the trained model and its per-epoch loss history side by side.
        lstm_model.save(save_dir + '%s-%s.h5' % (filename, optimizer))
        with open(save_dir + '%s-%s-history.json' % (filename, optimizer), 'w') as f:
            json.dump(history, f)

[Model] data: C1, optimizer: rmsprop
Epoch: 1,  loss: 0.0009792015840539086
Epoch: 2,  loss: 0.0008708160870880387
Epoch: 3,  loss: 0.0007971533259256608
