In [3]:
import matplotlib.pyplot as plt
import warnings
# NOTE(review): with no category/module argument this filter hides *every*
# warning for the rest of the session, including deprecation and numerical
# warnings raised by the libraries used below. Consider narrowing it.
warnings.filterwarnings("ignore")
%matplotlib inline

In [4]:
import pandas as pd 
import dataprep.dataset as dp
# Input file, read relative to ../data through the project's dataprep loader
# (a gzip-compressed CSV, judging by the file name).
filename='MLParamData_1583906408.4261804_From_MLrn_2020-03-10+00_00_00_to_2020-03-11+00_00_00.h5_processed.csv.gz'
df = dp.load_reformated_cvs('../data/'+filename)
# Re-index the frame by its parsed `time` column and drop every row that
# contains a missing value, in a single chained expression.
df = df.set_index(pd.to_datetime(df.time)).dropna()
print(len(df))

KeyError: (0, 200000)

In [None]:
import numpy as np 
from sklearn.preprocessing import MinMaxScaler

## Window sizes, counted in samples.
## look_back = 10*15 = 150 samples; at the 15 Hz cycle rate named in the original
## comment that is 10 seconds of history, not the "1 second" it stated
## (assumes one sample per cycle -- TODO confirm).
## look_forward = 1 sample is predicted per window.
look_back    = 10*15 
look_forward = 1 
    
def create_dataset(dataset, look_back=1,look_forward=1):
    """Slice column 0 of ``dataset`` into sliding (history, target) windows.

    Window ``i`` pairs the ``look_back`` samples starting at row ``i`` with
    the ``look_forward`` samples that immediately follow them.

    Parameters
    ----------
    dataset : array-like, 2-D
        Samples along axis 0; only column 0 is read.
    look_back : int
        Number of consecutive samples in each input window.
    look_forward : int
        Number of consecutive samples in each target window.

    Returns
    -------
    X : numpy.ndarray, shape (n_windows, look_back)
    Y : numpy.ndarray, shape (n_windows, look_forward)
        ``n_windows = max(len(dataset) - look_back - look_forward + 1, 0)``.
        Both arrays stay 2-D even when no window fits, so callers can always
        read ``shape[1]``.
    """
    dataset = np.asarray(dataset)
    offset = look_back + look_forward
    # The last complete window starts at row len(dataset) - offset, which
    # gives len(dataset) - offset + 1 windows in total.  The previous bound,
    # len(dataset) - (offset + 1), stopped two complete windows early.
    n_windows = max(len(dataset) - offset + 1, 0)
    if n_windows == 0:
        # np.array([]) would be 1-D with shape (0,); return explicit 2-D empties.
        return (np.empty((0, look_back), dtype=dataset.dtype),
                np.empty((0, look_forward), dtype=dataset.dtype))
    X = [dataset[i:i + look_back, 0] for i in range(n_windows)]
    Y = [dataset[i + look_back:i + offset, 0] for i in range(n_windows)]
    return np.array(X), np.array(Y)

def get_dataset(variable='B:VIMIN', train_fraction=0.70):
    """Build scaled, windowed train/test arrays for one column of the global ``df``.

    The column is cast to float32, split in row order (the first
    ``train_fraction`` of the rows for training, the remainder for testing),
    min-max scaled to [0, 1], and cut into sliding windows by
    ``create_dataset`` using the module-level ``look_back`` and
    ``look_forward`` settings.

    The scaler is fitted on the training rows only and then applied to the
    test rows.  Fitting it on the full series before the split (the previous
    behaviour) let the test rows' minimum and maximum shape the training
    data.  As a consequence, scaled test values may now fall slightly
    outside [0, 1].

    Parameters
    ----------
    variable : str
        Name of the ``df`` column to process.
    train_fraction : float
        Fraction of rows, counted from the start, assigned to training.

    Returns
    -------
    scaler : MinMaxScaler
        Scaler fitted on the training rows.
    X_train, Y_train, X_test, Y_test : numpy.ndarray
        ``X_*`` has shape (n_windows, 1, look_back); ``Y_*`` has shape
        (n_windows, look_forward).
    """
    series = df[variable].values.astype('float32')  # numpy.ndarray
    series = np.reshape(series, (-1, 1))  # one feature column, as the scaler expects

    ## TODO: Fix  (original author's note; what needs fixing is not stated)
    train_size = int(len(series) * train_fraction)
    train_raw, test_raw = series[:train_size, :], series[train_size:, :]

    # Learn the min/max from the training rows only, then reuse them for test.
    scaler = MinMaxScaler(feature_range=(0, 1))
    train = scaler.fit_transform(train_raw)
    test = scaler.transform(test_raw)

    X_train, Y_train = create_dataset(train, look_back, look_forward)
    # Insert a singleton axis: (n_windows, look_back) -> (n_windows, 1, look_back).
    X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
    Y_train = np.reshape(Y_train, (Y_train.shape[0], Y_train.shape[1]))

    X_test, Y_test = create_dataset(test, look_back, look_forward)
    X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
    Y_test = np.reshape(Y_test, (Y_test.shape[0], Y_test.shape[1]))
    return scaler, X_train, Y_train, X_test, Y_test

variables = ['B:VIMIN','B:IMINER','B:LINFRQ','I:IB','I:MDAT40']
# One tuple per variable, in `variables` order, laid out as
# (scaler, X_train, Y_train, X_test, Y_test)  ->  tuple indices 0..4.
data_list = [get_dataset(variable=name) for name in variables]

## Injector model data
# Inputs and targets both come from I:IB (3) and I:MDAT40 (4).  Stacking on
# axis 1 turns the per-variable singleton axis of X into one row per variable.
InjX_train = np.concatenate([data_list[k][1] for k in (3, 4)], axis=1)
InjY_train = np.concatenate([data_list[k][2] for k in (3, 4)], axis=1)
InjX_test = np.concatenate([data_list[k][3] for k in (3, 4)], axis=1)
InjY_test = np.concatenate([data_list[k][4] for k in (3, 4)], axis=1)
print(InjX_train.shape)
print(InjY_train.shape)

## Booster model data
# Inputs use all five variables; targets are the three B:* variables (0, 1, 2).
BoX_train = np.concatenate([data_list[k][1] for k in (0, 1, 2, 3, 4)], axis=1)
BoY_train = np.concatenate([data_list[k][2] for k in (0, 1, 2)], axis=1)
BoX_test = np.concatenate([data_list[k][3] for k in (0, 1, 2, 3, 4)], axis=1)
BoY_test = np.concatenate([data_list[k][4] for k in (0, 1, 2)], axis=1)
print(BoX_train.shape)
print(BoY_train.shape)
from pickle import dump
# save the scaler
# One file per variable, named by its position in `variables`.  The `with`
# block closes (and flushes) each file; the bare open() used before never did.
for v, dataset_parts in enumerate(data_list):
    with open('scaler_var{}.pkl'.format(v), 'wb') as scaler_file:
        dump(dataset_parts[0], scaler_file)

In [None]:
import src.analysis as ana
import src.models as models
# Training hyper-parameters (epochs, batch size).
# NOTE: the booster training cell further down reuses `e` and `bs`, so this
# cell has to run first.
e=150
bs=101
# Read the model's input/output sizes from the prepared arrays instead of
# repeating them as literals, so they follow look_back / look_forward and the
# variable selection.  With the current data these evaluate to (2, 150) and 2,
# the values that were hard-coded before.
# NOTE(review): assumes train_lstm_model expects in_shape == x.shape[1:] and
# out_shape == y.shape[1] -- confirm against src.models.
injector_history, injector_model = models.train_lstm_model(in_shape=InjX_train.shape[1:],
                                                           out_shape=InjY_train.shape[1],
                                                           x=InjX_train,y=InjY_train,
                                                           epochs=e,batch_size=bs)
# Common stem for the loss plot, the test plot and the saved model file.
save_name='injector_adam256_e{}_bs{}'.format(e,bs)
ana.plot_loss(injector_history,name='loss_{}'.format(save_name))
ana.plot_test(injector_model,InjX_test,InjY_test,name='test_{}'.format(save_name))
injector_model.save('model_{}.h5'.format(save_name))

In [None]:
import time
import src.analysis as ana
import src.models as models

# NOTE: `e` (epochs) and `bs` (batch size) are not defined in this cell; they
# come from the injector training cell, which must have been run beforehand.
# perf_counter() is monotonic, so the reported duration cannot be distorted by
# system clock adjustments the way a time.time() difference can.
start_time=time.perf_counter()
# Read the model's input/output sizes from the prepared arrays instead of
# repeating them as literals.  With the current data these evaluate to
# (5, 150) and 3, the values that were hard-coded before.
# NOTE(review): assumes train_lstm_model expects in_shape == x.shape[1:] and
# out_shape == y.shape[1] -- confirm against src.models.
booster_history, booster_model = models.train_lstm_model(in_shape=BoX_train.shape[1:],
                                                         out_shape=BoY_train.shape[1],
                                                         x=BoX_train,y=BoY_train,
                                                         epochs=e,batch_size=bs)
print('Training time: {}'.format(time.perf_counter()-start_time))
# Common stem for the loss plot, the test plot and the saved model file.
save_name='booster_adam256_e{}_bs{}'.format(e,bs)
ana.plot_loss(booster_history,name='loss_{}'.format(save_name))
ana.plot_test(booster_model,BoX_test,BoY_test,nvar=3,name='test_{}'.format(save_name))
booster_model.save('model_{}.h5'.format(save_name))