In [1]:
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [2]:
import pandas as pd 
import dataprep.dataset as dp
filename='MLParamData_1583906408.4261804_From_MLrn_2020-03-10+00_00_00_to_2020-03-11+00_00_00.h5_processed.csv.gz'
nsteps=250000
df = dp.load_reformated_cvs('../data/'+filename,nrows=nsteps)
df = df.set_index(pd.to_datetime(df.time))
df = df.dropna()
print(len(df))

234885


In [3]:
import numpy as np 
from sklearn.preprocessing import MinMaxScaler

## 1 second (cycle - 15Hz)
look_back    = 10*15 
look_forward = 1 
    
def create_dataset(dataset, look_back=1,look_forward=1):
    X, Y = [], []
    offset = look_back+look_forward
    for i in range(len(dataset)-(offset+1)):
        xx = dataset[i:(i+look_back), 0]
        yy = dataset[(i + look_back):(i + offset), 0]
        X.append(xx)
        Y.append(yy)
    return np.array(X), np.array(Y)

def get_dataset(variable='B:VIMIN'):

    dataset = df[variable].values #numpy.ndarray
    dataset = dataset.astype('float32')
    dataset = np.reshape(dataset, (-1, 1))
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)

    ## TODO: Fix
    #print(len(dataset))
    train_size = int(len(dataset) * 0.70)
    #print(train_size)
    test_size = len(dataset) - train_size
    #print(test_size)

    train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]

    X_train, Y_train = create_dataset(train, look_back,look_forward)
    X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
    Y_train = np.reshape(Y_train, (Y_train.shape[0],  Y_train.shape[1]))
    #print(X_train.shape)
    #print(Y_train.shape)
    
    X_test, Y_test = create_dataset(test, look_back,look_forward)
    X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
    Y_test = np.reshape(Y_test, (Y_test.shape[0],  Y_test.shape[1]))
    #print(X_test.shape)
    #print(Y_test.shape)
    return scaler, X_train, Y_train, X_test, Y_test

variables = ['B:VIMIN','B:IMINER','B:LINFRQ','I:IB','I:MDAT40']
data_list = []
for v in range(len(variables)):
    data_list.append(get_dataset(variable=variables[v]))

# Axis
concate_axis=2
## Injector model data
InjX_train = np.concatenate((data_list[3][1],data_list[4][1]),axis=concate_axis)
InjY_train = np.concatenate((data_list[3][2],data_list[4][2]),axis=1) 
InjX_test = np.concatenate((data_list[3][3],data_list[4][3]),axis=concate_axis)
InjY_test = np.concatenate((data_list[3][4],data_list[4][4]),axis=1) 
print(InjX_train.shape)
print(InjY_train.shape)

## Booster model data
BoX_train = np.concatenate((data_list[0][1],data_list[1][1],data_list[2][1],data_list[3][1],data_list[4][1]),axis=concate_axis) 
BoY_train = np.concatenate((data_list[0][2],data_list[1][2],data_list[2][2],data_list[3][2],data_list[4][2]),axis=1) 
BoX_test = np.concatenate((data_list[0][3],data_list[1][3],data_list[2][3],data_list[3][3],data_list[4][3]),axis=concate_axis) 
BoY_test = np.concatenate((data_list[0][4],data_list[1][4],data_list[2][4],data_list[3][4],data_list[4][4]),axis=1) 
print(BoX_train.shape)
print(BoY_train.shape)
from pickle import dump
# save the scaler
for v in range(len(variables)):
    dump(data_list[v][0], open('scaler_var{}_nsteps{}k.pkl'.format(v,int(nsteps/1000)), 'wb'))

(164267, 1, 300)
(164267, 2)
(164267, 1, 750)
(164267, 5)


In [None]:
import time
import src.analysis as ana
import src.models as models
e=350
bs=99
in_shape=(5,150)
if concate_axis==2:
    in_shape=(1,5*150)
    
start_time=time.time()
booster_history, booster_model = models.train_lstm_model(in_shape=in_shape,out_shape=5,
                                                         x=BoX_train,y=BoY_train,
                                                         epochs=e,batch_size=bs)
print('Training time: {}'.format(time.time()-start_time))
save_name='booster_adam256_e{}_bs{}_nsteps{}k_axis{}'.format(e,bs,int(nsteps/1000),concate_axis)
ana.plot_loss(booster_history,name='loss_{}'.format(save_name))
ana.plot_test(booster_model,BoX_test,BoY_test,nvar=5,name='test_{}'.format(save_name))
booster_model.save('model_{}.h5'.format(save_name))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 1, 256)            1031168   
_________________________________________________________________
lstm_1 (LSTM)                (None, 1, 256)            525312    
_________________________________________________________________
lstm_2 (LSTM)                (None, 256)               525312    
_________________________________________________________________
dense (Dense)                (None, 5)                 1285      
Total params: 2,083,077
Trainable params: 2,083,077
Non-trainable params: 0
_________________________________________________________________
Epoch 1/350
1328/1328 - 17s - loss: 0.3831 - val_loss: 0.3426 - lr: 0.0100
Epoch 2/350
1328/1328 - 16s - loss: 0.3402 - val_loss: 0.3354 - lr: 0.0100
Epoch 3/350
1328/1328 - 16s - loss: 0.3401 - val_loss: 0.3457 - lr: 0.0100
Epoch 4/350
1328/13

Epoch 87/350
1328/1328 - 16s - loss: 0.0728 - val_loss: 0.0716 - lr: 0.0020
Epoch 88/350
1328/1328 - 16s - loss: 0.0728 - val_loss: 0.0733 - lr: 0.0020
Epoch 89/350
1328/1328 - 16s - loss: 0.0728 - val_loss: 0.0705 - lr: 0.0020
Epoch 90/350
1328/1328 - 16s - loss: 0.0728 - val_loss: 0.0715 - lr: 0.0020
Epoch 91/350
1328/1328 - 16s - loss: 0.0728 - val_loss: 0.0731 - lr: 0.0020
Epoch 92/350
1328/1328 - 16s - loss: 0.0728 - val_loss: 0.0711 - lr: 0.0020
Epoch 93/350
1328/1328 - 16s - loss: 0.0728 - val_loss: 0.0724 - lr: 0.0020
Epoch 94/350

Epoch 00094: ReduceLROnPlateau reducing learning rate to 0.0016734321834519506.
1328/1328 - 16s - loss: 0.0728 - val_loss: 0.0720 - lr: 0.0020
Epoch 95/350
1328/1328 - 16s - loss: 0.0631 - val_loss: 0.0615 - lr: 0.0017
Epoch 96/350
1328/1328 - 16s - loss: 0.0631 - val_loss: 0.0624 - lr: 0.0017
Epoch 97/350
1328/1328 - 16s - loss: 0.0631 - val_loss: 0.0627 - lr: 0.0017
Epoch 98/350
1328/1328 - 16s - loss: 0.0631 - val_loss: 0.0622 - lr: 0.0017
Epoch 9

Epoch 183/350
1328/1328 - 16s - loss: 0.0255 - val_loss: 0.0252 - lr: 5.3646e-04
Epoch 184/350

Epoch 00184: ReduceLROnPlateau reducing learning rate to 0.0004559943830827251.
1328/1328 - 16s - loss: 0.0255 - val_loss: 0.0251 - lr: 5.3646e-04
Epoch 185/350
1328/1328 - 16s - loss: 0.0229 - val_loss: 0.0226 - lr: 4.5599e-04
Epoch 186/350
1328/1328 - 16s - loss: 0.0229 - val_loss: 0.0226 - lr: 4.5599e-04
Epoch 187/350
1328/1328 - 16s - loss: 0.0229 - val_loss: 0.0227 - lr: 4.5599e-04
Epoch 188/350
1328/1328 - 16s - loss: 0.0229 - val_loss: 0.0225 - lr: 4.5599e-04
Epoch 189/350
1328/1328 - 16s - loss: 0.0229 - val_loss: 0.0223 - lr: 4.5599e-04
Epoch 190/350
1328/1328 - 16s - loss: 0.0229 - val_loss: 0.0226 - lr: 4.5599e-04
Epoch 191/350
1328/1328 - 16s - loss: 0.0229 - val_loss: 0.0224 - lr: 4.5599e-04
Epoch 192/350
1328/1328 - 16s - loss: 0.0229 - val_loss: 0.0224 - lr: 4.5599e-04
Epoch 193/350
1328/1328 - 16s - loss: 0.0229 - val_loss: 0.0228 - lr: 4.5599e-04
Epoch 194/350
1328/1328 - 16

1328/1328 - 16s - loss: 0.0119 - val_loss: 0.0118 - lr: 1.2425e-04
Epoch 276/350
1328/1328 - 16s - loss: 0.0119 - val_loss: 0.0118 - lr: 1.2425e-04
Epoch 277/350
1328/1328 - 16s - loss: 0.0119 - val_loss: 0.0119 - lr: 1.2425e-04
Epoch 278/350
1328/1328 - 16s - loss: 0.0119 - val_loss: 0.0117 - lr: 1.2425e-04
Epoch 279/350
1328/1328 - 16s - loss: 0.0119 - val_loss: 0.0117 - lr: 1.2425e-04
Epoch 280/350
1328/1328 - 16s - loss: 0.0119 - val_loss: 0.0118 - lr: 1.2425e-04
Epoch 281/350
1328/1328 - 16s - loss: 0.0119 - val_loss: 0.0116 - lr: 1.2425e-04
Epoch 282/350
1328/1328 - 16s - loss: 0.0119 - val_loss: 0.0116 - lr: 1.2425e-04
Epoch 283/350
1328/1328 - 16s - loss: 0.0119 - val_loss: 0.0118 - lr: 1.2425e-04
Epoch 284/350

Epoch 00284: ReduceLROnPlateau reducing learning rate to 0.00010561603776295669.
1328/1328 - 16s - loss: 0.0119 - val_loss: 0.0116 - lr: 1.2425e-04
Epoch 285/350
1328/1328 - 16s - loss: 0.0113 - val_loss: 0.0113 - lr: 1.0562e-04
Epoch 286/350
1328/1328 - 16s - loss: 0.0

In [None]:
import src.analysis as ana
import src.models as models
#e=150
#bs=101
in_shape=(2,150)
if concate_axis==2:
    in_shape=(1,2*150)
    
injector_history, injector_model = models.train_lstm_model(in_shape=in_shape,out_shape=2,
                                                           x=InjX_train,y=InjY_train,
                                                           epochs=e,batch_size=bs)
save_name='injector_adam256_e{}_bs{}_nsteps{}k_axis{}'.format(e,bs,int(nsteps/1000),concate_axis)
ana.plot_loss(injector_history,name='loss_{}'.format(save_name))
ana.plot_test(injector_model,InjX_test,InjY_test,name='test_{}'.format(save_name))
injector_model.save('model_{}.h5'.format(save_name))

In [None]:
## 250 gives good results ##