In [1]:
import math
import numpy as np
from numpy import *
from os import path
import os
import sys
import matplotlib.pyplot as plt

import keras
from keras.utils import Sequence
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Dense, Embedding, LSTM
from keras.optimizers import Adam
from keras import optimizers
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import losses
from keras.utils import plot_model
from tensorflow.python.client import device_lib

Using TensorFlow backend.


In [2]:
np.random.seed(0)  # Set a random seed for reproducibility

In [3]:
# <--------------------->
# Tunable

agmntCount = 1
blc_id = 0
pred_step = 1
gpu_id = str(1)

rnn_sequence_length = 300
cutFromTail = 60
cutFromHead = 144
max_pred_step = 60
# <--------------------->

In [4]:
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# <--------------------->
# Tunable
os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id
batch_size = 1
# <--------------------->
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 5689295328722328053
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1810169856
locality {
  bus_id: 1
  links {
  }
}
incarnation: 7483925215774166915
physical_device_desc: "device: 0, name: GeForce GTX 770, pci bus id: 0000:02:00.0, compute capability: 3.0"
]


In [5]:
def read_dtaset_by_index(index):
    inpath = "../data/"
    currentfile = path.join(inpath, "data_T_{0}.csv".format(index))
    # Read from file
    strdatatype = np.dtype([('N', np.int_), ('Mode', np.float_, (5,)),
                            ('kalmanT', np.float_, (10,)),
                            ('kalmanT_dot', np.float_, (10,)),
                            ('rwavT', np.float_, (10,)),
                            ('ma13T', np.float_, (10,)),
                            ('ma55T', np.float_, (10,)),
                            ('ma144T', np.float_, (10,)),
                            ('S', np.float_, (10,)),
                            ('lfc', np.float_, (10,))])
    # N, Mode, kalmanT, kalmanT_dot, rwavT, ma13T, ma55T, ma144T, S, lfc
    return np.loadtxt(currentfile, unpack=True, delimiter=';', skiprows=1, dtype=strdatatype)


In [6]:
# Read unaugmented dataset
N, Mode, kalmanT, kalmanT_dot, rwavT, ma13T, ma55T, ma144T, S, lfc = read_dtaset_by_index(0)
# Alloc and read agmntCount augmented copies, collect full dataset
n_features = 13
ds = np.empty((agmntCount + 1, len(kalmanT[:, 0]), n_features))
(ds[0, :, 0], ds[0, :, 1], ds[0, :, 2], ds[0, :, 3], ds[0, :, 4],
    ds[0, :, 5], ds[0, :, 6], ds[0, :, 7], ds[0, :, 8:13]) = (kalmanT[:, blc_id], kalmanT_dot[:, blc_id],
                                                              rwavT[:, blc_id], ma13T[:, blc_id], ma55T[:, blc_id],
                                                              ma144T[:, blc_id], S[:, blc_id], lfc[:, blc_id], Mode[:, :])

In [7]:
for agmnt_index in range(1, agmntCount + 1):
    _N, _Mode, _kalmanT, _kalmanT_dot, _rwavT, _ma13T, _ma55T, _ma144T, _S, _lfc = read_dtaset_by_index(agmnt_index)
    (ds[agmnt_index, :, 0], ds[agmnt_index, :, 1], ds[agmnt_index, :, 2], ds[agmnt_index, :, 3], ds[agmnt_index, :, 4],
     ds[agmnt_index, :, 5], ds[agmnt_index, :, 6], ds[agmnt_index, :, 7], ds[agmnt_index, :, 8:13]) = (_kalmanT[:, blc_id], _kalmanT_dot[:, blc_id],
                                                                                                       _rwavT[:, blc_id], _ma13T[:, blc_id], _ma55T[:, blc_id],
                                                                                                       _ma144T[:, blc_id], _S[:, blc_id], _lfc[:, blc_id], _Mode[:, :])

In [8]:
# boundaries
l_b, r_b = cutFromHead, cutFromTail
len_data = len(ds[0, l_b:-r_b, 0])

len_test = int(rnn_sequence_length * 1.25)
len_train = len_data - len_test
print(l_b, r_b)
print("len_data = ", len_data)
print("len_test = ", len_test)
print("len_train = ", len_train)

144 60
len_data =  2197
len_test =  375
len_train =  1822


In [9]:
ds_train = ds[:, l_b:l_b + len_train, :]
ds_test = ds[:, -(r_b + len_test):-r_b, :]

print("ds: ", shape(ds))
print("ds_train: ", shape(ds_train))
print("ds_test: ", shape(ds_test))

ds:  (2, 2401, 13)
ds_train:  (2, 1822, 13)
ds_test:  (2, 375, 13)


In [10]:
def batch_generator_validation(batch_size, rnn_sequence_length):
    while True:
        X1_shape = (batch_size, rnn_sequence_length, 13)
        X1 = np.zeros(shape=X1_shape, dtype=np.float16)

        X2_shape = (batch_size, 13)
        X2 = np.zeros(shape=X2_shape, dtype=np.float16)

        Y1_shape = (batch_size, 1)
        Y1 = np.zeros(shape=Y1_shape, dtype=np.float16)

        Y2_shape = (batch_size, 1)
        Y2 = np.zeros(shape=Y2_shape, dtype=np.float16)

        for i in range(batch_size):
            # Get a random start-index.
            # This points somewhere into the training-data.
            idx = np.random.randint(len_test - (rnn_sequence_length + max_pred_step))
            # This points somewhere into the augmented series range.
            idaugmnt = np.random.randint(agmntCount)

            # Copy the sequences of data starting at this index.
            X1[i, :, :] = ds_test[idaugmnt, idx:idx + rnn_sequence_length, :]
            X2[i, :] = ds_test[idaugmnt, idx + rnn_sequence_length - 1, :]

            Y1[i, 0] = ds_test[idaugmnt, idx + rnn_sequence_length, 0]
            Y2[i, 0] = ds_test[idaugmnt, idx + pred_step + rnn_sequence_length - 1, 6]
        yield [X1, X2], [Y1, Y2]

In [11]:
#generator_traindata = batch_generator_train(batch_size=batch_size, rnn_sequence_length=rnn_sequence_length)
generator_validdata = batch_generator_validation(batch_size=batch_size, rnn_sequence_length=rnn_sequence_length)

In [12]:
[tmpX1, tmpX2], [tmpY1, tmpY2] = next(generator_validdata)
print(tmpY2[0])

[0.]


In [13]:
steps_per_epoch = int((len_train - rnn_sequence_length) * agmntCount / batch_size)
validation_steps = int((len_test - rnn_sequence_length) * agmntCount / batch_size)
print("steps_per_epoch = ", steps_per_epoch)
print("validation_steps = ", validation_steps)


steps_per_epoch =  1522
validation_steps =  75


In [14]:
np.unique(ds_train[0, :, 6], return_counts=True, return_index=True)


(array([0., 1.]), array([0, 5]), array([ 457, 1365]))

In [15]:
np.unique(ds_test[0, :, 6], return_counts=True, return_index=True)

(array([0., 1.]), array([24,  0]), array([ 88, 287]))

In [16]:
l_b, r_b = cutFromHead + rnn_sequence_length , cutFromTail + max_pred_step
#targets = S[l_b:-r_b, blc_id]
targets = ds[0, l_b:-r_b, 6]

In [17]:
np.unique(targets, return_counts=True)

(array([0., 1.]), array([ 441, 1396]))

In [18]:
def collect_indexes_by_target_value():
    vals, counts = np.unique(targets, return_counts=True)
    target_0_indexes = np.zeros(counts[0], dtype=int)
    target_1_indexes = np.zeros(counts[1], dtype=int)
    tmp_i, tmp_j = 0, 0
    for k in range(0, len(targets)):
        if (int(targets[k]) == int(vals[0])):
            target_0_indexes[tmp_i] = k
            tmp_i += 1
        elif (int(targets[k]) == int(vals[1])):
            target_1_indexes[tmp_j] = k
            tmp_j += 1
    return target_0_indexes, target_1_indexes

In [19]:
target_0_indexes, target_1_indexes = collect_indexes_by_target_value()
np.random.shuffle(target_0_indexes)
np.random.shuffle(target_1_indexes)

In [20]:
print("targets: ", shape(targets))
print("target_0_indexes: ", shape(target_0_indexes))
print("target_1_indexes: ", shape(target_1_indexes))

targets:  (1837,)
target_0_indexes:  (441,)
target_1_indexes:  (1396,)


In [21]:
inbalance_f = int(np.round((len(target_1_indexes) / len(target_0_indexes))))

In [22]:
inbalance_f

3

In [23]:
#target_0_indexes = np.tile(target_0_indexes, inbalance_f)

In [24]:
print("targets: ", shape(targets))
print("target_0_indexes: ", shape(target_0_indexes))
print("target_1_indexes: ", shape(target_1_indexes))

targets:  (1837,)
target_0_indexes:  (441,)
target_1_indexes:  (1396,)


In [25]:
train_test_split = 0.8
target_0_indexes_train_len = int(train_test_split * len(target_0_indexes))
target_0_indexes_train = target_0_indexes[0:target_0_indexes_train_len]
target_0_indexes_test = target_0_indexes[target_0_indexes_train_len:]

target_1_indexes_train_len = int(train_test_split * len(target_1_indexes))
target_1_indexes_train = target_1_indexes[0:target_1_indexes_train_len]
target_1_indexes_test = target_1_indexes[target_1_indexes_train_len:]

In [26]:
print("target_0_indexes_train: ", shape(target_0_indexes_train))
print("target_0_indexes_test: ", shape(target_0_indexes_test))

print("target_1_indexes_train: ", shape(target_1_indexes_train))
print("target_1_indexes_test: ", shape(target_1_indexes_test))

target_0_indexes_train:  (352,)
target_0_indexes_test:  (89,)
target_1_indexes_train:  (1116,)
target_1_indexes_test:  (280,)


In [27]:
indexes_train = np.concatenate((target_0_indexes_train, target_1_indexes_train), axis=0)
indexes_test = np.concatenate((target_0_indexes_test, target_1_indexes_test), axis=0)
#indexes_train = target_0_indexes_train
#indexes_test = target_1_indexes_test
np.random.shuffle(indexes_train)
np.random.shuffle(indexes_test)

In [28]:
print("indexes_train: ", shape(indexes_train))
print("indexes_test: ", shape(indexes_test))

print("indexes_train: ", indexes_train)
print("indexes_test: ", indexes_test)

indexes_train:  (1468,)
indexes_test:  (369,)
indexes_train:  [ 997 1816 1508 ... 1747  964 1300]
indexes_test:  [ 337 1385 1401 1167  728  990  300  459  144  489  830  824  181  155
 1585 1408  896  361  433 1111  537 1654  491 1516  187 1354  773 1110
  329   28  680 1246 1084  610  428 1362 1445 1199 1540  927  714 1522
 1781 1495  479  838 1551 1235  528 1656  984  236 1611 1087 1796 1464
  493  269  383  246 1527   50 1328  942  393  638  783 1648 1184   99
  839 1641  744 1315 1759  855  785  745 1651 1674  840  324  198  851
 1190  335 1567  791  891 1721   72  582  174 1091 1629  663  323  520
  206 1707 1569  709 1141  829  809 1617 1222 1836 1584 1530 1202 1738
  171  241  265 1647 1777  524  364 1592   29  900 1623 1026 1442  802
  458  127  442 1031 1625  162  579  285 1459 1535  333 1375  849 1701
  988  923  659  769  499 1204  238 1339 1216  925 1521 1798 1218  503
  394  484  735  517 1205 1264 1106  143 1369 1801 1321 1102   34 1407
  549  343  360  426  483 1367  194

In [29]:
print("indexes_train max: ", np.amax(indexes_train))
print("indexes_train min: ", np.amin(indexes_train))

print("indexes_test max: ", np.amax(indexes_test))
print("indexes_test min: ", np.amin(indexes_test))

indexes_train max:  1835
indexes_train min:  0
indexes_test max:  1836
indexes_test min:  8


In [30]:
def index_generator(indexes):
    while True:
        for j in indexes:
            yield j

In [31]:
train_index = index_generator(indexes = indexes_train)
test_index = index_generator(indexes = indexes_test)

In [32]:
def batch_generator_train(batch_size, rnn_sequence_length):
    while True:
        X1_shape = (batch_size, rnn_sequence_length, 13)
        X1 = np.zeros(shape=X1_shape, dtype=np.float16)

        X2_shape = (batch_size, 13)
        X2 = np.zeros(shape=X2_shape, dtype=np.float16)

        Y1_shape = (batch_size, 1)
        Y1 = np.zeros(shape=Y1_shape, dtype=np.float16)

        Y2_shape = (batch_size, 1)
        Y2 = np.zeros(shape=Y2_shape, dtype=np.float16)

        for i in range(batch_size):
            # Get a random start-index.
            # This points somewhere into the training-data.
            index = next(train_index)
            l_b = cutFromHead + rnn_sequence_length
            idx = index + l_b - pred_step
            # This points somewhere into the augmented series range.
            idaugmnt = np.random.randint(agmntCount)

            # Copy the sequences of data starting at this index.
            X1[i, :, :] = ds[idaugmnt, idx - rnn_sequence_length + 1 : idx + 1, :]
            X2[i, :]    = ds[idaugmnt, idx, :]

            Y1[i, 0] = ds[0, idx, 0]
            Y2[i, 0] = ds[0, idx + pred_step, 6]
        yield [X1, X2], [Y1, Y2]

In [33]:
generator_traindata = batch_generator_train(batch_size=batch_size, rnn_sequence_length=rnn_sequence_length)

In [34]:
[tmpX1, tmpX2], [tmpY1, tmpY2] = next(generator_traindata)
print(tmpX2[0, 0:6])
print(tmpY2[0])

[-0.1597  -0.07324 -0.2874  -0.4124  -0.695   -0.1755 ]
[1.]
