In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
def load_features(filename,
                  skip_header=True,
                  skip_instname=True,
                  delim=' ',
                  num_lines=0):
    if num_lines == 0:
        num_lines = get_num_lines(filename, skip_header)

    data = np.empty(
        (num_lines, 25), float)

    with open(filename, 'r') as csv_file:
        if skip_header:
            next(csv_file)
        c = 0
        for line in tqdm(csv_file):
            offset = 0
            if skip_instname:
                offset = line.find(delim) + 1
            data[c, :] = np.fromstring(line[offset:], dtype=float, sep=delim)
            c += 1

    return data

def get_num_lines(filename, skip_header):
    with open(filename, 'r') as csv_file:
        if skip_header:
            next(csv_file)
        c = 0
        for line in csv_file:
            c += 1
    return c

def load_labels(filename, col_labels=1, gen_headers=True, delim=','):
    headers = []
    labels = []
    
    with open(filename, 'r') as csv_file:
        for i, line in tqdm(enumerate(csv_file)):            
            cols = np.fromstring(line, dtype=float, sep=delim)
            if i == 0:
                headers = line.rstrip().split(delim)[col_labels:]                
                continue
            
            labels.append(cols[col_labels:])
    if gen_headers:
        return np.array(labels), headers
    else:
        return np.array(labels)

In [3]:
data_path = './Functional_features/'

print("Loading training samples...")
x_train = load_features(data_path+'train.txt', skip_header=False, skip_instname=False)
x_train = x_train.reshape((25253, 399, 25))
y_train, headers = load_labels(data_path+'train_labels.txt', gen_headers=True)

print("Loading validation samples...")
x_validation = load_features(data_path+'validation.txt', skip_header = False, skip_instname=False)
x_validation = x_validation.reshape((9471, 399, 25))
y_validation = load_labels(data_path+'validation_labels.txt', gen_headers=False)

# print("Loading testing samples...")
# x_test = load_features(data_path+'test.txt', skip_header = False, skip_instname=False)
# x_test = x_test.reshape((13794, 399, 25))
# y_test = load_labels(data_path+'test_labels.txt', gen_headers=False)

print('x_train shape:', x_train.shape)
print('y_train shape:', y_train.shape)
print('x_validation shape:', x_validation.shape)
print('y_validation shape:', y_validation.shape)
# print('x_test shape:', x_test.shape)
# print('y_test shape:', y_test.shape)

Loading training samples...


10075947it [01:57, 85756.07it/s]
  cols = np.fromstring(line, dtype=float, sep=delim)
25254it [00:00, 280346.28it/s]


Loading validation samples...


3778929it [00:45, 82249.01it/s]
9472it [00:00, 312792.87it/s]

x_train shape: (25253, 399, 25)
y_train shape: (25253, 3)
x_validation shape: (9471, 399, 25)
y_validation shape: (9471, 3)





### Building RNN-LSTM model


In [4]:
import keras.backend as K
from keras.models import Model, save_model, load_model, Sequential
from keras.layers import Input, Dense, Masking, LSTM, Dropout, TimeDistributed, Bidirectional
from tensorflow.keras.optimizers import RMSprop, Adam

from numpy.random import seed
from tensorflow.keras.utils import set_random_seed

from sklearn.preprocessing import StandardScaler

2022-05-05 22:41:15.796158: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/changhyun/catkin_ws/devel/lib:/opt/ros/noetic/lib
2022-05-05 22:41:15.796197: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [5]:
def create_model(num_units_1=64, num_units_2=32):
    model = Sequential()
    model.add(LSTM(units=num_units_1, input_dim=25, return_sequences=False))
    model.add(Dropout(0.1))
    # model.add(LSTM(units=num_units_2, return_sequences=False))
    # model.add(Dropout(0.1))
    model.add(Dense(3))
    rms = RMSprop(learning_rate=0.001)
    model.compile(loss='mse', optimizer=rms)
    return model

In [6]:
def scale_data(x, y):
  scaler = StandardScaler()
  x_flatten = x.reshape(-1, x.shape[2])
  y_flatten = np.empty((x_flatten.shape[0], 3))
  index = 0
  for i in range(0, len(x)):
    while index < 399 * (i + 1):
      y_flatten[index, 0] = y[i, 0]
      y_flatten[index, 1] = y[i, 1]
      y_flatten[index, 2] = y[i, 2]
      index += 1

  concatenated = np.concatenate((x_flatten, y_flatten), axis=1)
  scaled = scaler.fit_transform(concatenated)

  x_flatten = scaled[:, :25]
  y_flatten = scaled[:, :-3]
  x_rev = scaled[:, :25].reshape(x.shape)
  index = 0  
  while index < len(y_flatten) / 399:
    y[index, 0] = scaled[index * 399, 25]
    y[index, 1] = scaled[index * 399, 26]
    y[index, 2] = scaled[index * 399, 27]
    index += 1
  
  
  return scaler, x_rev, y

In [7]:
model = create_model()

2022-05-05 22:41:19.852964: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/changhyun/catkin_ws/devel/lib:/opt/ros/noetic/lib
2022-05-05 22:41:19.852988: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-05-05 22:41:19.853007: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (changhyun-X510UAR): /proc/driver/nvidia/version does not exist
2022-05-05 22:41:19.853929: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
scaler, x_train, y_train = scale_data(x_train, y_train)

In [12]:
num_epochs = 5
epoch = 1
batch_size = 50
while epoch <= num_epochs:
    model.fit(x_train, y_train, batch_size=batch_size, initial_epoch=epoch-1, epochs=epoch)  # Evaluate after each epoch        
    epoch += 1

2022-05-05 22:42:41.107024: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 1007594700 exceeds 10% of free system memory.


Epoch 2/2


2022-05-05 22:44:04.984744: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 1007594700 exceeds 10% of free system memory.


Epoch 3/3


2022-05-05 22:45:25.974806: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 1007594700 exceeds 10% of free system memory.


Epoch 4/4


2022-05-05 22:46:48.921603: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 1007594700 exceeds 10% of free system memory.




KeyboardInterrupt: 