In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import sklearn
import os
import pandas as pd
import pickle
import tensorflow as tf
import numpy as np
import time
random_state = 33
np.random.seed(random_state)

In [2]:
from datetime import datetime
now = datetime.now()
now = now.strftime("%Y_%m_%d_%H_%M")

results_path = "./results/keras_" + now
print("Saving results in:", results_path)

print()
import os
try:
    os.mkdir(results_path)
except OSError:
    print("Directory %s already exists. Creating new directory under %s(2)" % (results_path, results_path))
    os.mkdir(results_path+ "(2)")

Saving results in: ./results/keras_2019_12_12_20_07

Directory ./results/keras_2019_12_12_20_07 already exists. Creating new directory under ./results/keras_2019_12_12_20_07(2)


In [3]:
with open(results_path + "/readme.txt", "w") as file:
    file.write("Training data: 1-81 64 channels, Validation data: 82-108 4 channels")

# Training data 64 channel 1-81

In [4]:
dataset_dir = "./dataset/preprocessed_dataset/"

with open(dataset_dir+"1_81_shuffle_dataset_3D_win_10.pkl", "rb") as fp:
    X_train = pickle.load(fp)
with open(dataset_dir+"1_81_shuffle_labels_3D_win_10.pkl", "rb") as fp:
    y_train = pickle.load(fp)
X_train = X_train.reshape(-1, 10, 10, 11, 1)
print("Dataset shape:", X_train.shape)
print("Labels shape:", y_train.shape)

Dataset shape: (474018, 10, 10, 11, 1)
Labels shape: (474018,)


In [5]:
print(X_train[0, 2].reshape(10, 11))

[[ 0.          0.          0.          0.         -1.76756835 -0.38667332
  -0.53203069  0.          0.          0.          0.        ]
 [ 0.          0.          0.         -0.9681028  -0.56837003  0.84886434
   0.55814959 -0.9681028   0.          0.          0.        ]
 [ 0.          0.04939879  1.39395448 -0.78640609  1.93904462  1.61199053
   1.17591842  1.28493645 -1.00444215 -0.27765529  0.        ]
 [ 0.         -0.05961923  0.95788236  1.39395448  1.64832988  1.90270528
   1.32127579  0.70350696 -0.09595858 -1.51319295  0.        ]
 [-0.82274543 -0.89542412  0.44913156  0.81252499  0.66716762  0.77618565
   0.15841682  0.19475617 -0.16863726 -1.2224782  -0.89542412]
 [ 0.         -1.25881755 -0.13229792  0.23109551 -0.02327989  0.30377419
   0.23109551  0.23109551  0.37645288 -1.11346017  0.        ]
 [ 0.         -0.93176346 -0.02327989  0.12207748  0.08573814  0.37645288
   0.59448894  0.44913156  0.08573814 -0.24131595  0.        ]
 [ 0.          0.          0.         -0.

In [6]:
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(sparse=False)

y_train = y_train.reshape(-1, 1)
y_train = ohe.fit_transform(y_train)

# Validation data 4-channel 82-108 

In [7]:
dataset_dir = "./dataset/preprocessed_dataset/"
result_dir = "./results/"

with open(dataset_dir+"82_108_shuffle_dataset_3D_win_10.pkl", "rb") as fp:
    X_valid = pickle.load(fp)
with open(dataset_dir+"82_108_shuffle_labels_3D_win_10.pkl", "rb") as fp:
    y_valid = pickle.load(fp)
X_valid = X_valid.reshape(-1, 10, 10, 11, 1)
print("Dataset shape:", X_valid.shape)
print("Labels shape:", y_valid.shape)

Dataset shape: (150241, 10, 10, 11, 1)
Labels shape: (150241,)


In [8]:
print(X_valid[0, 2].reshape(10, 11))

[[ 0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.         -1.89962866  0.          0.
   0.          0.26479672  0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.        ]
 [ 0.          0.31084833  0.          0.          0.          0.
   0.          0.          0.          0.21874512  0.        ]
 [ 0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.
   0.          0.          0.  

In [9]:
y_valid = y_valid.reshape(-1, 1)
y_valid = ohe.transform(y_valid)

In [10]:
with open(results_path + "/ohe", "wb") as file:
    pickle.dump(ohe, file)

# Split data

In [11]:
# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(dataset, labels, test_size=0.25, random_state=random_state)
# print("Train dataset shape:", X_train.shape)
# print("Train label shape:", y_train.shape)
# print("Test dataset shape:", X_test.shape)
# print("Test label shape:", y_test.shape)

# Model

In [12]:
dropout_prob = 0.5
n_labels = y_train.shape[1]
training_epochs = 10
batch_size = 300
learning_rate = 1e-4

In [13]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Activation, Dropout, Input, LSTM, Conv2D, Conv3D
from tensorflow.keras.layers import Reshape, Flatten, Softmax
from tensorflow.keras.optimizers import Adam

In [14]:
def model(input_shape):
    """
    Function creating the model's graph in Keras.
    
    Argument:
    input_shape -- shape of the model's input data (using Keras conventions)

    Returns:
    model -- Keras model instance
    """
    X_input = Input(shape = input_shape)
    
    conv_1 = Conv3D(filters=32, kernel_size=(1, 3, 3), padding="same", strides=(1, 1, 1), activation="elu")(X_input)
    conv_2 = Conv3D(filters=64, kernel_size=(1, 3, 3), padding="same", strides=(1, 1, 1), activation="elu")(conv_1)
    conv_3 = Conv3D(filters=128, kernel_size=(1, 3, 3), padding="same", strides=(1, 1, 1), activation="elu")(conv_2)
    shape = conv_3.get_shape().as_list()
    
    pool_2_flat = Reshape([shape[1], shape[2]*shape[3]*shape[4]])(conv_3)
    fc = Dense(1024, activation="elu")(pool_2_flat)
    fc_drop = Dropout(dropout_prob)(fc)
    
    lstm_in = Reshape([10, 1024])(fc_drop)
    lstm_1 = LSTM(units=1024, return_sequences=True, unit_forget_bias=True, dropout=dropout_prob)(lstm_in)
    rnn_output = LSTM(units=1024, return_sequences=False, unit_forget_bias=True)(lstm_1)
    
    shape_rnn_out = rnn_output.get_shape().as_list()
    fc_out = Dense(shape_rnn_out[1], activation="elu")(rnn_output)
    fc_drop = Dropout(dropout_prob)(fc_out)
    y_ = Dense(n_labels)(fc_drop)
    y_posi = Softmax()(y_)
    
    model = Model(inputs = X_input, outputs = y_posi)
    return model

In [15]:
model = model(input_shape = (X_train.shape[1], X_train.shape[2], X_train.shape[3], X_train.shape[4]))
opt = Adam(lr=learning_rate)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [16]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 10, 10, 11, 1)]   0         
_________________________________________________________________
conv3d (Conv3D)              (None, 10, 10, 11, 32)    320       
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 10, 10, 11, 64)    18496     
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 10, 10, 11, 128)   73856     
_________________________________________________________________
reshape (Reshape)            (None, 10, 14080)         0         
_________________________________________________________________
dense (Dense)                (None, 10, 1024)          14418944  
_________________________________________________________________
dropout (Dropout)            (None, 10, 1024)          0     

In [17]:
# from tensorflow.keras import backend as K

# def recall_m(y_true, y_pred):
#         true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
#         possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
#         recall = true_positives / (possible_positives + K.epsilon())
#         return recall

# def precision_m(y_true, y_pred):
#         true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
#         predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
#         precision = true_positives / (predicted_positives + K.epsilon())
#         return precision

# def f1_m(y_true, y_pred):
#     precision = precision_m(y_true, y_pred)
#     recall = recall_m(y_true, y_pred)
#     return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [18]:
from tensorflow.keras.callbacks import ModelCheckpoint
checkpoint_path = results_path + "/model/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True, verbose=1)

In [19]:
print("Training start date and time:", datetime.now())

history = model.fit(X_train, y_train, batch_size=batch_size, epochs=2, shuffle=True, validation_data=(X_valid, y_valid), callbacks=[cp_callback])
print("Training end date and time:", datetime.now())
model.save(results_path + "/model/model.h5")

Training start date and time: 2019-12-12 20:08:02.102727
Train on 474018 samples, validate on 150241 samples
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 1/2
Epoch 00001: saving model to ./results/keras_2019_12_12_20_07/model/cp.ckpt
Epoch 2/2
Epoch 00002: saving model to ./results/keras_2019_12_12_20_07/model/cp.ckpt
Training end date and time: 2019-12-12 20:17:42.437378


In [20]:
with open(results_path + "/train_hist", "wb") as file:
    pickle.dump(history.history, file)

In [21]:
print("Training start date and time:", datetime.now())

Training start date and time: 2019-12-12 20:17:44.314626
