In [1]:
##############################################
# Dataset selector. Must be one of {'BC_STAGE3', 'SYNTHETIC'} -- these are the
# two values the data-loading cell branches on.
DATA_MODE  = 'BC_STAGE3'
##############################################

import tensorflow as tf
import numpy as np

import os, sys
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tqdm import tqdm

import import_data as impt

import class_clustering as clustering
from class_DeepTPC import DeepTPC
import train
import evaluate

  from ._conv import register_converters as _register_converters


In [2]:
# Load the arrays for the selected DATA_MODE, then build data_Mask: a
# (num_samples, max_length) array holding a single 1 per row at the step
# selected from data_T (see mask_offset below).
if DATA_MODE == 'BC_STAGE3':
    data_M, data_D, data_T, data_X, data_Y_180, event_list, feat_list, label_180_list = impt.import_PHE_Stage3_version4(max_length=None)
    # Mark the step *before* the argmax of data_T along axis 1.
    # NOTE(review): presumably because these sequences end with an EOS step -- confirm.
    mask_offset = 1

elif DATA_MODE == 'SYNTHETIC':
    data_M, data_D, data_T = impt.import_synthetic_HawkesProcess()
    # Placeholder static-feature matrix (one all-zero column): no static feature is used.
    data_X = np.zeros([np.shape(data_M)[0], 1])
    # Mark the argmax step itself; switch to 1 if an EOS step is available.
    mask_offset = 0

else:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError(
        "Unknown DATA_MODE {!r}; expected 'BC_STAGE3' or 'SYNTHETIC'".format(DATA_MODE)
    )

# Index (along axis 1) of the largest entry of data_T for every sample.
last_seq_idx = np.argmax(data_T, axis=1).reshape([-1])
data_Mask = np.zeros([np.shape(data_M)[0], np.shape(data_M)[1]])

for i in range(np.shape(data_Mask)[0]):
    # NOTE(review): with mask_offset = 1 and an argmax of 0 the index is -1,
    # which NumPy wraps to the last column -- confirm that cannot occur.
    data_Mask[i, last_seq_idx[i] - mask_offset] = 1

# Dimensions handed to the network, read off the loaded arrays.
num_Event       = np.shape(data_M)[2]
max_length      = np.shape(data_M)[1]
num_Feature     = np.shape(data_X)[1]

In [3]:
# Outer-iteration index and base random seed; their sum seeds the split below.
out_itr = 0
seed = 1234

# Split all five arrays together so the rows stay aligned; 20% is held out.
tr_M, te_M, tr_D, te_D, tr_T, te_T, tr_X, te_X, tr_Mask, te_Mask = train_test_split(
    data_M,
    data_D,
    data_T,
    data_X,
    data_Mask,
    test_size=0.2,
    random_state=seed + out_itr,
)

# Directory passed to the training calls for this outer iteration; created if missing.
init_path = './sample/itr_' + str(out_itr) + '/'

if not os.path.exists(init_path):
    os.makedirs(init_path)

In [4]:
### NETWORK PARAMETERS

# --- RNN ---
h_dim1 = 50                    # hidden nodes per RNN layer
num_layers_RNN = 2
RNN_type = 'GRU'
RNN_active_fn = tf.nn.tanh

# --- Fully-connected net ---
h_dim2 = 50                    # hidden nodes in the FC layers

# --- Clustering ---
# L = number of points for the trapezoid approximation of the distance in the
# output space. delta_range holds L evenly spaced values from 0 up to the 95th
# percentile of the non-zero entries of tr_D.
# (Alternative upper bound: np.max(data_D) instead of the percentile.)
L = 10
delta_range = np.linspace(
    0,
    np.percentile(tr_D[tr_D != 0], 95),
    L,
)

# --- Others ---
# Weight initializer object handed to the network via network_settings.
initial_W = tf.contrib.layers.xavier_initializer()

In [5]:
# Data dimensions, bundled under the keys the training functions receive.
input_dims = {}
input_dims['num_Event'] = num_Event
input_dims['max_length'] = max_length
input_dims['num_Feature'] = num_Feature

# Architecture settings, grouped by sub-network; insertion order matches the
# order in which the hyper-parameters were defined.
network_settings = {}

# RNN
network_settings['h_dim1'] = h_dim1
network_settings['num_layers_RNN'] = num_layers_RNN
network_settings['RNN_type'] = RNN_type
network_settings['RNN_active_fn'] = RNN_active_fn

# FC net
network_settings['h_dim2'] = h_dim2

# Clustering grid and weight initializer
network_settings['L'] = L
network_settings['delta_range'] = delta_range
network_settings['initial_W'] = initial_W

In [6]:
# Training split, bundled under the keys the train.* functions receive.
DATA = {
    'M': tr_M,
    'D': tr_D,
    'T': tr_T,
    'X': tr_X,
    'Mask': tr_Mask,
}

# Settings for the initial training stage (train.train_init).
# NOTE(review): 'mb_size' is presumably the mini-batch size and 'k_prob' a
# dropout keep-probability -- confirm against train.py.
train_parameters_init = {
    'mb_size': 64,
    'lr_train': 1e-3,
    'k_prob': 0.7,
    # Reuse the notebook-level `seed` rather than repeating its literal value,
    # so the training seed stays tied to the train/test split seed.
    'seed': seed + out_itr,
    'ITERATION': 40000
}

# Number of clusters used by the clustering stage and the test-time assignment.
num_Cluster = 4

# Settings for the clustering stage (train.train_DeepTPC).
# NOTE(review): alpha / beta / gamma look like loss weights -- confirm in train.py.
train_parameters_cluster = {
    'num_Cluster': num_Cluster,
    'alpha': 0.1,
    'beta': 0.1,
    'beta_cluster': 0.1,
    'beta_ms': (num_Event - 1) * [1.0],  # list of (num_Event - 1) ones
    'gamma': 0.1,
    'ITERATION': 10000
}

In [None]:
# Stage 1: run train.train_init on the training split. The call returns a
# session handle and the initial model; the captured log reports MLE_loss and
# va_MLE_loss every 100 iterations, with occasional "saved..." lines.
sess, model_init = train.train_init(
    DATA,
    input_dims,
    network_settings,
    train_parameters_init,
    init_path,
)

saved...
ITR 0100: || MLE_loss=3965.5044 | va_MLE_loss=81.4666
ITR 0200: || MLE_loss=177.8243 | va_MLE_loss=84.7059
ITR 0300: || MLE_loss=134.1672 | va_MLE_loss=85.1686
ITR 0400: || MLE_loss=101.3748 | va_MLE_loss=84.3129
ITR 0500: || MLE_loss=94.2499 | va_MLE_loss=83.7492
ITR 0600: || MLE_loss=96.2044 | va_MLE_loss=83.6655
ITR 0700: || MLE_loss=92.6347 | va_MLE_loss=83.2330
ITR 0800: || MLE_loss=92.6354 | va_MLE_loss=82.8953
ITR 0900: || MLE_loss=85.4080 | va_MLE_loss=82.2874
ITR 1000: || MLE_loss=86.6301 | va_MLE_loss=81.9364
saved...
ITR 1100: || MLE_loss=84.5394 | va_MLE_loss=80.9814
saved...
ITR 1200: || MLE_loss=81.2076 | va_MLE_loss=80.1474
saved...
ITR 1300: || MLE_loss=88.4391 | va_MLE_loss=79.5803
saved...
ITR 1400: || MLE_loss=82.1548 | va_MLE_loss=78.8716
saved...
ITR 1500: || MLE_loss=81.9064 | va_MLE_loss=77.6815
ITR 1600: || MLE_loss=82.3556 | va_MLE_loss=79.2277
saved...
ITR 1700: || MLE_loss=81.0850 | va_MLE_loss=76.8819
saved...
ITR 1800: || MLE_loss=77.1248 | va_MLE_

In [None]:
# Stage 2: run train.train_DeepTPC with both parameter sets. `sess` is rebound
# to the session returned here; `results` is a dict read in the next cell.
sess, model_clustered, results = train.train_DeepTPC(
    DATA,
    input_dims,
    network_settings,
    train_parameters_init,
    train_parameters_cluster,
    init_path,
)

In [None]:
# Report which outer iteration and cluster count the results below belong to.
print('OUT_ITR:{} || K:{}'.format(out_itr, num_Cluster))

# Per-K sub-directory under init_path. The template has a single placeholder, so
# it must receive the cluster count only: str.format silently ignores surplus
# positional arguments, and passing out_itr first would yield 'K<out_itr>/'.
open_path = init_path + 'K{}/'.format(num_Cluster)

# Pull the training-stage outputs needed for test-time assignment and evaluation.
# NOTE(review): presumably tr_S = training cluster assignments and mu_z / mu_y =
# cluster centres in two different spaces -- confirm against train.train_DeepTPC.
tr_S = results['tr_S']
mu_z = results['mu_z']
mu_y = results['mu_y']

In [None]:
# Run clustering.kmeans_MTPP_TEST on the held-out split with the trained model
# and the mu_z / mu_y obtained from training. Only the third return value is
# kept (te_S); the first two are discarded.
# NOTE(review): the positional literal 1.0 is defined by kmeans_MTPP_TEST's
# signature -- confirm its meaning in class_clustering.py.
_, _, te_S = clustering.kmeans_MTPP_TEST(
    model_clustered,
    te_M,
    te_D,
    te_X,
    te_Mask,
    mu_z,
    mu_y,
    delta_range,
    num_Cluster,
    1.0,
    train_parameters_cluster['beta_cluster'],
    train_parameters_cluster['beta_ms'],
)

In [None]:
# Value passed as the final argument of the evaluation call below.
num_sample = 100

# Evaluate the test-set clustering; te_measure is indexed by metric name below.
te_predictions, te_measure = evaluate.evaluate_DeepTPC_cohesion_n_separation(
    model_clustered,
    te_M, te_D, te_X, te_S, te_Mask,
    mu_z, mu_y,
    delta_range,
    num_sample,
)

# Print the mean of each reported metric to four decimals.
print(
    '|| cohesion score = {:.4f} | silhouette index = {:.4f} ||'.format(
        np.mean(te_measure['similarity_sample_vs_sample']),
        np.mean(te_measure['s_silhouette_score']),
    )
)