<a href="https://colab.research.google.com/github/cuckoong/Analyzing_Neural_Time_Series/blob/master/CRAM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## setup environment

In [None]:
## using tensorflow 1.14
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
# from cnn_class import cnn
import time
import scipy.io as sio
from sklearn.metrics import classification_report, roc_auc_score, auc, roc_curve, f1_score
# from RnnAttention.attention import attention
from scipy import interp
# import tensorflow.compat.v1 as tf
# tf.disable_v2_behavior()

## Attention

In [None]:
import tensorflow as tf


def attention(inputs, attention_size, time_major=False, return_alphas=False, train_phase=True):
    """
    Attention mechanism layer which reduces RNN/Bi-RNN outputs with an attention vector.

    Every call creates three new trainable variables (projection matrix, bias and
    context vector), so call it once per attention layer while building the graph.

    Args:
        inputs: The Attention inputs.
            Matches outputs of RNN/Bi-RNN layer (not final state):
                In case of RNN, this must be RNN outputs `Tensor`:
                    If time_major == False (default), this must be a tensor of shape:
                        `[batch_size, max_time, cell.output_size]`.
                    If time_major == True, this must be a tensor of shape:
                        `[max_time, batch_size, cell.output_size]`.
                In case of Bidirectional RNN, this must be a tuple (outputs_fw, outputs_bw)
                containing the forward and the backward RNN outputs `Tensor`; the two are
                concatenated along the last axis.
        attention_size: Linear size of the Attention weights.
        time_major: The shape format of the `inputs` Tensors.
            If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
            If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
        return_alphas: Whether to return the attention coefficients along with the
            layer's output. Used for visualization purposes.
        train_phase: Not used by this function; kept so existing call sites that
            pass it keep working.

    Returns:
        If `return_alphas` is false, the attention output `Tensor`:
            in case of RNN shaped `[batch_size, cell.output_size]`,
            in case of Bidirectional RNN shaped
            `[batch_size, cell_fw.output_size + cell_bw.output_size]`.
        If `return_alphas` is true, the tuple `(output, alphas)` where `alphas`
        is shaped `[batch_size, max_time]`.
    """

    if isinstance(inputs, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T,B,D) => (B,T,D); use the public tf.transpose instead of the
        # internal tf.array_ops module.
        inputs = tf.transpose(inputs, [1, 0, 2])

    # D value - hidden size of the RNN layer (must be statically known)
    hidden_size = inputs.shape[2].value

    # Trainable parameters (creation order kept: w, b, u)
    w_omega = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    with tf.name_scope('v'):
        # Applying fully connected layer with non-linear activation to each of the B*T timestamps;
        #  the shape of `v` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
        v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)
    # For each of the timestamps its vector of size A from `v` is reduced with `u` vector
    vu = tf.tensordot(v, u_omega, axes=1, name='vu')  # (B,T) shape
    alphas = tf.nn.softmax(vu, name='alphas')         # (B,T) shape

    # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)

    if return_alphas:
        return output, alphas
    return output

## cnn class

In [None]:
import tensorflow as tf

class cnn:
    """Builder for TF1 convolution, pooling, dense and readout ops.

    Every `apply_*` layer method creates fresh `tf.Variable`s on each call.
    The instance only stores the shared defaults:

    weight_stddev -- stddev of the truncated-normal weight initializer
    bias_constant -- constant value every bias is initialised with
    padding       -- padding mode string passed to every conv / pool op
    """

    def __init__(self, weight_stddev=0.1, bias_constant=0.1, padding="SAME"):
        self.weight_stddev = weight_stddev
        self.bias_constant = bias_constant
        self.padding = padding

    # ------------------------------------------------------------------ variables
    def weight_variable(self, shape):
        """Return a new variable of `shape` drawn from a truncated normal."""
        return tf.Variable(tf.truncated_normal(shape, stddev=self.weight_stddev))

    def bias_variable(self, shape):
        """Return a new variable of `shape` filled with `self.bias_constant`."""
        return tf.Variable(tf.constant(self.bias_constant, shape=shape))

    # ------------------------------------------------------------------ raw convolutions
    def conv1d(self, x, W, kernel_stride):
        """1-D convolution of `x` with filter `W` using a scalar stride."""
        return tf.nn.conv1d(x, W, stride=kernel_stride, padding=self.padding)

    def conv2d(self, x, W, kernel_stride):
        """2-D convolution; the same stride is used for height and width."""
        # strides[0] and strides[3] are fixed to 1
        stride_spec = [1, kernel_stride, kernel_stride, 1]
        return tf.nn.conv2d(x, W, strides=stride_spec, padding=self.padding)

    def conv3d(self, x, W, kernel_stride):
        """3-D convolution; the same stride is used for depth, height and width."""
        # strides[0] and strides[4] are fixed to 1
        stride_spec = [1, kernel_stride, kernel_stride, kernel_stride, 1]
        return tf.nn.conv3d(x, W, strides=stride_spec, padding=self.padding)

    # ------------------------------------------------------------------ conv + BN + ReLU layers
    def apply_conv1d(self, x, filter_width, in_channels, out_channels, kernel_stride, train_phase):
        """Conv1d + bias, batch norm on axis 2, then ReLU."""
        kernel = self.weight_variable([filter_width, in_channels, out_channels])
        # one bias value per output feature map
        offset = self.bias_variable([out_channels])
        pre_norm = tf.add(self.conv1d(x, kernel, kernel_stride), offset)
        return tf.nn.relu(self.batch_norm_cnv_1d(pre_norm, train_phase))

    def apply_conv2d(self, x, filter_height, filter_width, in_channels, out_channels, kernel_stride, train_phase):
        """Conv2d + bias, batch norm on axis 3, then ReLU."""
        kernel = self.weight_variable([filter_height, filter_width, in_channels, out_channels])
        # one bias value per output feature map
        offset = self.bias_variable([out_channels])
        pre_norm = tf.add(self.conv2d(x, kernel, kernel_stride), offset)
        return tf.nn.relu(self.batch_norm_cnv_2d(pre_norm, train_phase))

    def apply_conv3d(self, x, filter_depth, filter_height, filter_width, in_channels, out_channels, kernel_stride, train_phase):
        """Conv3d + bias, batch norm on axis 4, then ReLU."""
        kernel = self.weight_variable([filter_depth, filter_height, filter_width, in_channels, out_channels])
        # one bias value per output feature map
        offset = self.bias_variable([out_channels])
        pre_norm = tf.add(self.conv3d(x, kernel, kernel_stride), offset)
        return tf.nn.relu(self.batch_norm_cnv_3d(pre_norm, train_phase))

    # ------------------------------------------------------------------ batch normalisation
    def _batch_norm_on_axis(self, inputs, axis, train_phase):
        """Shared batch-norm call; only the normalised axis differs per caller."""
        return tf.layers.batch_normalization(
            inputs,
            axis=axis,
            momentum=0.993,
            epsilon=1e-5,
            scale=False,
            training=train_phase,
        )

    def batch_norm_cnv_3d(self, inputs, train_phase):
        """Batch norm over axis 4 (used after conv3d)."""
        return self._batch_norm_on_axis(inputs, 4, train_phase)

    def batch_norm_cnv_2d(self, inputs, train_phase):
        """Batch norm over axis 3 (used after conv2d)."""
        return self._batch_norm_on_axis(inputs, 3, train_phase)

    def batch_norm_cnv_1d(self, inputs, train_phase):
        """Batch norm over axis 2 (used after conv1d)."""
        return self._batch_norm_on_axis(inputs, 2, train_phase)

    def batch_norm(self, inputs, train_phase):
        """Batch norm over axis 1 (used after the fully connected layer)."""
        return self._batch_norm_on_axis(inputs, 1, train_phase)

    # ------------------------------------------------------------------ pooling
    def apply_max_pooling(self, x, pooling_height, pooling_width, pooling_stride):
        """2-D max pooling; `pooling_stride` applies to both height and width."""
        # ksize[0] = ksize[3] = 1 and strides[0] = strides[3] = 1
        window = [1, pooling_height, pooling_width, 1]
        hop = [1, pooling_stride, pooling_stride, 1]
        return tf.nn.max_pool(x, ksize=window, strides=hop, padding=self.padding)

    def apply_max_pooling3d(self, x, pooling_depth, pooling_height, pooling_width, pooling_stride):
        """3-D max pooling; `pooling_stride` applies to depth, height and width."""
        # ksize[0] = ksize[4] = 1 and strides[0] = strides[4] = 1
        window = [1, pooling_depth, pooling_height, pooling_width, 1]
        hop = [1, pooling_stride, pooling_stride, pooling_stride, 1]
        return tf.nn.max_pool3d(x, ksize=window, strides=hop, padding=self.padding)

    # ------------------------------------------------------------------ dense layers
    def apply_fully_connect(self, x, x_size, fc_size, train_phase):
        """Dense layer `x @ W + b` (W is [x_size, fc_size]) with batch norm and ReLU."""
        dense_w = self.weight_variable([x_size, fc_size])
        dense_b = self.bias_variable([fc_size])
        pre_norm = tf.add(tf.matmul(x, dense_w), dense_b)
        return tf.nn.relu(self.batch_norm(pre_norm, train_phase))

    def apply_readout(self, x, x_size, readout_size):
        """Linear readout `x @ W + b` with no normalisation or activation."""
        out_w = self.weight_variable([x_size, readout_size])
        out_b = self.bias_variable([readout_size])
        return tf.add(tf.matmul(x, out_w), out_b)


## evaluation metric function (multi-class ROC AUC)

In [None]:

def multiclass_roc_auc_score(y_true, y_score):
    """Compute per-class, micro-averaged and macro-averaged ROC AUC.

    Args:
        y_true: 2-D array indexed as [sample, class] holding the true label
            indicator of every class (column `i` is passed to `roc_curve`
            as the binary target of class `i`).
        y_score: 2-D array of the same shape holding the predicted score
            of every class.

    Returns:
        dict mapping each class index `0..n_classes-1`, plus the keys
        "micro" and "macro", to the corresponding AUC value.

    Raises:
        AssertionError: if `y_true` and `y_score` differ in shape.
    """
    assert y_true.shape == y_score.shape
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    n_classes = y_true.shape[1]

    # compute ROC curve and ROC area for each class
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # compute micro-average ROC curve and ROC area (all classes pooled)
    fpr["micro"], tpr["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # compute macro-average ROC curve and ROC area
    # First aggregate all false positive rates
    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
    # Then interpolate all ROC curves at these points.
    # np.interp is what the deprecated `scipy.interp` alias pointed to.
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
    # Finally average it and compute AUC
    mean_tpr /= n_classes
    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    return roc_auc

## load data

In [None]:
# Mount Google Drive inside the Colab runtime so the dataset files can be
# read from /content/drive (this prompts for an authorization code).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
import os
os.chdir('/content/drive/My Drive/braindecode_data')
!ls

subject_1_X_test.npy   subject_4_X_test.npy   subject_7_X_test.npy
subject_1_X_train.npy  subject_4_X_train.npy  subject_7_X_train.npy
subject_1_y_test.npy   subject_4_y_test.npy   subject_7_y_test.npy
subject_1_y_train.npy  subject_4_y_train.npy  subject_7_y_train.npy
subject_2_X_test.npy   subject_5_X_test.npy   subject_8_X_test.npy
subject_2_X_train.npy  subject_5_X_train.npy  subject_8_X_train.npy
subject_2_y_test.npy   subject_5_y_test.npy   subject_8_y_test.npy
subject_2_y_train.npy  subject_5_y_train.npy  subject_8_y_train.npy
subject_3_X_test.npy   subject_6_X_test.npy   subject_9_X_test.npy
subject_3_X_train.npy  subject_6_X_train.npy  subject_9_X_train.npy
subject_3_y_test.npy   subject_6_y_test.npy   subject_9_y_test.npy
subject_3_y_train.npy  subject_6_y_train.npy  subject_9_y_train.npy


In [None]:
# #cross_subject
# folder = 'BCI_IV'
# X_tmp = np.load(os.path.join(folder,'X.npy'))
# y_tmp = np.load(os.path.join(folder,'y.npy'))
# y_tmp = np.asarray(pd.get_dummies(y_tmp.ravel()),dtype=np.int16)
# sub = np.load(os.path.join(folder,'sub.npy'))
# sub = sub.reshape(-1,)

# sub_len = len(np.unique(sub))

# #data augmenation

# # print('length of the subject:')
# # print(sub_len)
# # X = np.expand_dims(X,axis=3)
# # print('shape:')
# print(X_tmp.shape, y_tmp.shape, sub.shape)

# print('sub:'+str(np.unique(sub)))
# print('y:'+str(np.unique(y_tmp)))

# # cross subject
# sub_select = 2
# train_X = X_tmp[sub!=sub_select,:,:]
# train_y = y_tmp[sub!=sub_select,:]
# sub_train = sub[sub!= sub_select,]

# train_X  = train_X.astype(np.float32) 


# #cross subject
# test_X = X_tmp[sub==sub_select,:,:]
# test_y = y_tmp[sub==sub_select,:]

# test_X = test_X.astype(np.float32)

## choosing subject

In [None]:
# Number of the subject whose pre-split train / test arrays are loaded
# from the current working directory.
test_idx = 1
X_train = np.load('subject_{}_X_train.npy'.format(test_idx))
X_test = np.load('subject_{}_X_test.npy'.format(test_idx))
y_train = np.load('subject_{}_y_train.npy'.format(test_idx))
y_test = np.load('subject_{}_y_test.npy'.format(test_idx))

In [None]:
# One-hot encode the flattened label vectors as int16 matrices.
# NOTE(review): train and test are encoded independently, so the columns only
# line up if both splits contain the same set of labels - confirm for new data.
train_y = pd.get_dummies(y_train.ravel()).values.astype(np.int16)
test_y = pd.get_dummies(y_test.ravel()).values.astype(np.int16)

In [None]:
# Sliding-window settings used to cut every trial into overlapping segments.
window_size = 400  # length of one window along the first axis of a trial
step = 50  # distance between the starts of two consecutive windows
n_channel = 22  # channel count used when batches are reshaped for the network input

def windows(data, size, step):
    """Yield `(start, end)` index pairs of sliding windows over `data`.

    Windows are taken along the first axis of `data`, have length `size`
    and start every `step` positions. Only windows that fit completely
    inside the data are produced, including one that ends exactly at the
    last element.

    Args:
        data: array-like exposing `.shape[0]`.
        size: window length.
        step: hop between consecutive window starts; must be positive.

    Yields:
        Tuples `(start, end)` of ints, with `end - start == size`.

    Raises:
        ValueError: if `step` is not positive (the loop would never advance).
    """
    if step <= 0:
        raise ValueError("step must be a positive number")
    start = 0
    # `<=` keeps the final window when it ends exactly at the end of the data
    while start + size <= data.shape[0]:
        yield int(start), int(start + size)
        start += step


def segment_signal_without_transition(data, window_size, step):
    """Cut one recording into full-length sliding windows.

    Args:
        data: array sliced along its first axis.
        window_size: number of rows every kept segment must have.
        step: hop between consecutive window starts.

    Returns:
        `np.array` stacking all slices `data[start:end]` produced by
        `windows` whose length equals `window_size`.
    """
    kept = [
        data[lo:hi]
        for lo, hi in windows(data, window_size, step)
        # defensive: skip any slice that came out shorter than requested
        if len(data[lo:hi]) == window_size
    ]
    return np.array(kept)


def segment_dataset(X, window_size, step):
    """Apply sliding-window segmentation to every entry along the first axis of `X`.

    Args:
        X: array whose first axis enumerates the recordings to segment.
        window_size: window length handed to the per-recording segmenter.
        step: hop between consecutive window starts.

    Returns:
        `np.array` with one stack of windows per entry of `X`.
    """
    per_entry = [
        segment_signal_without_transition(X[idx], window_size, step)
        for idx in range(X.shape[0])
    ]
    return np.array(per_entry)


# Take index 0 of the last axis and swap the two remaining per-trial axes,
# so that windowing (which slices the first per-trial axis) runs over time.
train_raw_x = X_train[:, :, :, 0].transpose(0, 2, 1)
test_raw_x = X_test[:, :, :, 0].transpose(0, 2, 1)

# Cut every trial into overlapping windows.
train_win_x = segment_dataset(train_raw_x, window_size, step)
print("train_win_x shape: ", train_win_x.shape)
test_win_x = segment_dataset(test_raw_x, window_size, step)
print("test_win_x shape: ", test_win_x.shape)

# Swap the last two axes -> [trial, window, channel, time_length]
train_win_x = np.swapaxes(train_win_x, 2, 3)
print("train_win_x shape: ", train_win_x.shape)
test_win_x = np.swapaxes(test_win_x, 2, 3)
print("test_win_x shape: ", test_win_x.shape)

# Append a trailing singleton axis -> [trial, window, channel, time_length, 1]
train_x = train_win_x[..., np.newaxis]
test_x = test_win_x[..., np.newaxis]

# Number of windows per trial; this becomes the RNN sequence length.
num_timestep = train_x.shape[1]

train_win_x shape:  (4608, 15, 400, 22)
test_win_x shape:  (576, 15, 400, 22)
train_win_x shape:  (4608, 15, 22, 400)
test_win_x shape:  (576, 15, 22, 400)


## model

In [None]:
###########################################################################
# set model parameters
###########################################################################
# kernel parameter: height / width of the single 2-D convolution filter
kernel_height_1st	= 22
kernel_width_1st 	= 45

# stride of the convolution (used for both height and width)
kernel_stride		= 1

# number of feature maps produced by the convolution
conv_channel_num	= 40

# pooling parameter: height / width of the max-pooling window
pooling_height_1st 	= 1
pooling_width_1st 	= 75

# stride of the max pooling (apply_max_pooling uses it for both height and width)
pooling_stride_1st = 10

# attention projection size and number of units of each LSTM layer
attention_size = 512
n_hidden_state = 64

###########################################################################
# set dataset parameters
###########################################################################
# input channel (size of the trailing axis of every window fed to the CNN)
input_channel_num = 1

# input height (axis 2 of train_x)
input_height = train_x.shape[2]

# input width (axis 3 of train_x)
input_width = train_x.shape[3]

# prediction class: number of output labels
num_labels = 4
###########################################################################
# set training parameters
###########################################################################
# set learning rate
learning_rate = 1e-4

# set maximum training epochs
training_epochs = 200

# set batch size (counted in trials; every trial contributes num_timestep windows)
batch_size = 10

# set dropout probability (the training loop feeds keep_prob = 1 - dropout_prob)
dropout_prob = 0.5

# set train batch number per epoch (trials that do not fill a batch are left out)
batch_num_per_epoch = train_x.shape[0]//batch_size

# instance cnn class; 'VALID' padding is used by every conv / pool op built from it
padding = 'VALID'

cnn_2d = cnn(padding=padding)

# input placeholder
# X holds individual windows; the training loop flattens [trial, window] into its first axis
X = tf.placeholder(tf.float32, shape=[None, input_height, input_width, input_channel_num], name = 'X')
# Y holds one label row per trial
Y = tf.placeholder(tf.float32, shape=[None, num_labels], name = 'Y')
# switches batch normalization between training and inference behaviour
train_phase = tf.placeholder(tf.bool, name = 'train_phase')
# keep probability used by every dropout op in the graph
keep_prob = tf.placeholder(tf.float32, name='keep_prob')

# first CNN layer: conv + batch norm + ReLU, followed by max pooling
conv_1 = cnn_2d.apply_conv2d(X, kernel_height_1st, kernel_width_1st, input_channel_num, conv_channel_num, kernel_stride, train_phase)
print("conv 1 shape: ", conv_1.get_shape().as_list())
pool_1 = cnn_2d.apply_max_pooling(conv_1, pooling_height_1st, pooling_width_1st, pooling_stride_1st)
print("pool 1 shape: ", pool_1.get_shape().as_list())

# flatten the pooled feature maps of every window into one feature vector
pool1_shape = pool_1.get_shape().as_list()
pool1_flat = tf.reshape(pool_1, [-1, pool1_shape[1]*pool1_shape[2]*pool1_shape[3]])

fc_drop = tf.nn.dropout(pool1_flat, keep_prob)	

# regroup the per-window feature vectors into sequences of num_timestep windows for the RNN
lstm_in = tf.reshape(fc_drop, [-1, num_timestep, pool1_shape[1]*pool1_shape[2]*pool1_shape[3]])


Instructions for updating:
Use keras.layers.BatchNormalization instead.  In particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not be used (consult the `tf.keras.layers.batch_normalization` documentation).
Instructions for updating:
Please use `layer.__call__` method instead.
conv 1 shape:  [None, 1, 356, 40]
pool 1 shape:  [None, 1, 29, 40]
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [None]:

########################## RNN ########################
# Two stacked LSTM layers, each with dropout applied to its outputs.
cells = []
for _ in range(2):
    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden_state, forget_bias=1.0, state_is_tuple=True)
    cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
    cells.append(cell)
lstm_cell = tf.contrib.rnn.MultiRNNCell(cells)

# Size the zero state from the runtime batch dimension of `lstm_in` rather than
# the `batch_size` constant, so the graph accepts batches of any size
# (e.g. a final, smaller evaluation batch).
init_state = lstm_cell.zero_state(tf.shape(lstm_in)[0], dtype=tf.float32)

# output ==> [batch, step, n_hidden_state]
rnn_op, states = tf.nn.dynamic_rnn(lstm_cell, lstm_in, initial_state=init_state, time_major=False)

########################## attention ########################
# Collapse the time axis with a learned attention weighting.
with tf.name_scope('Attention_layer'):
    attention_op, alphas = attention(rnn_op, attention_size, time_major=False, return_alphas=True)

attention_drop = tf.nn.dropout(attention_op, keep_prob)

########################## readout ########################
# Linear layer mapping the attended RNN features to class logits.
y_ = cnn_2d.apply_readout(attention_drop, rnn_op.shape[2].value, num_labels)

# probability prediction
y_prob = tf.nn.softmax(y_, name="y_prob")

# class prediction
y_pred = tf.argmax(y_prob, 1, name="y_pred")

########################## loss and optimizer ########################
# cross entropy cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y_, labels=Y), name='loss')

# Batch-norm moving averages are updated through UPDATE_OPS, so the train op
# must depend on them.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    # set training Adam optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# get correctly predicted object (reuses y_pred instead of rebuilding softmax/argmax)
correct_prediction = tf.equal(y_pred, tf.argmax(Y, 1))

########################## define accuracy ########################
# calculate prediction accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:

Future major versions of TensorFlow will allow gradients

## training

In [None]:
###########################################################################
# train test and save result
###########################################################################

# run with gpu memory growth
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

# per-epoch history
train_acc = []
test_acc = []
best_test_acc = []
train_loss = []

# The test split needs its own batch count. Looping over the training batch
# count with a wrapped offset evaluated the same test trials many times and
# distorted every test metric. Only full batches are used, so a remainder
# smaller than `batch_size` is left out.
test_batch_num = test_x.shape[0] // batch_size
if test_batch_num == 0:
    raise ValueError("test set is smaller than one batch")

with tf.Session(config=config) as session:
    session.run(tf.global_variables_initializer())
    best_acc = 0
    for epoch in range(training_epochs):
        ########################## training process ########################
        for b in range(batch_num_per_epoch):
            offset = (b * batch_size) % (train_y.shape[0] - batch_size)
            batch_x = train_x[offset:(offset + batch_size), :, :, :, :]
            # flatten [trial, window] so every window is one CNN input
            batch_x = batch_x.reshape([len(batch_x)*num_timestep, n_channel, window_size, 1])
            batch_y = train_y[offset:(offset + batch_size), :]
            _, c = session.run([optimizer, cost], feed_dict={X: batch_x, Y: batch_y, keep_prob: 1-dropout_prob, train_phase: True})

        ########################## training metrics ########################
        # re-evaluate the training set with dropout off and batch norm in inference mode
        train_accuracy = []
        train_l = []
        for i in range(batch_num_per_epoch):
            offset = (i * batch_size) % (train_y.shape[0] - batch_size)
            train_batch_x = train_x[offset:(offset + batch_size), :, :, :, :]
            train_batch_x = train_batch_x.reshape([len(train_batch_x)*num_timestep, n_channel, window_size, 1])
            train_batch_y = train_y[offset:(offset + batch_size), :]

            train_a, train_c = session.run([accuracy, cost], feed_dict={X: train_batch_x, Y: train_batch_y, keep_prob: 1.0, train_phase: False})

            train_l.append(train_c)
            train_accuracy.append(train_a)
        print("("+time.asctime(time.localtime(time.time()))+") Epoch: ", epoch+1, " Training Cost: ", np.mean(train_l), "Training Accuracy: ", np.mean(train_accuracy))
        train_acc.append(np.mean(train_accuracy))
        train_loss.append(np.mean(train_l))

        ########################## test metrics ########################
        test_accuracy = []
        test_l = []
        pred_test = []
        true_test = []
        prob_test = []
        for j in range(test_batch_num):
            # consecutive, non-overlapping batches: every test trial is seen at most once
            offset = j * batch_size
            test_batch_x = test_x[offset:(offset + batch_size), :, :, :, :]
            test_batch_x = test_batch_x.reshape([len(test_batch_x)*num_timestep, n_channel, window_size, 1])
            test_batch_y = test_y[offset:(offset + batch_size), :]

            test_a, test_c, prob_v, pred_v = session.run([accuracy, cost, y_prob, y_pred], feed_dict={X: test_batch_x, Y: test_batch_y, keep_prob: 1.0, train_phase: False})

            test_accuracy.append(test_a)
            test_l.append(test_c)
            pred_test.append(pred_v)
            true_test.append(test_batch_y)
            prob_test.append(prob_v)

        epoch_test_acc = np.mean(test_accuracy)
        if epoch_test_acc > best_acc:
            best_acc = epoch_test_acc
        # keep the test history alongside the training history
        test_acc.append(epoch_test_acc)
        best_test_acc.append(best_acc)

        pred_test = np.concatenate(pred_test)
        true_test = np.array(true_test).reshape([-1, num_labels])
        prob_test = np.array(prob_test).reshape([-1, num_labels])
        auc_roc_test = multiclass_roc_auc_score(y_true=true_test, y_score=prob_test)
        f1 = f1_score(y_true=np.argmax(true_test, axis=1), y_pred=pred_test, average='macro')
        print("("+time.asctime(time.localtime(time.time()))+") Epoch: ", epoch+1, "Test Cost: ", np.mean(test_l),
              "Test Accuracy: ", epoch_test_acc,
              "Test f1: ", f1,
              "Test AUC: ", auc_roc_test['macro'], "\n")
   

(Fri Aug 28 07:27:57 2020) Epoch:  1  Training Cost:  1.3879954861558002 Training Accuracy:  0.2639130494678798




(Fri Aug 28 07:28:06 2020) Epoch:  1 Test Cost:  1.3883017371530118 Test Accuracy:  0.2741304411350385 Test f1:  0.1622781464421421 Test AUC:  0.5114551508070527 

(Fri Aug 28 07:28:27 2020) Epoch:  2  Training Cost:  1.3854779992414556 Training Accuracy:  0.2691304407365944
(Fri Aug 28 07:28:35 2020) Epoch:  2 Test Cost:  1.387074663328088 Test Accuracy:  0.2704347886145115 Test f1:  0.16561169844519075 Test AUC:  0.517691551468064 

(Fri Aug 28 07:28:57 2020) Epoch:  3  Training Cost:  1.3839342459388402 Training Accuracy:  0.28978261471442557
(Fri Aug 28 07:29:05 2020) Epoch:  3 Test Cost:  1.3848437177098316 Test Accuracy:  0.26760870159968086 Test f1:  0.13163609252219494 Test AUC:  0.5584538967672077 

(Fri Aug 28 07:29:27 2020) Epoch:  4  Training Cost:  1.3832486354786417 Training Accuracy:  0.27934783198263335
(Fri Aug 28 07:29:35 2020) Epoch:  4 Test Cost:  1.3846649978471839 Test Accuracy:  0.24869565793677517 Test f1:  0.16795231416549788 Test AUC:  0.5543413524087308 

(Fr