In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from sklearn.preprocessing import LabelEncoder, StandardScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds
from keras import backend as K
from keras.utils.np_utils import to_categorical

import pandas as pd
import numpy as np

import time

2022-12-23 17:25:02.454216: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-23 17:25:02.454229: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
  from .autonotebook import tqdm as notebook_tqdm


In [44]:
class BaseDNN(tf.keras.Model):
    def __init__(self, node_counts, dropout_ratios=None, 
                 activation='relu', optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True)):
        super(BaseDNN, self).init__()
        self.node_counts = node_counts
        self.dropout_ratios = [0 for _ in range(len(self.node_counts))] if dropout_ratios is None else dropout_ratios
        
        self.activation = activation
        self.optimizer=optimizer
        self.loss=loss

    def call(self, x):
        for n_nodes, dropout_ratio in zip(self.node_counts, self.dropout_ratios):
            x = layers.Dense(n_nodes, activation=self.activation)(x)
            x = layers.Dropout(dropout_ratio)(x)
        return x


train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.CategoricalAccuracy(name='test_accuracy')

@tf.function
def train_step(model, data, labels):
    with tf.GradientTape() as tape:
        # training=True is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        predictions = model(data, training=True)
        loss = model.loss(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    model.optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)


@tf.function
def test_step(model, data, labels):
    # training=False is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = model(data, training=False)
    t_loss = model.loss_object(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [45]:
label_encoder = LabelEncoder()
standard_scaler = StandardScaler()

def fit_transform_one_hot(arr):
    output = np.zeros([arr.shape[0]])
    for i in range(len(output)):
        output[i] = np.argmax(arr[i])
    return output

label_encoder.fit_transform_one_hot = fit_transform_one_hot

In [46]:
X = pd.read_csv(r'../data/X_expr.csv').drop(['Unnamed: 0', 'seqLibID'], axis=1).values
y = pd.read_csv(r'../data/y_cog.csv').drop(['Unnamed: 0', 'seqLibID'], axis=1).values

for i, patient in enumerate(y):
    if y[i][0] == "MildCognitiveImpairment":
        y[i][0] = "AD"

"""y_class1_idx = np.where(y == 'NoCognitiveImpairment')[0]
y_class2_idx = np.where(y == 'MildCognitiveImpairment')[0]
y_class3_idx = np.where(y == 'AD')[0]

# smallest class is MildCognitiveImpairment, with 73 samples
y_class1_idx_sub = np.random.choice(y_class1_idx, 73, replace=False)
y_class2_idx_sub = np.random.choice(y_class2_idx, 73, replace=False)
y_class3_idx_sub = np.random.choice(y_class3_idx, 73, replace=False)

bal_idx = np.hstack([y_class1_idx_sub, y_class2_idx_sub, y_class3_idx_sub])

X = X[bal_idx]
y = y[bal_idx]
"""
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

In [47]:
print(np.where(y == 'NoCognitiveImpairment')[0].shape[0])
print(np.where(y == 'MildCognitiveImpairment')[0].shape[0])
print(np.where(y == 'AD')[0].shape[0])
y

115
0
177


array([['NoCognitiveImpairment'],
       ['AD'],
       ['NoCognitiveImpairment'],
       ['AD'],
       ['NoCognitiveImpairment'],
       ['AD'],
       ['AD'],
       ['NoCognitiveImpairment'],
       ['AD'],
       ['AD'],
       ['AD'],
       ['AD'],
       ['NoCognitiveImpairment'],
       ['NoCognitiveImpairment'],
       ['AD'],
       ['NoCognitiveImpairment'],
       ['NoCognitiveImpairment'],
       ['AD'],
       ['AD'],
       ['NoCognitiveImpairment'],
       ['AD'],
       ['NoCognitiveImpairment'],
       ['AD'],
       ['AD'],
       ['NoCognitiveImpairment'],
       ['NoCognitiveImpairment'],
       ['NoCognitiveImpairment'],
       ['AD'],
       ['AD'],
       ['AD'],
       ['AD'],
       ['AD'],
       ['AD'],
       ['AD'],
       ['NoCognitiveImpairment'],
       ['NoCognitiveImpairment'],
       ['NoCognitiveImpairment'],
       ['NoCognitiveImpairment'],
       ['AD'],
       ['AD'],
       ['NoCognitiveImpairment'],
       ['AD'],
       ['AD'],
       ['AD']

In [48]:
X_train_normalized = standard_scaler.fit_transform(X_train, y_train)
X_test_normalized = standard_scaler.transform(X_test)

In [49]:
n_features = X_train.shape[1]
n_classes = 2  # y_train.shape[1]

# nfe 7, 67, 72
# np 121, 79, -82

lr_l1 = LogisticRegression(penalty="l2", multi_class="ovr", solver="saga", max_iter=500)
svm = SVC()
rf = RandomForestClassifier(n_estimators=500)

dnn = keras.Sequential([
    layers.Dense(n_features),
    layers.Dense(9000),
    layers.Dense(2000),
    layers.Dense(100),
    layers.Dense(n_classes)
])
dnn.compile(optimizer='adam', 
            loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
            metrics=[keras.metrics.CategoricalAccuracy()])


classical_models = (lr_l1, svm, rf)

y_train_sparse = label_encoder.fit_transform(y_train.ravel())
y_test_sparse = label_encoder.transform(y_test.ravel())


base_dnn = BaseDNN([n_features, 9000, 2000, 100, n_classes])

In [7]:
for model in classical_models:
    model.fit(X_train_normalized, y_train_sparse)
    y_pred = model.predict(X_test_normalized).ravel()
    
    cnf = confusion_matrix(y_test_sparse, y_pred)
    acc = accuracy_score(y_test_sparse, y_pred)
    # f1 = f1_score(y_test_sparse, y_pred)

    print(f"acc: {acc}")  # , f1: {f1}")
    print(cnf)
    print("********************")



acc: 0.36666666666666664
[[4 4 3]
 [4 3 0]
 [2 6 4]]
********************
acc: 0.3
[[ 0  0 11]
 [ 0  0  7]
 [ 3  0  9]]
********************
acc: 0.4
[[4 0 7]
 [2 0 5]
 [4 0 8]]
********************


In [36]:
y_train_one_hot = to_categorical(y_train_sparse)
y_test_one_hot = to_categorical(y_test_sparse)
# print(y_train_one_hot)

dnn.fit(X_train, y_train_one_hot, epochs=5)
y_pred_train = dnn.predict(X_train)
y_pred = dnn.predict(X_test)

y_pred_sparse_train = label_encoder.fit_transform_one_hot(y_pred_train)
y_pred_sparse = label_encoder.fit_transform_one_hot(y_pred)

cnf_train = confusion_matrix(y_train_sparse, y_pred_sparse_train)
acc_train = accuracy_score(y_train_sparse, y_pred_sparse_train)

print(f"acc: {acc_train}")
print(cnf_train)
print("********************")

cnf_test = confusion_matrix(y_test_sparse, y_pred_sparse)
acc_test = accuracy_score(y_test_sparse, y_pred_sparse)

print(f"acc: {acc_test}")
print(cnf_test)
print("********************")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
acc: 0.4122137404580153
[[  5 154]
 [  0 103]]
********************
acc: 0.4
[[ 0 18]
 [ 0 12]]
********************


In [110]:
np.where(y_test_sparse == 2)[0].shape[0]

# 133, 61, -77

6

In [None]:
def encoder(input_encoder):
	inputs = keras.Input(shape=input_encoder, name='input_layer')

	# Block-1
	x = layers.Dense(100, name='conv_1')(inputs)
	x = layers.LeakyReLU(name='lrelu_1')(x)

	# Block-2
	x = layers.Dense(75, name='conv_2')(x)
	x = layers.LeakyReLU(name='lrelu_2')(x)

	# Block-3
	x = layers.Dense(75, name='conv_3')(x)
	x = layers.LeakyReLU(name='lrelu_3')(x)


	# Block-4
	x = layers.Dense(50, name='conv_4')(x)
	x = layers.LeakyReLU(name='lrelu_4')(x)

	# Final Block
	flatten = x
	mean = layers.Dense(2, name='mean')(flatten)
	log_var = layers.Dense(2, name='log_var')(flatten)
	model = tf.keras.Model(inputs, (mean, log_var), name="Encoder")

	return model


def sampling(input_1,input_2):
	mean = keras.Input(shape=input_1, name='input_layer1')
	log_var = keras.Input(shape=input_2, name='input_layer2')
	out = layers.Lambda(sampling_reparameterization_model, name='encoder_output')([mean, log_var])
	enc_2 = tf.keras.Model([mean,log_var], out,  name="Encoder_2")

	return enc_2


def sampling_reparameterization_model(distribution_params):
    mean, log_var = distribution_params
    epsilon = K.random_normal(shape=K.shape(mean), mean=0., stddev=1.)
    z = mean + K.exp(log_var / 2) * epsilon

    return z


# replace the decoder with the supervised learning method
def decoder(input_decoder):
		
	inputs = keras.Input(shape=input_decoder, name='input_layer')
	x = layers.Dense(50, name='dense_1')(inputs)
	
	# Block-1
	x = layers.Dense(75,name='conv_transpose_1')(x)
	x = layers.BatchNormalization(name='bn_1')(x)
	x = layers.LeakyReLU(name='lrelu_1')(x)
	
	# Block-2
	x = layers.Dense(75, name='conv_transpose_2')(x)
	x = layers.BatchNormalization(name='bn_2')(x)
	x = layers.LeakyReLU(name='lrelu_2')(x)		

	# INSERT NORMAL SUPERVISED LEARNING HERE?
	# Block-4
	outputs = layers.Dense(100, name='conv_transpose_4')(x)
	model = tf.keras.Model(inputs, outputs, name="Decoder")
	return model	

optimizer = tf.keras.optimizers.Adam(lr = 0.0005)

def mse_loss(y_true, y_pred):
	r_loss = K.mean(K.square(y_true - y_pred), axis = [1,2,3])
	return 1000 * r_loss
	

def kl_loss(mean, log_var):
	kl_loss =  -0.5 * K.sum(1 + log_var - K.square(mean) - K.exp(log_var), axis = 1)
	return kl_loss
	

def vae_loss(y_true, y_pred, mean, log_var):
	r_loss = mse_loss(y_true, y_pred)
	kl_loss = kl_loss(mean, log_var)
	return  r_loss + kl_loss

@tf.function
def train_step(images):
	
	with tf.GradientTape() as enc, tf.GradientTape() as dec:
		
		mean, log_var = enc(images, training=True)
		latent = sampling([mean, log_var])
		generated_images = dec(latent, training=True)
		loss = vae_loss(images, generated_images, mean, log_var)
	
			
	gradients_of_enc = encoder.gradient(loss, enc.trainable_variables)
	gradients_of_dec = decoder.gradient(loss, dec.trainable_variables)
		
		
	optimizer.apply_gradients(zip(gradients_of_enc, enc.trainable_variables))
	optimizer.apply_gradients(zip(gradients_of_dec, dec.trainable_variables))
	return loss


def train(dataset, epochs):
	for epoch in range(epochs):
		start = time.time()
		for image_batch in dataset:
			train_step(image_batch)
	
	print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))



  super(Adam, self).__init__(name, **kwargs)
