In [19]:
import tensorflow as tf
from tensorflow.keras import layers

from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler

import pandas as pd
import numpy as np

In [20]:
label_encoder = LabelEncoder()
standard_scaler = StandardScaler()

In [21]:
X = pd.read_csv(r'../data/X_expr.csv').drop(['Unnamed: 0', 'seqLibID'], axis=1).values
y = pd.read_csv(r'../data/y_cog.csv').drop(['Unnamed: 0', 'seqLibID'], axis=1).values
y = label_encoder.fit_transform(y.ravel())  

In [22]:
label_encoder.classes_

array(['AD', 'MildCognitiveImpairment', 'NoCognitiveImpairment'],
      dtype=object)

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

X_train = standard_scaler.fit_transform(X_train, y_train)
X_test = standard_scaler.transform(X_test)

#### Create SelectFromModel object and select features

In [24]:
sel = SelectFromModel(RandomForestClassifier(n_estimators=400))
sel.fit(X_train, y_train)
sel.get_support()

sel_feat_bool_idx = sel.get_support()

In [25]:
n_features = int(np.sum(sel_feat_bool_idx))

In [26]:
X_train_sel = X_train[:, sel_feat_bool_idx]
X_test_sel = X_test[:, sel_feat_bool_idx]

train_ds = tf.data.Dataset.from_tensor_slices(
    (X_train_sel, y_train)
).shuffle(10000).batch(100)

test_ds = tf.data.Dataset.from_tensor_slices((X_test_sel, y_test)).batch(32)

#### Classifier definition

In [27]:
def get_nn_decoder(latent_shape, layer_sizes, activation='relu', 
                   kernel_regularizer=tf.keras.regularizers.l1_l2, regularization_weight=0.058, dropout_rates=None):
    # dense, dropout, dense, dropout, dense

    input_layer = layers.Input(shape=latent_shape)

    x = layers.Dense(layer_sizes[0], activation=activation, 
                     kernel_regularizer=kernel_regularizer(regularization_weight))(input_layer)

    for i, n_nodes in enumerate(layer_sizes[1: ], 1):
        if dropout_rates is not None:
            x = layers.Dropout(dropout_rates[i - 1])(x)

        if i == len(layer_sizes) - 1:
            x = layers.Dense(n_nodes, activation=tf.keras.activations.softmax)(x)
        else:
            x = layers.Dense(n_nodes, activation=activation, kernel_regularizer=kernel_regularizer(regularization_weight))(x)  

    model = tf.keras.Model(inputs=input_layer, outputs=x)
    return model

#### Classifier initialization

In [28]:
classifier = get_nn_decoder((n_features), [500, 500, 100, 3], dropout_rates=[0.75, 0.5, 0.5, 0.1])
optimizer = tf.keras.optimizers.Adam()

#### Train classifier

In [29]:
loss_object_sparse = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy_sparse = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy_sparse = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')


@tf.function
def train_step_nn(data, labels):
    with tf.GradientTape() as tape:
        predictions = classifier(data, training=True)
        loss = loss_object_sparse(labels, predictions)
    gradients = tape.gradient(loss, classifier.trainable_variables)
    optimizer.apply_gradients(zip(gradients, classifier.trainable_variables))

    train_loss(loss)
    train_accuracy_sparse(labels, predictions)


@tf.function
def test_step_nn(data, labels):
    predictions = classifier(data, training=False)
    t_loss = loss_object_sparse(labels, predictions)

    test_loss(t_loss)
    test_accuracy_sparse(labels, predictions)

In [30]:
EPOCHS = 100

for epoch in range(EPOCHS):
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy_sparse.reset_states()
    test_loss.reset_states()
    test_accuracy_sparse.reset_states()

    for data, labels in train_ds:
        train_step_nn(data, labels)

    for test_data, test_labels in test_ds:
        test_step_nn(test_data, test_labels)

    print(
        f'Epoch {epoch + 1}, '
        f'Loss: {train_loss.result()}, '
        f'Accuracy: {train_accuracy_sparse.result() * 100}, '
        f'Test Loss: {test_loss.result()}, '
        f'Test Accuracy: {test_accuracy_sparse.result() * 100}'
    )

Epoch 1, Loss: 3.4213743209838867, Accuracy: 29.032257080078125, Test Loss: 1.4720094203948975, Test Accuracy: 40.90909194946289
Epoch 2, Loss: 3.2875616550445557, Accuracy: 35.48387145996094, Test Loss: 1.5032718181610107, Test Accuracy: 43.181819915771484
Epoch 3, Loss: 3.0453665256500244, Accuracy: 43.14516067504883, Test Loss: 1.21458101272583, Test Accuracy: 47.727272033691406
Epoch 4, Loss: 2.781614303588867, Accuracy: 39.91935729980469, Test Loss: 1.2010416984558105, Test Accuracy: 43.181819915771484
Epoch 5, Loss: 2.6843996047973633, Accuracy: 40.32258224487305, Test Loss: 1.1793212890625, Test Accuracy: 34.09090805053711
Epoch 6, Loss: 3.020679473876953, Accuracy: 41.935482025146484, Test Loss: 1.1369210481643677, Test Accuracy: 27.272727966308594
Epoch 7, Loss: 2.0471174716949463, Accuracy: 44.75806427001953, Test Loss: 1.121247410774231, Test Accuracy: 34.09090805053711
Epoch 8, Loss: 1.938887119293213, Accuracy: 46.774192810058594, Test Loss: 1.1036217212677002, Test Accura