In [1]:
import sys 
import os
import pandas as pd  
from pathlib import Path  
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import shapiro
from pathlib import Path
import glob
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split

In [2]:
rootfolder = os.path.abspath(os.path.join(Path.cwd(), "..")) # rootpath --> top of git repo
# Change the above if you are not in $root/notebook/
sys.path.append(os.path.join(rootfolder))

from src.data.data_loader import load_data

datafolder = os.path.join(rootfolder, "data")

# Load data
train_data_dic = load_data(datafolder, filetype = "train") 
train_quant = train_data_dic["train_quant"]
train_outcome = train_data_dic["train_outcome"]
train_cate = train_data_dic["train_cate"]
train_fmri = train_data_dic["train_fmri"]

test_data_dic = load_data(datafolder, filetype = "test")
test_quant = test_data_dic["test_quant"]
test_cate = test_data_dic["test_cate"]
test_fmri = test_data_dic["test_fmri"]

In [3]:
# Deal with fMRI matrix
# fmri: the fMRI matrix which will be processed
# @returns participant_id: the sequence of participant id in fMRI matrix
# @returns fmri_matrices: the after pre-process fMRI matrix with 1213 * 200 * 200
def convert_fMRI(fmri):
    num_patients = 1213
    num_regions = 200
    fmri_matrices = np.zeros((num_patients, num_regions, num_regions))
    participant_id = fmri.iloc[:, 0]
    fmri_values = fmri.iloc[:, 1:].astype(float)

    col_index = 0
    for i in range(num_regions):
        for j in range(i + 1, num_regions):
            fmri_matrices[:, i, j] = fmri_values.iloc[:, col_index]
            fmri_matrices[:, j, i] = fmri_values.iloc[:, col_index]
            col_index += 1

    for i in range(num_regions):
        fmri_matrices[:, i, i] = 1.0
    
    return participant_id, fmri_matrices


In [4]:
train_participant_id, train_fmri_matrices = convert_fMRI(train_fmri)

In [5]:
# Preprocess the train outcome
# data: data to be preprocessed
# id: participant_id
# @returns outcome (the dataset in the order of the participant id in fMRI)
def preprocess(data, id):
    outcome = data.set_index("participant_id")
    outcome = outcome.loc[id]
    return np.array(outcome)

In [6]:
train_outcome_ordered = preprocess(train_outcome, train_participant_id)
train_ADHD_outcome = train_outcome_ordered[:, 0]

train_quant_ordered = preprocess(train_quant, train_participant_id)
train_cate_ordered = preprocess(train_cate, train_participant_id)

In [7]:
train_fmri_matrices = train_fmri_matrices[..., np.newaxis]

print(train_fmri_matrices.shape)
print(train_ADHD_outcome.shape)
print(train_quant_ordered.shape)
print(train_cate_ordered.shape)

(1213, 200, 200, 1)
(1213,)
(1213, 18)
(1213, 9)


In [10]:
# CNN for fMRI
input_fmri = keras.Input(shape=(200, 200, 1), name = "fMRI_input")
x = layers.Conv2D(32, (3,3), activation="relu", padding="same")(input_fmri)
x = layers.MaxPooling2D((2,2))(x)
x = layers.Conv2D(64, (3,3), activation="relu", padding="same")(x)
x = layers.MaxPooling2D((2,2))(x)
x = layers.Conv2D(128, (3,3), activation="relu", padding="same")(x)
x = layers.MaxPooling2D((2,2))(x)
x = layers.Flatten()(x)
x = layers.Dense(128, activation="relu")(x)
x = layers.Dropout(0.5)(x)

# MLP for Quantitative
input_quant = keras.Input(shape=(train_quant_ordered.shape[1],), name = "Quant_input")
q = layers.Dense(64, activation="relu")(input_quant)
q = layers.Dense(32, activation="relu")(q)

# MLP for Categorical
input_cate = keras.Input(shape=(train_cate_ordered.shape[1],), name = "Cate_input")
c = layers.Dense(64, activation="relu")(input_cate)
c = layers.Dense(32, activation="relu")(c)

# Combine CNN and MLP
merged = layers.concatenate([x, q, c])
merged = layers.Dense(128, activation="relu")(merged)
merged = layers.Dropout(0.5)(merged)
output = layers.Dense(1, activation="sigmoid", name = "output")(merged)

model = keras.Model(inputs=[input_fmri, input_quant, input_cate], outputs = output)

model.compile(optimizer="adam", loss="binary_crossentropy", metrics = ["accuracy"])

In [11]:
history = model.fit(
    [train_fmri_matrices, train_quant_ordered, train_cate_ordered], train_ADHD_outcome,
    epochs = 20,
    batch_size = 32,
    validation_split = 0.2
)

Epoch 1/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 317ms/step - accuracy: 0.3345 - loss: nan - val_accuracy: 0.3333 - val_loss: nan
Epoch 2/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 310ms/step - accuracy: 0.3115 - loss: nan - val_accuracy: 0.3333 - val_loss: nan
Epoch 3/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 307ms/step - accuracy: 0.3281 - loss: nan - val_accuracy: 0.3333 - val_loss: nan
Epoch 4/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 306ms/step - accuracy: 0.3186 - loss: nan - val_accuracy: 0.3333 - val_loss: nan
Epoch 5/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 305ms/step - accuracy: 0.3161 - loss: nan - val_accuracy: 0.3333 - val_loss: nan
Epoch 6/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 301ms/step - accuracy: 0.3014 - loss: nan - val_accuracy: 0.3333 - val_loss: nan
Epoch 7/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [13]:
model2 = keras.Sequential([
    layers.Conv2D(32, (3,3), activation="relu", padding="same", input_shape=(200, 200, 1)),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(64, (3,3), activation="relu", padding="same"),
    layers.MaxPooling2D((2,2)),

    layers.Conv2D(128, (3,3), activation="relu", padding="same"),
    layers.MaxPooling2D((2,2)),

    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid")
])

model2.compile(optimizer="adam", loss="binary_crossentropy", metrics = ["accuracy"])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
history2 = model2.fit(
    train_fmri_matrices, train_ADHD_outcome,
    epochs = 20,
    batch_size = 32,
    validation_split = 0.2
)

Epoch 1/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 352ms/step - accuracy: 0.6867 - loss: 0.6515 - val_accuracy: 0.6667 - val_loss: 0.6363
Epoch 2/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 319ms/step - accuracy: 0.6979 - loss: 0.6295 - val_accuracy: 0.6667 - val_loss: 0.6392
Epoch 3/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 312ms/step - accuracy: 0.6939 - loss: 0.6185 - val_accuracy: 0.6667 - val_loss: 0.6438
Epoch 4/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 309ms/step - accuracy: 0.6792 - loss: 0.6425 - val_accuracy: 0.6667 - val_loss: 0.6348
Epoch 5/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 343ms/step - accuracy: 0.6992 - loss: 0.6158 - val_accuracy: 0.6667 - val_loss: 0.6354
Epoch 6/20
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 316ms/step - accuracy: 0.6879 - loss: 0.6343 - val_accuracy: 0.6667 - val_loss: 0.6358
Epoch 7/20
[1m31/31[