# CNN Predictor for Cognitive Score

In [18]:
import pandas as pd
import numpy as np
from scipy.io import loadmat

# scikit-learn modules
from sklearn.model_selection import train_test_split # for splitting the data

from sklearn.metrics import r2_score
# from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import tensorflow as tf
from tensorflow.keras import layers, models

import json
from joblib import dump

In [12]:
def normalise_correlate_fc(fc):
    """Scale ``fc`` by its global maximum and return its row-wise correlation matrix.

    Parameters
    ----------
    fc : array-like, 2-D
        Each row is treated as one variable and each column as one
        observation (the default ``np.corrcoef`` convention).

    Returns
    -------
    numpy.ndarray
        Square matrix of correlation coefficients with one row/column per
        row of ``fc``.
    """
    scaled = fc / np.max(fc)
    return np.corrcoef(scaled)

In [13]:
# evaluate
def eval(model, x_test_scaled, y_test):
    """Print the R^2 score of ``model`` on a held-out set, rounded to 2 decimals.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(x)``.
    x_test_scaled : array-like
        Inputs passed unchanged to ``model.predict``.
    y_test : array-like
        Ground-truth targets the predictions are scored against.

    Notes
    -----
    The function name shadows Python's built-in ``eval``; it is kept so that
    existing calls keep working.
    """
    y_pred = model.predict(x_test_scaled)
    # The result must NOT be stored in a local called ``r2_score``: doing so
    # makes the name local to the whole function, so the call on the
    # right-hand side would raise UnboundLocalError before the imported
    # sklearn function is ever reached.
    score = round(r2_score(y_test, y_pred), 2)

    print(f'r2: {score}')

In [4]:
ADSP_DATA = '../data/ADSP_PHC_COGN_Dec2023_FILTERED_wfiles.csv'

# Columns dropped from every per-score frame.
ID_COLUMNS = ['RID', 'VISCODE2', 'PHC_Diagnosis']
# The four score columns; each frame below keeps exactly one of them.
SCORE_COLUMNS = ['PHC_MEM', 'PHC_EXF', 'PHC_LAN', 'PHC_VSP']


def select_score_frame(df, score_column):
    """Return a copy of ``df`` without the ID columns and without every
    score column other than ``score_column``.

    ``DataFrame.drop`` returns a new frame, so ``df`` itself is not modified.
    """
    other_scores = [col for col in SCORE_COLUMNS if col != score_column]
    return df.drop(columns=ID_COLUMNS + other_scores)


# Read the CSV once and derive the four frames from it, instead of parsing
# the same file four times.
adsp_raw = pd.read_csv(ADSP_DATA)

df_mem = select_score_frame(adsp_raw, 'PHC_MEM')
df_exf = select_score_frame(adsp_raw, 'PHC_EXF')
df_lan = select_score_frame(adsp_raw, 'PHC_LAN')
df_vsp = select_score_frame(adsp_raw, 'PHC_VSP')
df_mem.shape

(1343, 3)

In [5]:
# Discard rows that have no PHC_EXF value; rebinding the name keeps the cell
# safe to re-run.
df_exf = df_exf.dropna(subset=['PHC_EXF'])
df_exf.shape

(1343, 3)

In [6]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
train, test = train_test_split(df_exf, test_size=0.2, random_state=42)

# Regression targets: the PHC_EXF column of each partition.
y_train = train['PHC_EXF']
y_test = test['PHC_EXF']

In [7]:
# Get features for training and testing

def load_fc_matrices(files, n_regions=100):
    """Load one correlation matrix per ``.mat`` file.

    Parameters
    ----------
    files : iterable of str
        Paths handed to ``scipy.io.loadmat``; each file must contain a
        ``'ROI_activity'`` entry.
    n_regions : int, default 100
        Number of leading rows of ``ROI_activity`` to keep (rows are treated
        as regions, following the original comment).

    Returns
    -------
    list of numpy.ndarray
        Output of ``normalise_correlate_fc`` for every file, in input order.
    """
    matrices = []
    for file in files:
        arr = loadmat(file)['ROI_activity'][:n_regions, :]  # get the first n_regions regions
        matrices.append(normalise_correlate_fc(arr))
    return matrices


# Number of training samples (kept for any later cell that refers to it).
dim_x = len(train)

# Get the FC data as lists of arrays (converted to numpy arrays in the next cell)
x_train = load_fc_matrices(train['FC_DATA'].values)
x_test = load_fc_matrices(test['FC_DATA'].values)

In [8]:
# Stack the per-sample matrices into single arrays.
x_train, x_test = np.array(x_train), np.array(x_test)

In [10]:
# Sanity check: one square correlation matrix per training file, i.e.
# (n_train_samples, 100, 100) when every file provides at least 100 rows.
x_train.shape

(1074, 100, 100)

## Simple CNN Model

In [28]:
from keras import backend as K

In [29]:
def r2_keras(y_true, y_pred):
    """Coefficient of determination (R^2) written with Keras backend ops.

    Computes ``1 - SS_res / (SS_tot + epsilon)``, where ``SS_res`` is the sum
    of squared residuals and ``SS_tot`` the sum of squared deviations of
    ``y_true`` from its mean. ``K.epsilon()`` guards against division by zero
    when ``y_true`` is constant.

    NOTE(review): when passed via ``metrics=[...]`` the value is presumably
    computed per batch and averaged, which is not the same as R^2 over the
    whole dataset - confirm before reporting it.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    total_ss = K.sum(K.square(y_true - K.mean(y_true)))
    ratio = residual_ss / (total_ss + K.epsilon())
    return 1 - ratio

In [88]:
# Simple CNN: a single 16-filter conv/pool stage followed by a small dense
# regression head (64 -> 32 -> 32 -> 1).
# Earlier experiments used three conv layers (32/64/32 filters, and 8/16/32
# filters with batch normalisation); they were replaced by this network.

input_shape = (100, 100, 1)

model = models.Sequential([
    # Feature extractor
    layers.Conv2D(16, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    # Dense head
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(32, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(32, activation='relu'),
    # Single linear unit: the model outputs one unbounded regression value
    layers.Dense(1, activation='linear'),
])

In [89]:
# Configure training: Adam optimiser, mean-squared-error loss, and the custom
# R^2 function reported as an extra metric.
model.compile(
    loss='mse',
    optimizer='adam',
    metrics=[r2_keras],
)

# Show the layer-by-layer summary
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_19 (Conv2D)          (None, 98, 98, 16)        160       
                                                                 
 max_pooling2d_14 (MaxPoolin  (None, 49, 49, 16)       0         
 g2D)                                                            
                                                                 
 flatten_10 (Flatten)        (None, 38416)             0         
                                                                 
 dense_24 (Dense)            (None, 64)                2458688   
                                                                 
 dropout_3 (Dropout)         (None, 64)                0         
                                                                 
 dense_25 (Dense)            (None, 32)                2080      
                                                     

In [92]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Halve the learning rate after 3 epochs without improvement in val_loss,
# never going below 0.0005.
# Early stopping (monitor='val_loss', patience=5, restore_best_weights=True,
# with batch_size=16) was tried previously and is currently disabled.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=0.0005,
)

# 20% of the training data is held back by Keras for validation.
history = model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    batch_size=32,
    epochs=100,
    callbacks=[reduce_lr],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100

KeyboardInterrupt: 

In [None]:
# Score the trained model on the held-out test set. With one compiled metric,
# evaluate() returns the loss followed by that metric.
loss, r2 = model.evaluate(x_test, y_test)
for label, value in (("Test Loss:", loss), ("Test R2:", r2)):
    print(label, value)