# cGCN fMRI Analysis on Kaggle
## Running from GitHub with GPU/TPU Support

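This notebook clones the cGCN implementation directly from GitHub and trains it on Kaggle with GPU acceleration (GPU memory growth, mixed precision, and XLA). No TPU-specific setup is performed in the cells below.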

**Note:** Make sure to enable GPU in Kaggle Notebook Settings: Settings → Accelerator → GPU T4 x2

## 1. Setup Environment and Clone GitHub Repository

In [None]:
# Clone the GitHub repository
!git clone https://github.com/ismailukman/GCN_fMRI.git
%cd GCN_fMRI

## 2. Check GPU/TPU Availability

In [None]:
import tensorflow as tf
import numpy as np

# Check TensorFlow version
print(f"TensorFlow Version: {tf.__version__}")

# Query the physical GPUs once. list_physical_devices() does not initialize the
# runtime, whereas the deprecated tf.test.is_gpu_available() does -- after which
# set_memory_growth() raises RuntimeError and memory growth is never enabled.
gpus = tf.config.list_physical_devices('GPU')

# Enable memory growth for GPU to avoid OOM errors.
# This has to happen before anything initializes the GPUs.
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"\nMemory growth enabled for {len(gpus)} GPU(s)")
    except RuntimeError as e:
        # Raised when the GPUs were already initialized (e.g. cell re-run
        # after other TensorFlow work); report it and carry on.
        print(e)

# Check for GPU
print(f"\nGPU Available: {bool(gpus)}")
print(f"Number of GPUs: {len(gpus)}")

# List all available devices
print("\nAvailable devices:")
for device in tf.config.list_physical_devices():
    print(f"  {device}")

## 3. Download Data Files

### Option A: Upload data files to Kaggle Datasets
1. Download the data files from the links in README.md
2. Upload them as a Kaggle dataset
3. Add the dataset to this notebook
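4. Change the `HCP.h5` and `FC.npy` paths used in the cells below so they point at the dataset's location under `/kaggle/input/`, and skip the Google Drive download cell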

### Option B: Direct download from Google Drive (shown below)

In [None]:
# Install gdown for Google Drive downloads
!pip install -q gdown

import gdown
import os

# Create data directory if it doesn't exist
os.makedirs('.', exist_ok=True)

print("Downloading HCP dataset...")
# HCP.h5 (864 MB)
gdown.download('https://drive.google.com/uc?id=1l029ZuOIUY5gehBZCAyHaJqMNuxRHTFc', 'HCP.h5', quiet=False)

print("\nDownloading FC matrix...")
# FC.npy
gdown.download('https://drive.google.com/uc?id=1WP4_9bps-NbX6GNBnhFu8itV3y1jriJL', 'FC.npy', quiet=False)

print("\nData files downloaded successfully!")

## 4. Verify Data Files

In [None]:
import h5py

# Report, for each expected data file, whether it exists and its size on disk.
files_to_check = ['HCP.h5', 'FC.npy']
for file in files_to_check:
    if not os.path.exists(file):
        print(f"✗ {file}: NOT FOUND")
        continue
    size_mb = os.path.getsize(file) / 2 ** 20  # bytes -> MiB
    print(f"✓ {file}: {size_mb:.2f} MB")

# List the top-level entries of the HCP archive together with their shapes.
with h5py.File('HCP.h5', 'r') as f:
    print("\nHCP.h5 contents:")
    for key, entry in f.items():
        print(f"  {key}: {entry.shape}")

## 5. Configure GPU/TPU Settings for Training

In [None]:
# Set mixed precision for better GPU performance
from tensorflow.keras import mixed_precision

# Enable mixed precision training (float16 compute with float32 variables).
# This can significantly speed up training on modern GPUs, but float16 math is
# slow on CPU, so fall back to plain float32 when no GPU is attached.
policy_name = 'mixed_float16' if tf.config.list_physical_devices('GPU') else 'float32'
policy = mixed_precision.Policy(policy_name)
mixed_precision.set_global_policy(policy)

print(f"Compute dtype: {policy.compute_dtype}")
print(f"Variable dtype: {policy.variable_dtype}")

# Configure TensorFlow for optimal GPU usage
tf.config.optimizer.set_jit(True)  # Enable XLA compilation
# NOTE: the grappler "auto_mixed_precision" graph rewrite is intentionally not
# enabled here. It is a second, independent mixed-precision mechanism; the Keras
# policy set above already handles casting, so only one of the two should be on.

print("\nGPU optimization configured!")

## 6. Import Model and Utils

In [None]:
# Import from the cloned repository
# (presumably `model` and `utils` resolve because an earlier cell changes the
# working directory to the GCN_fMRI clone -- run that cell first)
from model import get_model
from utils import save_logs_models

# Helpers for the training cells: `os` for the checkpoint directory, `K` and
# `optimizers` for the training loop, `gmtime`/`strftime` for timestamped file
# names. `random` is not referenced in the cells shown here.
import random
import os
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers
from time import gmtime, strftime

# Printed only once all of the imports above have succeeded.
print("Model imported successfully!")

## 7. Load and Prepare Data

In [None]:
# Dimensions handed to the model builder later in the notebook.
ROI_N = 236
frames = 100

# Read every split fully into memory while the HDF5 file is still open.
with h5py.File('HCP.h5', 'r') as f:
    x_train, x_val, x_test = (f[split][()] for split in ('x_train', 'x_val', 'x_test'))
    y_train, y_val, y_test = (f[split][()] for split in ('y_train', 'y_val', 'y_test'))

# Append a trailing singleton axis to each input array (the channel dimension).
x_train, x_val, x_test = (
    np.expand_dims(inputs, -1) for inputs in (x_train, x_val, x_test)
)

print(f"x_train shape: {x_train.shape}")
print(f"x_val shape: {x_val.shape}")
print(f"x_test shape: {x_test.shape}")

# One-hot encode the labels of every split.
num_classes = 100
y_train, y_val, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_val, y_test)
)

print(f"\ny_train shape: {y_train.shape}")
print(f"y_val shape: {y_val.shape}")
print(f"y_test shape: {y_test.shape}")

## 8. Set Training Parameters

In [None]:
# Training hyperparameters
k = 5  # Number of nearest neighbors
batch_size = 16  # Increased for GPU (original: 8)
epochs = 100
l2_reg = 1e-4
dp = 0.5
lr = 1e-5

# Echo the configuration so it is recorded in the cell output.
for label, value in (
    ("Dropout", dp),
    ("L2 regularization", l2_reg),
    ("Batch size", batch_size),
    ("Epochs", epochs),
    ("Learning rate", lr),
    ("k (neighbors)", k),
):
    print(f"{label}: {value}")

# Base name shared by the artifacts of this run.
file_name = f'kaggle_k_{k}_l2_{l2_reg}_dp_{dp}'
print(f"\nFile name: {file_name}")

# Checkpoints go under tmp/, tagged with a UTC timestamp so that separate runs
# do not overwrite each other.
os.makedirs('tmp', exist_ok=True)
timestamp = strftime("%Y_%m_%d_%H_%M_%S", gmtime())
tmp_name = f'tmp/tmp_{file_name}_{timestamp}.hdf5'
print(f"Model checkpoint: {tmp_name}")

## 9. Build Model

In [None]:
# Gather the builder arguments in one place, then construct the network.
model_config = dict(
    graph_path='FC.npy',
    ROI_N=ROI_N,
    frames=frames,
    kernels=[8, 8, 8, 16, 32, 32],
    k=k,
    l2_reg=l2_reg,
    dp=dp,
    num_classes=num_classes,
    weight_path=None,  # no weights file is passed for the training model
    skip=[0, 0],
)
model = get_model(**model_config)

# Print the layer-by-layer overview.
model.summary()

## 10. Compile and Train Model

In [None]:
# Create the optimizer first, then compile the model with it.
adam = optimizers.legacy.Adam(learning_rate=lr)
model.compile(loss=['categorical_crossentropy'], optimizer=adam, metrics=['accuracy'])

# Learning-rate schedule: watch validation accuracy with factor 0.5,
# patience 10 and a lower bound of 1e-6 on the learning rate.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', factor=0.5, patience=10, min_lr=1e-6
)

# Learning rate observed at the start of every epoch, filled in by Lr_record.
lr_hist = []

class Lr_record(tf.keras.callbacks.Callback):
    """Callback that logs the optimizer's learning rate at each epoch start.

    The value is appended to the notebook-level ``lr_hist`` list and printed.
    """

    def on_epoch_begin(self, epoch, logs=None):
        # `logs=None` avoids a mutable default argument. `self.model` is the
        # model Keras attaches to the callback, so the class does not depend
        # on a notebook-global variable named `model`.
        tmp = K.get_value(self.model.optimizer.learning_rate)
        lr_hist.append(tmp)
        print(f'Learning rate: {tmp}')

lr_record = Lr_record()

# Early stopping watches the validation loss with a patience of 10 epochs.
earlystop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# Checkpoint to `tmp_name`, keeping only the best model by validation accuracy.
checkpointer = tf.keras.callbacks.ModelCheckpoint(
    filepath=tmp_name,
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

# TensorBoard callback for visualization
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=1)

print("Starting training...")
print("Monitor training progress in real-time!\n")

# The callback order matches the original fit() call.
training_callbacks = [checkpointer, lr_record, reduce_lr, earlystop, tensorboard_callback]

# Train model
model_history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    shuffle=True,
    validation_data=(x_val, y_val),
    callbacks=training_callbacks,
)

## 11. Evaluate Best Model

In [None]:
print("Loading best model for evaluation...")

# Rebuild the network with the same settings as the training model and load the
# best checkpoint written during training. `dp` is passed explicitly so that
# both builds receive identical arguments instead of relying on get_model's
# default dropout value here.
model_best = get_model(
    graph_path='FC.npy',
    ROI_N=ROI_N,
    frames=frames,
    kernels=[8, 8, 8, 16, 32, 32],
    k=k,
    l2_reg=l2_reg,
    dp=dp,
    num_classes=num_classes,
    weight_path=tmp_name,
    skip=[0, 0]
)

# Compile with the same loss, optimizer settings and metric as the training model.
model_best.compile(
    loss=['categorical_crossentropy'],
    optimizer=optimizers.legacy.Adam(learning_rate=lr),
    metrics=['accuracy']
)

# Evaluate on validation set; results are [loss, accuracy]
val_results = model_best.evaluate(x=x_val, y=y_val, batch_size=batch_size, verbose=1)
print(f"\nValidation - Loss: {val_results[0]:.4f}, Accuracy: {val_results[1]:.4f}")

# Evaluate on test set
test_results = model_best.evaluate(x=x_test, y=y_test, batch_size=batch_size, verbose=1)
print(f"Test - Loss: {test_results[0]:.4f}, Accuracy: {test_results[1]:.4f}")

## 12. Save Results

In [None]:
# Hand the training artifacts to the repository's save helper.
# The reported accuracy is the validation accuracy of the reloaded best model.
val_accuracy = val_results[1]
save_logs_models(
    model,
    model_history,
    acc=val_accuracy,
    folder='tmp/',
    lr_hist=lr_hist,
    file_name=file_name,
    loss_name='loss',
    acc_name='accuracy',
    tmp_name=tmp_name,
)

print("\nTraining complete! Results saved.")

print("\nTraining complete! Results saved.")

## 13. Visualize Training History

In [None]:
import matplotlib.pyplot as plt

# Two side-by-side panels: accuracy on the left, loss on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
history = model_history.history

# Both panels share one layout; only the history key and display name differ.
for ax, metric, name in ((ax1, 'accuracy', 'Accuracy'), (ax2, 'loss', 'Loss')):
    ax.plot(history[metric], label=f'Train {name}')
    ax.plot(history[f'val_{metric}'], label=f'Val {name}')
    ax.set_title(f'Model {name}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(name)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig('training_history.png', dpi=300, bbox_inches='tight')
plt.show()

# Headline numbers: best validation accuracy over all epochs, plus the test
# accuracy obtained in the evaluation cell.
best_val_acc = max(history['val_accuracy'])
print("\nFinal Results:")
print(f"Best Validation Accuracy: {best_val_acc:.4f}")
print(f"Test Accuracy: {test_results[1]:.4f}")