# cGCN fMRI Analysis on Kaggle
## Running from GitHub with GPU/TPU Support

This notebook loads the cGCN implementation directly from GitHub and runs it on Kaggle with GPU/TPU optimization.

## ⚙️ Pre-configured Settings

This notebook is pre-configured with:
- ✅ **GPU T4 Accelerator** - Automatically enabled for faster training
- ✅ **Internet Access** - Required to clone code from GitHub

## ⚠️ IMPORTANT: Before Running This Notebook

**You MUST enable Internet if not already ON:**
- Go to Settings (right panel) → Internet → Turn ON

**Add your dataset:**
- Click "Add Data" → Search for your fMRI dataset and add it
- The notebook expects data at: `/kaggle/input/fmri-data/`
- The dataset should contain:
  - `HCP_rfMRI_100s4s_236_MGTR_matlab_train_val_test.h5`
  - `FC.npy`

---

**Troubleshooting:**
- If you see "Could not resolve host: github.com" → Internet is OFF
- If GPU is not detected → Check Settings → Accelerator → Should be "GPU T4"

## 1. Setup Environment and Clone GitHub Repository

In [None]:
import os
import sys
import subprocess

# Clone the cGCN repository (if needed) and make it the working directory.
#
# The cell is written to be safe to re-run:
#   * if a previous run already chdir'ed into the clone, nothing is cloned
#     again (otherwise a nested GCN_fMRI/GCN_fMRI copy would be created);
#   * a clone that timed out is removed, so the next run does not mistake a
#     partial checkout for a complete one;
#   * a non-zero git exit status is treated as a failure even if a directory
#     was left behind.
import shutil

REPO_URL = 'https://github.com/ismailukman/GCN_fMRI.git'
REPO_DIR = 'GCN_fMRI'

if os.path.basename(os.getcwd()) == REPO_DIR:
    # Already inside the clone from an earlier run of this cell.
    print("✓ Already inside the repository, using existing clone")
elif os.path.exists(REPO_DIR):
    print("✓ Repository already exists, using existing clone")
    os.chdir(REPO_DIR)
else:
    print("Cloning GitHub repository...")
    print(f"Repository: {REPO_URL}\n")

    # Use subprocess (not a shell) so git's output can be captured and shown.
    try:
        result = subprocess.run(
            ['git', 'clone', REPO_URL],
            capture_output=True,
            text=True,
            timeout=60
        )
    except subprocess.TimeoutExpired:
        # The killed git process may leave a half-written checkout behind.
        shutil.rmtree(REPO_DIR, ignore_errors=True)
        print("\n✗ Git clone timed out after 60 seconds")
        print("Network might be slow. Try running this cell again.")
        raise
    except FileNotFoundError:
        print("\n✗ Git command not found")
        print("This shouldn't happen on Kaggle. Please report this issue.")
        raise

    # Show git output
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr)

    # Check if clone was successful (exit status AND directory present)
    if result.returncode != 0 or not os.path.exists(REPO_DIR):
        print("\n" + "="*70)
        print("ERROR: Failed to clone repository from GitHub")
        print("="*70)
        print("\nGit command output above shows the error.")
        print("\nCommon solutions:")
        print("1. Check that Internet is enabled: Settings → Internet → ON")
        print("2. If Internet is ON, try running this cell again")
        print("3. Check repository exists: https://github.com/ismailukman/GCN_fMRI")
        print("\nIf the error persists, you can manually download and upload the files:")
        print("- Go to: https://github.com/ismailukman/GCN_fMRI")
        print("- Download ZIP and extract")
        print("- Upload model.py, utils.py, and other Python files to this notebook")
        print("="*70)
        raise RuntimeError("Cannot proceed without repository files.")

    os.chdir(REPO_DIR)
    print("\n✓ Repository cloned successfully")

# Add the repository to Python path so imports work
repo_path = os.getcwd()
if repo_path not in sys.path:
    sys.path.insert(0, repo_path)
    print(f"✓ Added {repo_path} to Python path")

print(f"\nCurrent directory: {repo_path}")
print(f"Files in directory: {', '.join(os.listdir(repo_path)[:10])}")

## 2. Check GPU/TPU Availability

In [None]:
import tensorflow as tf
import numpy as np

# Report the TensorFlow version and the accelerators visible to it, then
# turn on GPU memory growth.
print(f"TensorFlow Version: {tf.__version__}")

# Query the GPU list once. tf.test.is_gpu_available() is deprecated and
# initializes the GPU devices as a side effect; once they are initialized,
# set_memory_growth() below raises RuntimeError, so memory growth would never
# actually be enabled. list_physical_devices() only enumerates devices.
gpus = tf.config.list_physical_devices('GPU')
print(f"\nGPU Available: {bool(gpus)}")
print(f"Number of GPUs: {len(gpus)}")

# List all available devices
print("\nAvailable devices:")
for device in tf.config.list_physical_devices():
    print(f"  {device}")

# Enable memory growth for GPU to avoid OOM errors
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"\nMemory growth enabled for {len(gpus)} GPU(s)")
    except RuntimeError as e:
        # Raised when the GPUs were already initialized (e.g. the kernel ran
        # other TensorFlow code first); restart the kernel to apply it.
        print(e)

## 3. Verify Kaggle Dataset

This notebook uses fMRI data from a Kaggle dataset.
Make sure you have added the dataset to this notebook:
- Click "Add Data" in the right panel
- Search for and add your fMRI dataset
- The data should be available at `/kaggle/input/fmri-data/`

In [None]:
import os
import h5py

# Verify that the Kaggle dataset is attached and contains the two files the
# rest of the notebook reads, then print the layout of the HDF5 file.
KAGGLE_DATA_PATH = '/kaggle/input/fmri-data/'
BYTES_PER_MB = 1024 * 1024

# Fail fast, with attach instructions, when the dataset directory is absent.
if not os.path.exists(KAGGLE_DATA_PATH):
    print(f"✗ Dataset not found at: {KAGGLE_DATA_PATH}")
    print("\nPlease add the fMRI dataset to this notebook:")
    print("1. Click 'Add Data' in the right panel")
    print("2. Search for your fMRI dataset")
    print("3. Click 'Add' to attach it to this notebook")
    raise FileNotFoundError(f"Dataset not found at {KAGGLE_DATA_PATH}")

print(f"✓ Kaggle dataset found at: {KAGGLE_DATA_PATH}")
print("\nDataset contents:")
for entry in os.listdir(KAGGLE_DATA_PATH):
    entry_path = os.path.join(KAGGLE_DATA_PATH, entry)
    if not os.path.isfile(entry_path):
        continue  # only regular files are listed
    entry_mb = os.path.getsize(entry_path) / BYTES_PER_MB
    print(f"  {entry}: {entry_mb:.2f} MB")

# Paths used by the data-loading and model-building cells.
HCP_PATH = os.path.join(KAGGLE_DATA_PATH, 'HCP_rfMRI_100s4s_236_MGTR_matlab_train_val_test.h5')
FC_PATH = os.path.join(KAGGLE_DATA_PATH, 'FC.npy')

# Every required file must exist; stop at the first one that does not.
print("\nVerifying required files:")
files_to_check = {
    'HCP_rfMRI_100s4s_236_MGTR_matlab_train_val_test.h5': HCP_PATH,
    'FC.npy': FC_PATH,
}
for name, path in files_to_check.items():
    if not os.path.exists(path):
        print(f"  ✗ {name}: NOT FOUND")
        raise FileNotFoundError(f"Required file not found: {path}")
    print(f"  ✓ {name}: {os.path.getsize(path) / BYTES_PER_MB:.2f} MB")

# Print the shape of every top-level entry in the HCP file.
print("\nHCP data structure:")
with h5py.File(HCP_PATH, 'r') as hcp_file:
    for key in hcp_file:
        print(f"  {key}: {hcp_file[key].shape}")

print("\n✓ All required files verified!")

## 4. Configure GPU/TPU Settings for Training

In [None]:
# Switch Keras to mixed-precision training and enable TensorFlow graph
# optimizations for the GPU.
from tensorflow.keras import mixed_precision

# 'mixed_float16' computes in float16 while keeping variables in float32,
# which can significantly speed up training on modern GPUs.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

print(f"Compute dtype: {policy.compute_dtype}")
print(f"Variable dtype: {policy.variable_dtype}")

# Graph-level optimizations: XLA JIT compilation plus the grappler
# auto-mixed-precision pass.
# NOTE(review): the grappler pass looks redundant with the Keras policy set
# above — confirm whether both are intended.
grappler_options = {"auto_mixed_precision": True}
tf.config.optimizer.set_jit(True)
tf.config.optimizer.set_experimental_options(grappler_options)

print("\n✓ GPU optimization configured!")

## 5. Import Model and Utils

In [None]:
# Import the model builder and logging helper from the cloned repository,
# after confirming that the working directory really is the clone.
import os
import sys

cwd = os.getcwd()
print(f"Current working directory: {cwd}")
print(f"Python path includes: {cwd in sys.path}")

# Both modules are imported by bare name below, so they must sit in the
# current directory.
required_files = ['model.py', 'utils.py']
missing_files = [name for name in required_files if not os.path.exists(name)]
if missing_files:
    print(f"\n✗ Missing files: {missing_files}")
    print("Make sure you ran the cell to clone the GitHub repository!")
    raise FileNotFoundError(f"Required files not found: {missing_files}")

# Project modules from the cloned repository
from model import get_model
from utils import save_logs_models

# Helpers used by the training cells further down
import random
from time import gmtime, strftime
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers

print("\n✓ Model imported successfully!")

## 6. Load and Prepare Data

In [None]:
# Load the train/validation/test splits from the HCP HDF5 file, append a
# trailing channel axis to the inputs and one-hot encode the labels.
ROI_N = 236
frames = 100

print("Loading HCP data from Kaggle dataset...")
with h5py.File(HCP_PATH, 'r') as hcp_file:
    # [()] reads each dataset fully into memory before the file is closed.
    x_train = hcp_file['x_train'][()]
    x_val = hcp_file['x_val'][()]
    x_test = hcp_file['x_test'][()]
    y_train = hcp_file['y_train'][()]
    y_val = hcp_file['y_val'][()]
    y_test = hcp_file['y_test'][()]

# Add channel dimension (new last axis)
x_train = x_train[..., np.newaxis]
x_val = x_val[..., np.newaxis]
x_test = x_test[..., np.newaxis]

print(f"x_train shape: {x_train.shape}")
print(f"x_val shape: {x_val.shape}")
print(f"x_test shape: {x_test.shape}")

# One-hot encode the integer labels
num_classes = 100
to_one_hot = tf.keras.utils.to_categorical
y_train = to_one_hot(y_train, num_classes)
y_val = to_one_hot(y_val, num_classes)
y_test = to_one_hot(y_test, num_classes)

print(f"\ny_train shape: {y_train.shape}")
print(f"y_val shape: {y_val.shape}")
print(f"y_test shape: {y_test.shape}")

print("\n✓ Data loaded successfully!")

## 7. Set Training Parameters

In [None]:
# Hyperparameters shared by the model-building, training and evaluation cells.
k = 5            # Number of nearest neighbors
batch_size = 16  # Increased for GPU (original: 8)
epochs = 100
l2_reg = 1e-4
dp = 0.5
lr = 1e-5

# Echo the configuration so it is recorded in the notebook output.
for line in (
    "Training Configuration:",
    f"  Dropout: {dp}",
    f"  L2 regularization: {l2_reg}",
    f"  Batch size: {batch_size}",
    f"  Epochs: {epochs}",
    f"  Learning rate: {lr}",
    f"  k (neighbors): {k}",
):
    print(line)

# Run name derived from the hyperparameters
file_name = f'kaggle_k_{k}_l2_{l2_reg}_dp_{dp}'
print(f"\nFile name: {file_name}")

# Checkpoints go to ./tmp; the UTC timestamp keeps repeated runs apart.
os.makedirs('tmp', exist_ok=True)
timestamp = strftime("%Y_%m_%d_%H_%M_%S", gmtime())
tmp_name = f'tmp/tmp_{file_name}_{timestamp}.hdf5'
print(f"Model checkpoint: {tmp_name}")

## 8. Build Model

In [None]:
print("Building model with FC matrix from Kaggle dataset...")

# Arguments for a freshly initialized model (weight_path=None); the graph is
# read from the FC matrix in the Kaggle dataset.
model_config = {
    'graph_path': FC_PATH,
    'ROI_N': ROI_N,
    'frames': frames,
    'kernels': [8, 8, 8, 16, 32, 32],
    'k': k,
    'l2_reg': l2_reg,
    'dp': dp,
    'num_classes': num_classes,
    'weight_path': None,
    'skip': [0, 0],
}
model = get_model(**model_config)

model.summary()

## 9. Compile and Train Model

In [None]:
# Compile the model, set up the training callbacks and run the fit loop.
model.compile(
    loss=['categorical_crossentropy'],
    optimizer=optimizers.legacy.Adam(learning_rate=lr),
    metrics=['accuracy']
)

# Halve the learning rate when validation accuracy stops improving.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=10,
    min_lr=1e-6
)

# Learning rate seen at the start of every epoch (filled by Lr_record).
lr_hist = []


class Lr_record(tf.keras.callbacks.Callback):
    """Record and print the optimizer's learning rate at each epoch start."""

    def on_epoch_begin(self, epoch, logs=None):
        # `logs` defaults to None rather than a shared mutable dict, and the
        # optimizer is read from the model Keras attached to this callback
        # (self.model) instead of the notebook-global `model`.
        tmp = K.get_value(self.model.optimizer.learning_rate)
        lr_hist.append(tmp)
        print(f'Learning rate: {tmp}')


lr_record = Lr_record()

# Stop when validation loss has not improved for 10 epochs.
earlystop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10
)

# Keep only the checkpoint with the best validation accuracy.
checkpointer = tf.keras.callbacks.ModelCheckpoint(
    monitor='val_accuracy',
    filepath=tmp_name,
    verbose=1,
    save_best_only=True
)

# TensorBoard callback for visualization
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir='./logs',
    histogram_freq=1
)

print("\n" + "="*60)
print("Starting training...")
print("Monitor training progress in real-time!")
print("="*60 + "\n")

# Train model
model_history = model.fit(
    x_train, y_train,
    shuffle=True,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_val, y_val),
    callbacks=[checkpointer, lr_record, reduce_lr, earlystop, tensorboard_callback]
)

## 10. Evaluate Best Model

In [None]:
print("Loading best model for evaluation...\n")

# Rebuild the network with the same arguments used for training and load the
# best checkpoint written by ModelCheckpoint (tmp_name).
# `dp` is passed explicitly so this call matches the training-time call to
# get_model and does not depend on whatever default get_model may define.
model_best = get_model(
    graph_path=FC_PATH,  # Use Kaggle dataset path
    ROI_N=ROI_N,
    frames=frames,
    kernels=[8, 8, 8, 16, 32, 32],
    k=k,
    l2_reg=l2_reg,
    dp=dp,
    num_classes=num_classes,
    weight_path=tmp_name,
    skip=[0, 0]
)

# Compiling is needed for evaluate() to report loss and accuracy.
model_best.compile(
    loss=['categorical_crossentropy'],
    optimizer=optimizers.legacy.Adam(learning_rate=lr),
    metrics=['accuracy']
)

# Evaluate on validation set; results are [loss, accuracy]
print("Evaluating on validation set...")
val_results = model_best.evaluate(x=x_val, y=y_val, batch_size=batch_size, verbose=1)
print(f"\nValidation Results - Loss: {val_results[0]:.4f}, Accuracy: {val_results[1]:.4f}")

# Evaluate on test set
print("\nEvaluating on test set...")
test_results = model_best.evaluate(x=x_test, y=y_test, batch_size=batch_size, verbose=1)
print(f"\nTest Results - Loss: {test_results[0]:.4f}, Accuracy: {test_results[1]:.4f}")

## 11. Save Results

In [None]:
# Hand the trained model, its history and the best validation accuracy to the
# repository's save_logs_models helper, writing under tmp/.
save_options = {
    'acc': val_results[1],   # validation accuracy of the best checkpoint
    'folder': 'tmp/',
    'lr_hist': lr_hist,
    'file_name': file_name,
    'loss_name': 'loss',
    'acc_name': 'accuracy',
    'tmp_name': tmp_name,
}
save_logs_models(model, model_history, **save_options)

print("\n✓ Training complete! Results saved.")

## 12. Visualize Training History

In [None]:
import matplotlib.pyplot as plt

# Draw training vs. validation curves for accuracy (left) and loss (right),
# save the figure, then print the headline numbers.
history = model_history.history
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# One row per panel:
# (axis, train key, val key, train label, val label, title, y-axis label)
panels = (
    (ax1, 'accuracy', 'val_accuracy', 'Train Accuracy', 'Val Accuracy',
     'Model Accuracy', 'Accuracy'),
    (ax2, 'loss', 'val_loss', 'Train Loss', 'Val Loss',
     'Model Loss', 'Loss'),
)
for ax, train_key, val_key, train_label, val_label, title, y_label in panels:
    ax.plot(history[train_key], label=train_label, linewidth=2)
    ax.plot(history[val_key], label=val_label, linewidth=2)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('training_history.png', dpi=300, bbox_inches='tight')
plt.show()

separator = "="*60
best_val_accuracy = max(history['val_accuracy'])
print("\n" + separator)
print("FINAL RESULTS")
print(separator)
print(f"Best Validation Accuracy: {best_val_accuracy:.4f}")
print(f"Test Accuracy: {test_results[1]:.4f}")
print(separator)