# GIN < FC

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from importlib import reload
import os
import torch
import operator

import sys
sys.path.append("..")
from brain_connectivity import dataset, gin, dense, enums, training, evaluation, general_utils, data_utils


In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Echo the selected device as the cell output.
device

## Train model

In [None]:
# Settings used to build the dataset.
dataset_params = {
    # Raw data: source folder, target device and correlation measure.
    "data_folder": os.path.normpath("../data"),
    "device": device,
    # Both upsampling entries are None (presumably disables time-series
    # upsampling -- confirm against the dataset module).
    "upsample_ts": None,
    "upsample_ts_method": None,
    "correlation_type": enums.CorrelationType.PEARSON,
    # General data handling: node feature type and loader batch size.
    "node_features": enums.NodeFeatures.FC_MATRIX_ROW,
    "batch_size": 1,
}

# How to create the FC matrix. Assigned after the literal above so that
# "geometric_kwargs" stays the last key of dataset_params.
# NOTE(review): with operator.ge, threshold 0.05 and
# threshold_by_absolute_value=True this presumably keeps entries with
# |value| >= 0.05 -- confirm against the thresholding implementation.
dataset_params["geometric_kwargs"] = {
    "thresholding_function": enums.ThresholdingFunction.SUBJECT_VALUES,
    "threshold_type": enums.DataThresholdingType.FIXED_THRESHOLD,
    "threshold": 0.05,
    "thresholding_operator": operator.ge,
    "threshold_by_absolute_value": True,
    "return_absolute_value": False,
}

In [None]:
# Settings for the training run.
training_params = {
    # Training regime: number of epochs and validation frequency.
    "epochs": 10,
    "validation_frequency": 1,

    # Optimizer class and the keyword arguments it is constructed with.
    "optimizer": torch.optim.Adam,
    "optimizer_kwargs": {
        # "momentum": 0.5,
        "lr": 0.001,
        "weight_decay": 0.0001,
    },

    # Optional learning-rate scheduler (currently disabled).
    # "scheduler": torch.optim.lr_scheduler.StepLR,
    # "scheduler_kwargs": {
    #     "step_size": 50,
    #     "gamma": 0.5,
    # },

    # Loss function instance.
    "criterion": torch.nn.CrossEntropyLoss(),

    # Plotting of the FC matrix: frequency and which sublayer to plot.
    "fc_matrix_plot_frequency": 50,
    "fc_matrix_plot_sublayer": 0,
}

In [None]:
# Model settings shared by the architectures configured in this notebook.
model_params = {
    # Input feature size, hidden width, sublayer count and dropout rate.
    "size_in": 90,
    "num_hidden_features": 64,
    "num_sublayers": 1,
    "dropout": 0.5,
}

# Extra settings for the GIN model (presumably the GIN epsilon term --
# confirm against the gin module).
gin_params = {"eps": 0.2}

# Extra settings for the dense model.
dense_params = {
    # Connectivity handling.
    "mode": enums.ConnectivityMode.MULTIPLE,
    "num_nodes": 90,
    "connectivity_dropout": 0.0,
    # Readout applied to the node representations.
    "readout": "add",
    # Embedding ("emb_*") options: dropout, residual mode and initialisation.
    "emb_dropout": 0.0,
    "emb_residual": "add",
    "emb_init_weights": "constant",
    "emb_val": 0.0,
    "emb_std": 0.01,
}

In [None]:
# Subject table: file name inside ../data and the column holding the labels.
dataframe_with_subjects = "patients-cleaned.csv"
target_column = "target"

# Load the table, using the first CSV column as the index.
_subjects_csv = os.path.join(os.path.normpath("../data"), dataframe_with_subjects)
df = pd.read_csv(_subjects_csv, index_col=0)

# Values of the target column, one entry per row of the table.
targets = df[target_column].values


In [None]:
# Re-import the project modules so that edits made on disk are picked up
# without restarting the kernel. The order matches the original cell.
for _module in (dataset, gin, dense, evaluation, training, data_utils):
    reload(_module)

In [None]:
# Run counter used as the suffix of the experiment folder name
# ("test_fc_{i}"); the folder-creation cell increments it after each
# successfully created folder.
i = 6

In [None]:
# Every run writes into its own folder under ../runs, named after the
# current run counter `i`.
experiment_folder = os.path.join(os.path.normpath("../runs"), f"test_fc_{i}")

try:
    # exist_ok=False: refuse to reuse (and overwrite) an earlier run's folder.
    os.makedirs(experiment_folder, exist_ok=False)
except FileExistsError as err:
    raise ValueError(
        f"Run experiment with existing name ({experiment_folder})."
    ) from err
else:
    # Folder created: advance the counter so re-running this cell starts a
    # new run, then close all loggers (presumably those left open by a
    # previous run -- confirm in general_utils).
    i += 1
    general_utils.close_all_loggers()

In [None]:
# Nested cross-validation: for every outer fold, the inner folds are used to
# select hyperparameters, then the selected setting is trained on "dev" and
# evaluated on "test".
cv = data_utils.StratifiedCrossValidation(
    experiment_folder, targets=targets, num_assess_folds=2, num_select_folds=10
)

# Results of every assessment run across all outer folds. Previously each run
# overwrote `train_results` / `eval_results`, so only the last one survived.
assessment_results = []

for outter_id in cv.outter_cross_validation():
    # Model selection.
    # Save results for each hyperparameter selection and keep best.
    experiment_results = []
    best_hyperparameters = None
    best_mean_accuracy = 0
    best_std_accuracy = 0

    # NOTE(review): `[1, 2, 3]` is a placeholder grid. The selection step
    # below stores the best epoch count under an "epochs" key, so each entry
    # has to be a mapping of hyperparameters -- replace before a real run.
    for hyper_id, hyperparameters in enumerate([1, 2, 3]):
        # TODO: Put hyperparameters in folder name.
        log_folder = os.path.join(
            experiment_folder, f"{outter_id:03d}", f"{hyper_id:03d}_BAF"
        )

        # NOTE(review): model and trainer are created once per hyperparameter
        # setting and reused for every inner fold; whether `trainer.train`
        # re-initialises the weights per fold is not visible here -- confirm.
        model, data, trainer = training.init_geometric_traning(
            log_folder, device, hyperparameters, targets
        )

        # Run training.
        train_dataset = "train"
        eval_dataset = "val"
        for inner_id in cv.inner_cross_validation():
            trainer.train(
                model=model,
                named_trainloader=(
                    train_dataset,
                    data.geometric_loader(
                        dataset=train_dataset, indices=cv.train_indices
                    ),
                ),
                named_evalloader=(
                    eval_dataset,
                    data.geometric_loader(
                        dataset=eval_dataset, indices=cv.val_indices
                    ),
                ),
                fold=inner_id,
            )

        # Results.
        train_results, eval_results = trainer.get_results(
            train_dataset=train_dataset, eval_dataset=eval_dataset
        )
        experiment_results.append(
            {
                train_dataset: train_results,
                eval_dataset: eval_results,
                "hyperparameters": hyperparameters,
            }
        )
        # Update best setting based on eval accuracy.
        # eval_results["accuracy"] is used as a (mean, std) pair of arrays;
        # the entry with the highest (mean - std) is picked and its position
        # + 1 is stored as the number of epochs (presumably one entry per
        # epoch -- confirm against trainer.get_results).
        max_index = np.argmax(
            eval_results["accuracy"][0] - eval_results["accuracy"][1]
        )
        max_mean_accuracy = eval_results["accuracy"][0][max_index]
        max_std_accuracy = eval_results["accuracy"][1][max_index]

        if (max_mean_accuracy - max_std_accuracy) > (
            best_mean_accuracy - best_std_accuracy
        ):
            # Build a copy instead of writing "epochs" into `hyperparameters`:
            # that object is also stored in `experiment_results`, and the
            # recorded setting must not change after the fact.
            best_hyperparameters = {**hyperparameters, "epochs": max_index + 1}
            best_mean_accuracy = max_mean_accuracy
            best_std_accuracy = max_std_accuracy

    # NOTE(review): `best_hyperparameters` is still None here when no setting
    # reached (mean - std) > 0; how `init_geometric_traning` treats None is
    # not visible from this notebook.

    # Model assessment.
    # Run 3 times to offset random initialization; every run is recorded in
    # `assessment_results` so the runs can be averaged afterwards.
    for test_id in range(3):
        log_folder = os.path.join(
            experiment_folder, f"{outter_id:03d}", f"test_{test_id}"
        )
        model, data, trainer = training.init_geometric_traning(
            log_folder, device, best_hyperparameters, targets
        )
        # Run training.
        train_dataset = "dev"
        eval_dataset = "test"
        trainer.train(
            model=model,
            named_trainloader=(
                train_dataset,
                data.geometric_loader(
                    dataset=train_dataset, indices=cv.dev_indices
                ),
            ),
            named_evalloader=(
                eval_dataset,
                data.geometric_loader(
                    dataset=eval_dataset, indices=cv.test_indices
                ),
            ),
            fold=f"test_{test_id}",
        )
        # Results.
        train_results, eval_results = trainer.get_results(
            train_dataset=train_dataset, eval_dataset=eval_dataset
        )
        assessment_results.append(
            {
                "outer_fold": outter_id,
                "run": test_id,
                train_dataset: train_results,
                eval_dataset: eval_results,
                "hyperparameters": best_hyperparameters,
            }
        )

general_utils.close_all_loggers()
print("Finished training")


In [None]:
# Display the training-side results left over from the last
# `trainer.get_results` call of the cross-validation cell.
train_results

In [None]:
# Display the evaluation-side results left over from the last
# `trainer.get_results` call of the cross-validation cell.
eval_results

In [None]:
# Actually call the function: without the parentheses the cell only
# evaluated to the function object and never ran it.
general_utils.close_all_loggers()