In [1]:
# Data Download
from tqdm import tqdm
from itertools import product
from nilearn import datasets

# Preprocessing options; the download loop below fetches every combination of them
pipelines = [
    "cpac",
    "ccs",
    "dparsf",
    "niak",
]
band_pass = [True, False]
global_signal = [True, False]
parcellations = [
    "rois_aal",
    "rois_cc200",
    "rois_dosenbach160",
]

# Subset of subjects to download, listed per diagnostic group
_CONTROL_IDS = [50012, 50014, 50015, 50016, 50020, 50022, 50023, 50024, 50025, 50027]
_AUTISM_IDS = [50030, 50031, 50032, 50033, 50034, 50035, 50036, 50037, 50038, 50040]
SUB_IDS = _CONTROL_IDS + _AUTISM_IDS

def fetch_data(pipe, bp, gsr, parc):
    """Fetch one preprocessing variant of the ABIDE data for the subjects in ``SUB_IDS``.

    Parameters
    ----------
    pipe
        Forwarded to ``fetch_abide_pcp`` as ``pipeline``.
    bp
        Forwarded as ``band_pass_filtering``.
    gsr
        Forwarded as ``global_signal_regression``.
    parc
        Forwarded as ``derivatives``.

    Returns
    -------
    tuple
        ``((pipe, bp, gsr, parc), bunch)``: the option tuple (usable as a dict key)
        together with the object returned by ``fetch_abide_pcp``.
    """
    variant_options = {
        "pipeline": pipe,
        "derivatives": parc,
        "band_pass_filtering": bp,
        "global_signal_regression": gsr,
    }
    bunch = datasets.fetch_abide_pcp(SUB_ID=SUB_IDS, data_dir="./abide_data", verbose=0, **variant_options)
    return (pipe, bp, gsr, parc), bunch

# One download per combination of the four option lists
all_combinations = list(product(pipelines, band_pass, global_signal, parcellations))

# fetch_data returns (key, bunch) pairs, so they can be fed straight into the dict;
# entries are inserted one by one while the progress bar advances
abide_dataset = {}
abide_dataset.update(fetch_data(*combo) for combo in tqdm(all_combinations, desc="Fetching ABIDE data"))

# Summarise using one variant as the reference
reference = abide_dataset[('cpac', True, True, 'rois_aal')]
print(f"Available pipelines: {list(abide_dataset)}")
print(f"Number of subjects:  {len(reference.phenotypic)}")
print(f"Class distribution:  {reference.phenotypic['DX_GROUP'].value_counts()}")

Fetching ABIDE data: 100%|██████████| 48/48 [23:52<00:00, 29.84s/it]

Available pipelines: [('cpac', True, True, 'rois_aal'), ('cpac', True, True, 'rois_cc200'), ('cpac', True, True, 'rois_dosenbach160'), ('cpac', True, False, 'rois_aal'), ('cpac', True, False, 'rois_cc200'), ('cpac', True, False, 'rois_dosenbach160'), ('cpac', False, True, 'rois_aal'), ('cpac', False, True, 'rois_cc200'), ('cpac', False, True, 'rois_dosenbach160'), ('cpac', False, False, 'rois_aal'), ('cpac', False, False, 'rois_cc200'), ('cpac', False, False, 'rois_dosenbach160'), ('ccs', True, True, 'rois_aal'), ('ccs', True, True, 'rois_cc200'), ('ccs', True, True, 'rois_dosenbach160'), ('ccs', True, False, 'rois_aal'), ('ccs', True, False, 'rois_cc200'), ('ccs', True, False, 'rois_dosenbach160'), ('ccs', False, True, 'rois_aal'), ('ccs', False, True, 'rois_cc200'), ('ccs', False, True, 'rois_dosenbach160'), ('ccs', False, False, 'rois_aal'), ('ccs', False, False, 'rois_cc200'), ('ccs', False, False, 'rois_dosenbach160'), ('dparsf', True, True, 'rois_aal'), ('dparsf', True, True, 'ro




In [2]:
from comet import multiverse

# Decision points of the multiverse; each key matches a double-brace placeholder
# of the same name in the analysis template
forking_paths = dict(
    # Preprocessing pipelines
    pipeline=["cpac", "ccs", "dparsf", "niak"],
    # Parcellated time series data
    parcellation=["rois_aal", "rois_cc200", "rois_dosenbach160"],
    # Band-pass filtering
    band_pass=[True, False],
    # Global signal regression
    global_signal=[True, False],
    # Functional connectivity method
    connectivity=[
        dict(name="pearson", func="comet.connectivity.Static_Pearson(ts).estimate()"),
        dict(name="partial", func="comet.connectivity.Static_Partial(ts).estimate()"),
    ],
    # Regularisation strength for the classifier
    regularisation=[0.25, 1.0],
)

def analysis_template():
    """Template for a single universe: classify autism vs. control from functional connectivity.

    The double-brace placeholders (pipeline, parcellation, band_pass, global_signal,
    connectivity, regularisation) share their names with the keys of ``forking_paths``.
    They are presumably substituted by comet with one option each when the multiverse
    is created, so this function is not meant to be called directly.
    NOTE(review): the imports are kept inside the body on the assumption that every
    universe runs as a standalone script -- confirm against the comet documentation.

    Steps:
    1. Load the ABIDE time series for the selected preprocessing variant.
    2. Estimate one connectivity matrix per subject and keep its upper triangle
       (diagonal excluded) as the feature vector.
    3. Score a standardised L2 logistic regression with 5-fold stratified cross-validation.
    4. Store the per-fold accuracies via ``comet.utils.save_universe_results``.
    """
    import comet
    import numpy as np
    from nilearn import datasets
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import StratifiedKFold, cross_val_score

    # Subset of subjects to use
    SUB_IDS = [50012, 50014, 50015, 50016, 50020, 50022, 50023, 50024, 50025, 50027, # controls
               50030, 50031, 50032, 50033, 50034, 50035, 50036, 50037, 50038, 50040] # autism

    # Get data (if available, it will be loaded from disk)
    data = datasets.fetch_abide_pcp(SUB_ID=SUB_IDS, data_dir="./abide_data", verbose=0,
                                    pipeline={{pipeline}},
                                    derivatives={{parcellation}},
                                    band_pass_filtering={{band_pass}},
                                    global_signal_regression={{global_signal}})

    time_series = data[{{parcellation}}]
    diagnosis = data["phenotypic"]["DX_GROUP"]

    # Calculate FC
    tri_ix = None
    features = []

    for ts in time_series:
        FC = {{connectivity}}

        # The upper-triangle index is built from the first matrix and reused afterwards.
        # Identity check: tri_ix becomes a tuple of arrays, so "== None" would be fragile.
        if tri_ix is None:
            tri_ix = np.triu_indices_from(FC, k=1)

        feat_vec = FC[tri_ix]
        features.append(feat_vec)

    # Prepare features (FC estimates) and target (autism/control)
    X = np.vstack(features)
    X[np.isnan(X)] = 0.0  # undefined connectivity estimates are replaced by zero
    y = np.array(diagnosis)

    # Classification model
    model = Pipeline([('scaler', StandardScaler()), ('reg', LogisticRegression(penalty='l2', C={{regularisation}}, tol=1e-3))])
    cv = StratifiedKFold(n_splits=5)
    accuracies = cross_val_score(model, X, y, cv=cv, scoring='accuracy')

    # Save the results
    comet.utils.save_universe_results({"accuracy": accuracies})

# Create and run the multiverse analysis
# create() receives the template function together with the forking paths. The options
# defined above give 4 * 3 * 2 * 2 * 2 * 2 = 192 combinations, which matches the 192
# universes shown in the progress output of this cell.
mverse = multiverse.Multiverse(name="example_mv_abide")
mverse.create(analysis_template, forking_paths)
# NOTE(review): parallel=8 presumably sets the number of parallel workers -- confirm against the comet docs
mverse.run(parallel=8)

  from tqdm.autonotebook import tqdm


Starting multiverse analysis for all universes...


Performing multiverse analysis::   0%|          | 0/192 [00:00<?, ?it/s]

The multiverse analysis completed without any errors.
