### Packages

In [63]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import seaborn as sns
import re
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, accuracy_score

# Set Up

### Demographic

In [64]:
info = pd.read_excel('../data/Brain_networks/Demographic.xlsx')
info['Participant_ID'] = info['Participant_ID'].apply(lambda x: int(re.findall(r'\d+', str(x))[0]))

# Remove duplicates
info = info.drop_duplicates(subset='Participant_ID')

# Reset index (optional)
info = info.reset_index(drop=True)

# Display the result
info.head()

Unnamed: 0,Participant_ID,ADHD/NT,Gender
0,200,NT,F
1,201,ADHD,F
2,202,NT,F
3,205,NT,F
4,207,NT,F


### Define

In [65]:
condition_map = {
    'rsfMRI_HB6': 'rest',
    'flanker_events': 'task',
    'flanker_events_twitching': 'twitching'
}

exclude_cols = ['Participant_ID', 'Sequence_name', 'Condition_matfile', 'Condition', 'ADHD/NT', 'Gender']

In [66]:
significant_networks = pd.read_excel('significant_networks.xlsx')
significant_networks

Unnamed: 0,Feature,F-statistic,p-value,condition
0,Medial-Occipital,7.251092,0.009248,rest
1,Occipital-Lateral(L),6.771685,0.011737,rest
2,Medial-Lateral(L),26.535364,3e-06,task
3,Medial-Lateral(R),18.988989,5.4e-05,task
4,ACC-RPFC(L),5.479466,0.022701,task
5,AInsula(L)-RPFC(R),4.295991,0.042656,task
6,LPFC(L)-PPC(L),6.861821,0.01122,task


# Random Forest

## Preprocess

### Visual

In [87]:
# Visual
visual_2021 = pd.read_excel('../data/Brain_networks/Network_CC/Visual_2021.xlsx')
visual_2022 = pd.read_excel('../data/Brain_networks/Network_CC/Visual_2022.xlsx')
visual_2023 = pd.read_excel('../data/Brain_networks/Network_CC/Visual_2023.xlsx')

all_visual = pd.concat([visual_2021, visual_2022, visual_2023], ignore_index=True)
all_visual.head()

Unnamed: 0,Participant_ID,Sequence_name,Condition_matfile,Condition,Medial-Occipital,Medial-Lateral(L),Medial-Lateral(R),Occipital-Lateral(L),Occipital-Lateral(R),Lateral(L)-Lateral(R),Averaged_CC
0,200,rsfMRI_HB6,Condition001.mat,rest,0.18123,0.22104,0.032202,0.68723,0.4797,0.97918,0.4301
1,201,rsfMRI_HB6,Condition001.mat,rest,0.44015,0.56515,0.42759,0.848,0.88916,1.2118,0.73031
2,202,rsfMRI_HB6,Condition001.mat,rest,0.28081,0.21935,0.58884,0.51701,0.6758,0.87309,0.52582
3,205,rsfMRI_HB6,Condition001.mat,rest,0.39404,0.88638,0.51938,0.50584,0.28219,1.0813,0.61152
4,207,rsfMRI_HB6,Condition001.mat,rest,0.44657,0.32933,0.27231,0.47209,0.3813,0.68107,0.43045


In [88]:
visual_w_info = all_visual.merge(info, on='Participant_ID', how='inner')
matrix_visual = visual_w_info.drop(columns=['Condition_matfile', 'Sequence_name'], inplace=False)
matrix_visual.head()

Unnamed: 0,Participant_ID,Condition,Medial-Occipital,Medial-Lateral(L),Medial-Lateral(R),Occipital-Lateral(L),Occipital-Lateral(R),Lateral(L)-Lateral(R),Averaged_CC,ADHD/NT,Gender
0,200,rest,0.18123,0.22104,0.032202,0.68723,0.4797,0.97918,0.4301,NT,F
1,201,rest,0.44015,0.56515,0.42759,0.848,0.88916,1.2118,0.73031,ADHD,F
2,202,rest,0.28081,0.21935,0.58884,0.51701,0.6758,0.87309,0.52582,NT,F
3,205,rest,0.39404,0.88638,0.51938,0.50584,0.28219,1.0813,0.61152,NT,F
4,207,rest,0.44657,0.32933,0.27231,0.47209,0.3813,0.68107,0.43045,NT,F


### Salience

In [89]:
# Salience
salience_2021 = pd.read_excel('../data/Brain_networks/Network_CC/Salience_2021.xlsx')
salience_2022 = pd.read_excel('../data/Brain_networks/Network_CC/Salience_2022.xlsx')
salience_2023 = pd.read_excel('../data/Brain_networks/Network_CC/Salience_2023.xlsx')

all_salience = pd.concat([salience_2021, salience_2022, salience_2023], ignore_index=True)
all_salience.head()

Unnamed: 0,Participant_ID,Sequence_name,Condition_matfile,Condition,ACC-AInsula(L),ACC-AInsula(R),ACC-RPFC(L),ACC-RPFC(R),ACC-SMG(L),ACC-SMG(R),...,AInsula(R)-RPFC(R),AInsula(R)-SMG(L),AInsula(R)-SMG(R),RPFC(L)-RPFC(R),RPFC(L)-SMG(L),RPFC(L)-SMG(R),RPFC(R)-SMG(L),RPFC(R)-SMG(R),SMG(L)-SMG(R),Averaged_CC
0,200,rsfMRI_HB6,Condition001.mat,rest,0.38244,0.42946,0.47293,0.68623,0.47652,0.14783,...,0.24826,0.40205,0.16082,0.51811,0.25721,0.083836,0.32207,0.010654,0.96934,0.3448
1,201,rsfMRI_HB6,Condition001.mat,rest,0.26701,0.30673,0.26035,0.47045,0.036738,0.29623,...,0.4424,0.13552,0.71314,0.79054,0.55826,0.60852,0.21384,0.58124,0.79743,0.49922
2,202,rsfMRI_HB6,Condition001.mat,rest,0.39955,0.76758,0.18989,0.30054,0.33014,0.0646,...,0.32921,0.48753,0.26006,0.42888,0.71127,0.33762,0.58176,0.95569,0.62045,0.39151
3,205,rsfMRI_HB6,Condition001.mat,rest,0.51393,0.60648,0.83598,0.76121,0.42954,-0.027161,...,0.64889,0.65663,0.22283,1.2365,0.55846,0.1853,0.59952,0.32887,0.55385,0.45836
4,207,rsfMRI_HB6,Condition001.mat,rest,0.50032,0.54461,0.35094,0.37688,0.28651,0.24603,...,0.32564,0.47198,0.62164,0.44331,0.75381,0.44949,0.38576,0.59805,0.79248,0.42589


In [90]:
salience_w_info = all_salience.merge(info, on='Participant_ID', how='inner')
matrix_salience = salience_w_info.drop(columns=['Condition_matfile', 'Sequence_name'], inplace=False)
matrix_salience.head()

Unnamed: 0,Participant_ID,Condition,ACC-AInsula(L),ACC-AInsula(R),ACC-RPFC(L),ACC-RPFC(R),ACC-SMG(L),ACC-SMG(R),AInsula(L)-AInsula(R),AInsula(L)-RPFC(L),...,AInsula(R)-SMG(R),RPFC(L)-RPFC(R),RPFC(L)-SMG(L),RPFC(L)-SMG(R),RPFC(R)-SMG(L),RPFC(R)-SMG(R),SMG(L)-SMG(R),Averaged_CC,ADHD/NT,Gender
0,200,rest,0.38244,0.42946,0.47293,0.68623,0.47652,0.14783,0.40015,0.63142,...,0.16082,0.51811,0.25721,0.083836,0.32207,0.010654,0.96934,0.3448,NT,F
1,201,rest,0.26701,0.30673,0.26035,0.47045,0.036738,0.29623,0.59892,0.81091,...,0.71314,0.79054,0.55826,0.60852,0.21384,0.58124,0.79743,0.49922,ADHD,F
2,202,rest,0.39955,0.76758,0.18989,0.30054,0.33014,0.0646,0.22255,0.55078,...,0.26006,0.42888,0.71127,0.33762,0.58176,0.95569,0.62045,0.39151,NT,F
3,205,rest,0.51393,0.60648,0.83598,0.76121,0.42954,-0.027161,0.41983,0.29928,...,0.22283,1.2365,0.55846,0.1853,0.59952,0.32887,0.55385,0.45836,NT,F
4,207,rest,0.50032,0.54461,0.35094,0.37688,0.28651,0.24603,0.63985,0.3283,...,0.62164,0.44331,0.75381,0.44949,0.38576,0.59805,0.79248,0.42589,NT,F


### FP

In [91]:
# FrontoParietal
fp_2021 = pd.read_excel('../data/Brain_networks/Network_CC/FP_2021.xlsx')
fp_2022 = pd.read_excel('../data/Brain_networks/Network_CC/FP_2022.xlsx')
fp_2023 = pd.read_excel('../data/Brain_networks/Network_CC/FP_2023.xlsx')

all_fp = pd.concat([fp_2021, fp_2022, fp_2023], ignore_index=True)
all_fp.head()

Unnamed: 0,Participant_ID,Sequence_name,Condition_matfile,Condition,LPFC(L)-PPC(L),LPFC(L)-LPFC(R),LPFC(L)-PPC(R),PPC(L)-LPFC(R),PPC(L)-PPC(R),LPFC(R)-PPC(R),Averaged_CC
0,200,rsfMRI_HB6,Condition001.mat,rest,0.74405,0.31677,0.07744,0.4263,0.42652,0.98033,0.49524
1,201,rsfMRI_HB6,Condition001.mat,rest,0.80099,0.17939,0.3125,0.07402,0.6891,0.61432,0.44505
2,202,rsfMRI_HB6,Condition001.mat,rest,1.2186,0.74083,0.52707,0.73794,0.64482,1.0955,0.82746
3,205,rsfMRI_HB6,Condition001.mat,rest,0.65107,0.79147,0.15068,0.82202,0.68807,0.5438,0.60785
4,207,rsfMRI_HB6,Condition001.mat,rest,0.75497,0.39063,0.45857,0.73926,0.70244,1.0723,0.68636


In [92]:
fp_w_info = all_fp.merge(info, on='Participant_ID', how='inner')
matrix_fp = fp_w_info.drop(columns=['Condition_matfile', 'Sequence_name'], inplace=False)
matrix_fp.head()

Unnamed: 0,Participant_ID,Condition,LPFC(L)-PPC(L),LPFC(L)-LPFC(R),LPFC(L)-PPC(R),PPC(L)-LPFC(R),PPC(L)-PPC(R),LPFC(R)-PPC(R),Averaged_CC,ADHD/NT,Gender
0,200,rest,0.74405,0.31677,0.07744,0.4263,0.42652,0.98033,0.49524,NT,F
1,201,rest,0.80099,0.17939,0.3125,0.07402,0.6891,0.61432,0.44505,ADHD,F
2,202,rest,1.2186,0.74083,0.52707,0.73794,0.64482,1.0955,0.82746,NT,F
3,205,rest,0.65107,0.79147,0.15068,0.82202,0.68807,0.5438,0.60785,NT,F
4,207,rest,0.75497,0.39063,0.45857,0.73926,0.70244,1.0723,0.68636,NT,F


### Partner's

In [93]:
# Sensorimotor
SenMotor_2021 = pd.read_excel('../data/Brain_networks/Network_CC/SenMotor_2021.xlsx')
SenMotor_2022 = pd.read_excel('../data/Brain_networks/Network_CC/SenMotor_2022.xlsx')
SenMotor_2023 = pd.read_excel('../data/Brain_networks/Network_CC/SenMotor_2023.xlsx')

all_SenMotor = pd.concat([SenMotor_2021, SenMotor_2022, SenMotor_2023], ignore_index=True)

SenMotor_w_info = all_SenMotor.merge(info, on='Participant_ID', how='inner')
matrix_SenMotor = SenMotor_w_info.drop(columns=['Condition_matfile', 'Sequence_name'], inplace=False)
matrix_SenMotor.head()

# DA
DA_2021 = pd.read_excel('../data/Brain_networks/Network_CC/DA_2021.xlsx')
DA_2022 = pd.read_excel('../data/Brain_networks/Network_CC/DA_2022.xlsx')
DA_2023 = pd.read_excel('../data/Brain_networks/Network_CC/DA_2023.xlsx')

all_DA = pd.concat([DA_2021, DA_2022, DA_2023], ignore_index=True)

DA_w_info = all_DA.merge(info, on='Participant_ID', how='inner')
matrix_DA = DA_w_info.drop(columns=['Condition_matfile', 'Sequence_name'], inplace=False)
matrix_DA.head()

# DMN
DMN_2021 = pd.read_excel('../data/Brain_networks/Network_CC/DMN_2021.xlsx')
DMN_2022 = pd.read_excel('../data/Brain_networks/Network_CC/DMN_2022.xlsx')
DMN_2023 = pd.read_excel('../data/Brain_networks/Network_CC/DMN_2023.xlsx')

all_DMN = pd.concat([DMN_2021, DMN_2022, DMN_2023], ignore_index=True)

DMN_w_info = all_DMN.merge(info, on='Participant_ID', how='inner')
matrix_DMN = DMN_w_info.drop(columns=['Condition_matfile', 'Sequence_name'], inplace=False)
matrix_DMN.head()

Unnamed: 0,Participant_ID,Condition,MPFC-PCC,MPFC-LP(L),MPFC-LP(R),PCC-LP(L),PCC-LP(R),LP(L)-LP(R),Averaged_CC,ADHD/NT,Gender
0,200,rest,0.32243,0.12195,0.29818,0.52458,0.83889,0.40638,0.41874,NT,F
1,201,rest,0.54773,0.30204,0.41196,0.7593,0.85822,0.7944,0.61228,ADHD,F
2,202,rest,0.60787,0.76637,0.90228,0.88364,0.90088,1.266,0.88784,NT,F
3,205,rest,0.58095,0.54134,0.645,0.86591,0.90772,1.2412,0.79702,NT,F
4,207,rest,0.47869,0.32974,0.38599,0.48359,0.51843,0.51397,0.45173,NT,F


## Feature extraction

In [102]:
visual_features = ['Medial-Occipital', 'Occipital-Lateral(L)', 'Medial-Lateral(L)', 'Medial-Lateral(R)']
salience_features = ['ACC-RPFC(L)', 'AInsula(L)-RPFC(R)']
fp_features = ['LPFC(L)-PPC(L)']

DMN_features = ['MPFC-PCC', 'MPFC-LP(L)', 'MPFC-LP(R)', 'LP(L)-LP(R)']

In [103]:
X_visual = matrix_visual[visual_features]
X_salience = matrix_salience[salience_features]
X_fp = matrix_fp[fp_features]

X_DMN = matrix_DMN[DMN_features]
X_DMN

Unnamed: 0,MPFC-PCC,MPFC-LP(L),MPFC-LP(R),LP(L)-LP(R)
0,0.322430,0.12195,0.29818,0.40638
1,0.547730,0.30204,0.41196,0.79440
2,0.607870,0.76637,0.90228,1.26600
3,0.580950,0.54134,0.64500,1.24120
4,0.478690,0.32974,0.38599,0.51397
...,...,...,...,...
163,0.495390,0.86287,0.51024,0.68197
164,0.130060,0.58762,0.86898,1.28890
165,0.414070,0.71636,0.71250,0.70223
166,0.085158,0.88539,0.81971,1.11880


In [104]:
y_visual = matrix_visual['ADHD/NT'].map({'NT': 0, 'ADHD': 1})
y_salience = matrix_salience['ADHD/NT'].map({'NT': 0, 'ADHD': 1})
y_fp = matrix_fp['ADHD/NT'].map({'NT': 0, 'ADHD': 1})

y_dmn = matrix_DMN['ADHD/NT'].map({'NT': 0, 'ADHD': 1})

## Function

In [77]:
def run_rf(X, y, network_name='Network'):
    print(f"\n=== Analyzing {network_name} ===")
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [3, 5, 10, None],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': ['sqrt', 'log2']
    }

    grid_search = GridSearchCV(
        estimator=RandomForestClassifier(random_state=42, class_weight='balanced'),
        param_grid=param_grid,
        cv=5,
        scoring='accuracy',
        n_jobs=-1
    )

    grid_search.fit(X_train, y_train)
    print("Best parameters:", grid_search.best_params_)

    best_model = grid_search.best_estimator_
    y_pred_best = best_model.predict(X_test)

    # rf = RandomForestClassifier(n_estimators=100, random_state=42)
    # rf.fit(X_train, y_train)

    # y_pred = rf.predict(X_test)
    # # accuracy = accuracy_score(y_test, y_pred)
    # # print(f"Accuracy: {accuracy:.2f}")

    accuracy1 = accuracy_score(y_test, y_pred_best)
    print(f"Accuracy: {accuracy1:.2f}")

    auc = roc_auc_score(y_test, y_pred_best)
    print(f"AUC-ROC Score: {auc:.3f}")

    report = classification_report(y_test, y_pred_best)
    print("Classification Report:\n", report)

    cm = confusion_matrix(y_test, y_pred_best)
    print("Confusion Matrix:\n", cm)

In [78]:
# Visual Network
run_rf(X_visual, y_visual, 'Visual Network')


=== Analyzing Visual Network ===
Best parameters: {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
Accuracy: 0.67
AUC-ROC Score: 0.588
Classification Report:
               precision    recall  f1-score   support

           0       0.46      0.38      0.41        16
           1       0.74      0.80      0.77        35

    accuracy                           0.67        51
   macro avg       0.60      0.59      0.59        51
weighted avg       0.65      0.67      0.66        51

Confusion Matrix:
 [[ 6 10]
 [ 7 28]]


In [79]:
# Salience Network
run_rf(X_salience, y_salience, network_name='Salience Network')


=== Analyzing Salience Network ===
Best parameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Accuracy: 0.55
AUC-ROC Score: 0.468
Classification Report:
               precision    recall  f1-score   support

           0       0.27      0.25      0.26        16
           1       0.67      0.69      0.68        35

    accuracy                           0.55        51
   macro avg       0.47      0.47      0.47        51
weighted avg       0.54      0.55      0.54        51

Confusion Matrix:
 [[ 4 12]
 [11 24]]


In [80]:
# Central Executive / Frontoparietal Network
run_rf(X_fp, y_fp, network_name='Central Executive Network')


=== Analyzing Central Executive Network ===
Best parameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 100}
Accuracy: 0.59
AUC-ROC Score: 0.564
Classification Report:
               precision    recall  f1-score   support

           0       0.38      0.50      0.43        16
           1       0.73      0.63      0.68        35

    accuracy                           0.59        51
   macro avg       0.56      0.56      0.55        51
weighted avg       0.62      0.59      0.60        51

Confusion Matrix:
 [[ 8  8]
 [13 22]]


In [94]:
matrix_DMN

Unnamed: 0,Participant_ID,Condition,MPFC-PCC,MPFC-LP(L),MPFC-LP(R),PCC-LP(L),PCC-LP(R),LP(L)-LP(R),Averaged_CC,ADHD/NT,Gender
0,200,rest,0.322430,0.12195,0.29818,0.524580,0.83889,0.40638,0.41874,NT,F
1,201,rest,0.547730,0.30204,0.41196,0.759300,0.85822,0.79440,0.61228,ADHD,F
2,202,rest,0.607870,0.76637,0.90228,0.883640,0.90088,1.26600,0.88784,NT,F
3,205,rest,0.580950,0.54134,0.64500,0.865910,0.90772,1.24120,0.79702,NT,F
4,207,rest,0.478690,0.32974,0.38599,0.483590,0.51843,0.51397,0.45173,NT,F
...,...,...,...,...,...,...,...,...,...,...,...
163,49,task,0.495390,0.86287,0.51024,0.574510,0.41247,0.68197,0.58957,NT,F
164,50,task,0.130060,0.58762,0.86898,0.638170,0.32941,1.28890,0.64052,NT,M
165,51,task,0.414070,0.71636,0.71250,0.232800,0.55219,0.70223,0.55502,NT,F
166,52,task,0.085158,0.88539,0.81971,0.024801,0.17517,1.11880,0.51817,NT,F


In [106]:
X_combined = pd.concat([X_visual, X_salience, X_fp], ignore_index=True)
y_combined = pd.concat([y_visual, y_salience, y_fp], ignore_index=True)

run_rf(X_combined, y_combined, network_name='Network')


=== Analyzing Network ===
Best parameters: {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Accuracy: 0.61
AUC-ROC Score: 0.557
Classification Report:
               precision    recall  f1-score   support

           0       0.49      0.34      0.40        59
           1       0.65      0.77      0.71        93

    accuracy                           0.61       152
   macro avg       0.57      0.56      0.55       152
weighted avg       0.59      0.61      0.59       152

Confusion Matrix:
 [[20 39]
 [21 72]]


In [105]:
X_combined = pd.concat([X_visual, X_salience, X_fp, X_DMN], ignore_index=True)
y_combined = pd.concat([y_visual, y_salience, y_fp, y_dmn], ignore_index=True)

run_rf(X_combined, y_combined, network_name='Network')



=== Analyzing Network ===
Best parameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}
Accuracy: 0.62
AUC-ROC Score: 0.577
Classification Report:
               precision    recall  f1-score   support

           0       0.47      0.42      0.45        73
           1       0.69      0.73      0.71       129

    accuracy                           0.62       202
   macro avg       0.58      0.58      0.58       202
weighted avg       0.61      0.62      0.61       202

Confusion Matrix:
 [[31 42]
 [35 94]]
