# Balanced Bagging Classifier

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load DTI_with_group.csv, drop the columns that are not used as features
# (unnecessary columns + confounding), and discard rows with empty values.
df = (
    pd.read_csv('DTI_with_group.csv')
    .drop(columns=[
        'IMAGEUID', 'COLPROT', 'RID', 'VISCODE', 'VISCODE2', 'EXAMDATE',
        'VERSION', 'MANUFACTURER', 'RUNDATE', 'STATUS', 'QC', 'update_stamp',
        'Age', 'VOLUMES', 'Sex',
    ])
    .dropna()
)

In [2]:
from imblearn.ensemble import BalancedBaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# Balanced bagging ensemble with decision-tree base learners.
# The base learner is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in later imbalanced-learn releases, and the
# first positional slot is accepted under either name.
bbc = BalancedBaggingClassifier(
    DecisionTreeClassifier(),
    sampling_strategy='auto',
    replacement=False,
    random_state=0,
)

# Select features and target by column name. Picking the target by position
# (last column) while excluding it from X by name lets the two drift apart if
# the CSV column order changes; by-name selection raises KeyError instead of
# silently leaking the label into X.
X = df.drop(columns='Research Group').values
y = df['Research Group'].values

# NOTE(review): the split is not stratified although the classes look
# imbalanced; consider stratify=y (this would change the reported score).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=10
)
bbc.fit(X_train, y_train)



In [3]:
from sklearn.metrics import balanced_accuracy_score
# Predict on the held-out split and report balanced accuracy
# (the average of per-class recall).
y_pred = bbc.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

0.28127016783760345

### Combined MCI groups

In [4]:
# Load DTI_with_group_comb.csv, drop the columns that are not used as features
# (unnecessary columns + confounding), and discard rows with empty values.
df = (
    pd.read_csv('DTI_with_group_comb.csv')
    .drop(columns=[
        'IMAGEUID', 'COLPROT', 'RID', 'VISCODE', 'VISCODE2', 'EXAMDATE',
        'VERSION', 'MANUFACTURER', 'RUNDATE', 'STATUS', 'QC', 'update_stamp',
        'Age', 'VOLUMES', 'Sex',
    ])
    .dropna()
)

In [5]:
# Select features and target by column name. Picking the target by position
# (last column) while excluding it from X by name lets the two drift apart if
# the CSV column order changes; by-name selection raises KeyError instead of
# silently leaking the label into X.
X = df.drop(columns='Research Group').values
y = df['Research Group'].values

# Same split parameters as the first bagging run (test_size=0.33, seed 10).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=10
)
# Re-fit the existing bagging ensemble on the combined-group training data.
bbc.fit(X_train, y_train)



In [6]:
# Predict on the held-out split and report balanced accuracy
# (the average of per-class recall).
y_pred = bbc.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

0.4358451672010994

In [7]:
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
    f1_score,
)
# Confusion matrix of the most recent predictions: rows are true classes,
# columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[ 9,  7,  4],
       [26, 88, 63],
       [26, 45, 40]])

# Balanced Random Forest Classifier

In [8]:
from imblearn.ensemble import BalancedRandomForestClassifier
# Balanced random forest; every class is resampled ("all") with replacement.
# NOTE(review): 9000 trees makes a full re-run slow; confirm this size is needed.
brf = BalancedRandomForestClassifier(
    n_estimators=9000,
    random_state=0,
    sampling_strategy="all",
    replacement=True,
)

# Load DTI_with_group.csv, drop the columns that are not used as features
# (unnecessary columns + confounding), and discard rows with empty values.
df = (
    pd.read_csv('DTI_with_group.csv')
    .drop(columns=[
        'IMAGEUID', 'COLPROT', 'RID', 'VISCODE', 'VISCODE2', 'EXAMDATE',
        'VERSION', 'MANUFACTURER', 'RUNDATE', 'STATUS', 'QC', 'update_stamp',
        'Age', 'VOLUMES', 'Sex',
    ])
    .dropna()
)

# Select features and target by column name. Picking the target by position
# (last column) while excluding it from X by name lets the two drift apart if
# the CSV column order changes; by-name selection raises KeyError instead of
# silently leaking the label into X.
X = df.drop(columns='Research Group').values
y = df['Research Group'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=10
)
brf.fit(X_train, y_train)

In [9]:
# Predict on the held-out split and report balanced accuracy
# (the average of per-class recall).
y_pred = brf.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

0.3599202879641086

### Combined MCI groups

In [10]:
# Load DTI_with_group_comb.csv, drop the columns that are not used as features
# (unnecessary columns + confounding), and discard rows with empty values.
df = (
    pd.read_csv('DTI_with_group_comb.csv')
    .drop(columns=[
        'IMAGEUID', 'COLPROT', 'RID', 'VISCODE', 'VISCODE2', 'EXAMDATE',
        'VERSION', 'MANUFACTURER', 'RUNDATE', 'STATUS', 'QC', 'update_stamp',
        'Age', 'VOLUMES', 'Sex',
    ])
    .dropna()
)

In [11]:
# Select features and target by column name. Picking the target by position
# (last column) while excluding it from X by name lets the two drift apart if
# the CSV column order changes; by-name selection raises KeyError instead of
# silently leaking the label into X.
# `.values` is applied to X as in the other cells, so every model in this
# notebook is fitted on a plain NumPy array.
X = df.drop(columns='Research Group').values
y = df['Research Group'].values

# NOTE(review): random_state=13 here, whereas the bagging cells split with
# random_state=10, so the scores are computed on different test sets and are
# not directly comparable. Seed kept as-is to preserve the recorded output.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=13
)
# Re-fit the existing balanced random forest on the combined-group data.
brf.fit(X_train, y_train)

In [12]:
# Predict on the held-out split and report balanced accuracy
# (the average of per-class recall).
y_pred = brf.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

0.49483700290151905

In [13]:
# Confusion matrix of the most recent predictions: rows are true classes,
# columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[  8,   4,   2],
       [ 28, 113,  45],
       [ 30,  45,  33]])

# Boosting

In [14]:
from imblearn.ensemble import RUSBoostClassifier
# Boosting with random under-sampling at each round.
# NOTE(review): `algorithm='SAMME.R'` is deprecated in recent scikit-learn
# AdaBoost implementations; confirm against the installed version.
# NOTE(review): 12000 boosting rounds makes a full re-run slow.
rusboost = RUSBoostClassifier(
    n_estimators=12000,
    algorithm='SAMME.R',
    random_state=55,
)

# Load DTI_with_group.csv, drop the columns that are not used as features
# (unnecessary columns + confounding), and discard rows with empty values.
df = (
    pd.read_csv('DTI_with_group.csv')
    .drop(columns=[
        'IMAGEUID', 'COLPROT', 'RID', 'VISCODE', 'VISCODE2', 'EXAMDATE',
        'VERSION', 'MANUFACTURER', 'RUNDATE', 'STATUS', 'QC', 'update_stamp',
        'Age', 'VOLUMES', 'Sex',
    ])
    .dropna()
)

# Select features and target by column name. Picking the target by position
# (last column) while excluding it from X by name lets the two drift apart if
# the CSV column order changes; by-name selection raises KeyError instead of
# silently leaking the label into X.
X = df.drop(columns='Research Group').values
y = df['Research Group'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=10
)
rusboost.fit(X_train, y_train)

In [15]:
# Predict on the held-out split and report balanced accuracy
# (the average of per-class recall).
y_pred = rusboost.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

0.16560509554140126

### Combined MCI groups

In [16]:
# Load DTI_with_group_comb.csv, drop the columns that are not used as features
# (unnecessary columns + confounding), and discard rows with empty values.
df = (
    pd.read_csv('DTI_with_group_comb.csv')
    .drop(columns=[
        'IMAGEUID', 'COLPROT', 'RID', 'VISCODE', 'VISCODE2', 'EXAMDATE',
        'VERSION', 'MANUFACTURER', 'RUNDATE', 'STATUS', 'QC', 'update_stamp',
        'Age', 'VOLUMES', 'Sex',
    ])
    .dropna()
)

# Select features and target by column name. Picking the target by position
# (last column) while excluding it from X by name lets the two drift apart if
# the CSV column order changes; by-name selection raises KeyError instead of
# silently leaking the label into X.
X = df.drop(columns='Research Group').values
y = df['Research Group'].values

# NOTE(review): random_state=13 here, whereas the bagging cells split with
# random_state=10, so the scores are computed on different test sets and are
# not directly comparable. Seed kept as-is to preserve the recorded output.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=13
)
# Re-fit the existing RUSBoost model on the combined-group training data.
rusboost.fit(X_train, y_train)

In [17]:
# Predict on the held-out split and report balanced accuracy
# (the average of per-class recall).
y_pred = rusboost.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

0.40412186379928317

# Easy Ensemble Classifier

In [18]:
from imblearn.ensemble import EasyEnsembleClassifier
# Easy Ensemble classifier with library defaults and a fixed seed.
eec = EasyEnsembleClassifier(random_state=0)

# Load DTI_with_group.csv, drop the columns that are not used as features
# (unnecessary columns + confounding), and discard rows with empty values.
df = (
    pd.read_csv('DTI_with_group.csv')
    .drop(columns=[
        'IMAGEUID', 'COLPROT', 'RID', 'VISCODE', 'VISCODE2', 'EXAMDATE',
        'VERSION', 'MANUFACTURER', 'RUNDATE', 'STATUS', 'QC', 'update_stamp',
        'Age', 'VOLUMES', 'Sex',
    ])
    .dropna()
)

# Select features and target by column name. Picking the target by position
# (last column) while excluding it from X by name lets the two drift apart if
# the CSV column order changes; by-name selection raises KeyError instead of
# silently leaking the label into X.
X = df.drop(columns='Research Group').values
y = df['Research Group'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=10
)
eec.fit(X_train, y_train)

In [19]:
# Predict on the held-out split and report balanced accuracy
# (the average of per-class recall).
y_pred = eec.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

0.2691562392232542

### Combined MCI groups

In [20]:
# Load DTI_with_group_comb.csv, drop the columns that are not used as features
# (unnecessary columns + confounding), and discard rows with empty values.
df = (
    pd.read_csv('DTI_with_group_comb.csv')
    .drop(columns=[
        'IMAGEUID', 'COLPROT', 'RID', 'VISCODE', 'VISCODE2', 'EXAMDATE',
        'VERSION', 'MANUFACTURER', 'RUNDATE', 'STATUS', 'QC', 'update_stamp',
        'Age', 'VOLUMES', 'Sex',
    ])
    .dropna()
)

# Select features and target by column name. Picking the target by position
# (last column) while excluding it from X by name lets the two drift apart if
# the CSV column order changes; by-name selection raises KeyError instead of
# silently leaking the label into X.
X = df.drop(columns='Research Group').values
y = df['Research Group'].values

# NOTE(review): random_state=19 here, whereas other cells in this notebook
# split with random_state=10 or 13, so the scores are computed on different
# test sets and are not directly comparable. Seed kept as-is to preserve the
# recorded output.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=19
)
# Re-fit the existing Easy Ensemble model on the combined-group training data.
eec.fit(X_train, y_train)

In [21]:
# Predict on the held-out split and report balanced accuracy
# (the average of per-class recall).
y_pred = eec.predict(X_test)
balanced_accuracy_score(y_true=y_test, y_pred=y_pred)

0.4963624338624339