In [5]:
import numpy as np
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from eipy.ei import EnsembleIntegration
from eipy.additional_ensembles import MeanAggregation, CES
from sklearn.model_selection import train_test_split
import pandas as pd
import pickle as pkl

In [6]:
# Load the pickled data object used by the rest of the notebook.
# The context manager guarantees the file handle is closed after loading.
# NOTE(review): pickle.load can execute arbitrary code; only load files from a trusted source.
with open("/home/opc/block_vol/tadpole/tadpole_data_imptn_norm.pickle", "rb") as data_file:
    data = pkl.load(data_file)

In [7]:
# Pull the train and test feature splits out of the "cn_bl" entry of the loaded data.
X_train, X_test = (data["cn_bl"][split] for split in ("train", "test"))

In [13]:
# Training labels: read the CSV and keep the "last_dx" column as a NumPy array.
# Only "last_dx" is selected, so any other column in the file (e.g. a saved
# index column) is simply ignored and nothing is mutated in place.
y_train = pd.read_csv("/home/opc/block_vol/tadpole/tadpole_training_labels.csv")["last_dx"].to_numpy()

array(['MCI', 'CN', 'CN', 'CN', 'MCI', 'CN', 'DEM', 'DEM', 'CN', 'CN',
       'CN', 'MCI', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'MCI', 'CN', 'CN',
       'MCI', 'CN', 'CN', 'CN', 'DEM', 'CN', 'CN', 'CN', 'CN', 'DEM',
       'DEM', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'MCI',
       'MCI', 'CN', 'MCI', 'CN', 'CN', 'CN', 'CN', 'MCI', 'CN', 'CN',
       'CN', 'CN', 'CN', 'DEM', 'CN', 'DEM', 'CN', 'CN', 'CN', 'CN', 'CN',
       'CN', 'MCI', 'CN', 'CN', 'MCI', 'CN', 'MCI', 'DEM', 'CN', 'CN',
       'DEM', 'CN', 'CN', 'CN', 'DEM', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN',
       'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN',
       'CN', 'CN', 'MCI', 'CN', 'CN', 'CN', 'MCI', 'CN', 'CN', 'CN', 'CN',
       'CN', 'CN', 'CN', 'CN', 'CN', 'DEM', 'CN', 'CN', 'CN', 'CN', 'CN',
       'CN', 'CN', 'MCI', 'MCI', 'CN', 'CN', 'DEM', 'CN', 'CN', 'CN',
       'DEM', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN',
       'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN

In [14]:
# Test labels: read the CSV and keep the "last_dx" column as a NumPy array.
# Only "last_dx" is selected, so any other column in the file (e.g. a saved
# index column) is simply ignored and nothing is mutated in place.
y_test = pd.read_csv("/home/opc/block_vol/tadpole/tadpole_testing_labels.csv")["last_dx"].to_numpy()

array(['CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'DEM', 'CN', 'CN', 'CN',
       'CN', 'CN', 'CN', 'MCI', 'MCI', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN',
       'CN', 'CN', 'DEM', 'CN', 'MCI', 'MCI', 'CN', 'CN', 'CN', 'CN',
       'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN',
       'CN', 'CN', 'CN', 'CN', 'MCI', 'CN', 'MCI', 'CN', 'CN', 'CN', 'CN',
       'CN', 'CN', 'CN', 'MCI', 'CN', 'CN', 'DEM', 'CN', 'CN', 'CN', 'CN',
       'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'MCI', 'CN', 'CN', 'CN',
       'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'MCI', 'DEM', 'CN',
       'CN', 'CN', 'MCI', 'CN', 'CN', 'CN', 'CN', 'CN', 'CN', 'MCI',
       'DEM', 'CN', 'MCI', 'MCI', 'CN'], dtype=object)

In [16]:
# Base classifiers keyed by a short name; this mapping is handed to
# EnsembleIntegration via its `base_predictors` argument.
# All estimators use library defaults unless noted.
base_predictors = {
    "ADAB": AdaBoostClassifier(),
    "XGB": XGBClassifier(),
    "DT": DecisionTreeClassifier(),
    "RF": RandomForestClassifier(),
    "GB": GradientBoostingClassifier(),
    "KNN": KNeighborsClassifier(),
    "LR": LogisticRegression(),
    "NB": GaussianNB(),
    "MLP": MLPClassifier(),
    # probability=True makes SVC expose predict_proba.
    "SVM": SVC(probability=True),
}
# Meta-level predictors keyed by a short name; this mapping is handed to
# `train_meta` via its `meta_predictors` argument.
# The "S." prefix presumably marks stacking models (TODO confirm naming).
meta_predictors = {
    # Aggregation-style ensembles imported from eipy.additional_ensembles.
    "Mean": MeanAggregation(),
    "CES": CES(),
    # scikit-learn / xgboost classifiers with default settings.
    "S.ADAB": AdaBoostClassifier(),
    "S.XGB": XGBClassifier(),
    "S.DT": DecisionTreeClassifier(),
    "S.RF": RandomForestClassifier(),
    "S.GB": GradientBoostingClassifier(),
    "S.KNN": KNeighborsClassifier(),
    "S.LR": LogisticRegression(),
    "S.NB": GaussianNB(),
    "S.MLP": MLPClassifier(),
    # probability=True makes SVC expose predict_proba.
    "S.SVM": SVC(probability=True),
}

In [15]:
# One-vs-rest training: fit one binary EnsembleIntegration model per diagnosis
# label and collect the fitted objects in `class_EIs`, in the order of `labels`.
labels = ["CN", "MCI", "DEM"]
class_EIs = []
for label in labels:
    # Binary encoding for this round: the current label -> 1, every other label -> 0.
    encoding_dict = {outcome: 0 for outcome in labels}
    encoding_dict[label] = 1
    # y_train / y_test are NumPy arrays (created with .to_numpy()), and ndarrays
    # have no .map method. Wrap them in a Series to apply the encoding, then
    # convert back to an array.
    y_train_i = pd.Series(y_train).map(encoding_dict).to_numpy()
    # Encoded test labels are prepared here but not consumed within this cell.
    y_test_i = pd.Series(y_test).map(encoding_dict).to_numpy()
    EI = EnsembleIntegration(
        base_predictors=base_predictors,
        k_outer=5,
        k_inner=5,
        n_samples=1,
        sampling_strategy="undersampling",
        sampling_aggregation="mean",
        n_jobs=-1,
        random_state=38,
        project_name="toy",
        model_building=True,
    )
    # Train the base predictors on the binary target, then the meta predictors.
    EI.fit_base(X_train, y_train_i)
    EI.train_meta(meta_predictors=meta_predictors)
    class_EIs.append(EI)
    

{'CN': 1, 'MCI': 0, 'DEM': 0}
{'CN': 0, 'MCI': 1, 'DEM': 0}
{'CN': 0, 'MCI': 0, 'DEM': 1}
