In [1]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
import pandas as pd
import eipy.ei as e
from eipy.additional_ensembles import MeanAggregation, CES

In [2]:
from eipy.metrics import fmax_score
from sklearn.metrics import roc_auc_score, matthews_corrcoef

# Scoring functions keyed by short name; this mapping is handed to
# EnsembleIntegration through its `metrics` argument further down.
metrics = dict([
    ('f_max', fmax_score),        # imported from eipy.metrics
    ('auc', roc_auc_score),       # imported from sklearn.metrics
    ('mcc', matthews_corrcoef),   # imported from sklearn.metrics
])

In [3]:
# Base predictors keyed by a short label. Every estimator is created with
# its library defaults except the SVM, which is built with probability=True.
base_predictors = dict([
    ('ADAB', AdaBoostClassifier()),
    ('XGB', XGBClassifier()),
    ('DT', DecisionTreeClassifier()),
    ('RF', RandomForestClassifier()),
    ('GB', GradientBoostingClassifier()),
    ('KNN', KNeighborsClassifier()),
    ('LR', LogisticRegression()),
    ('NB', GaussianNB()),
    ('MLP', MLPClassifier()),
    ('SVM', SVC(probability=True)),
])

## data generation

In [4]:
import numpy as np

# Total number of synthetic samples used by the data-generation cells.
n_samples = 600

# Seed NumPy's global random generator so the generated data is repeatable.
np.random.seed(38)

### longitudinal modalities

In [5]:
import numpy as np

# Total number of samples (split evenly across the classes)
n_samples = 600

# Number of time steps recorded for every feature
n_time_steps = 8

# Number of target classes; selects which synthetic layout is generated below
n_classes = 3

if n_classes == 2:
    n_features = 12
    # Generate toy dataset with regularity
    X_class0 = np.random.randn(n_samples // 2, n_features, n_time_steps) + 1.5  # Add a bias to Class 0
    X_class1 = np.random.randn(n_samples // 2, n_features, n_time_steps) - 1.5  # Subtract a bias from Class 1
    X_l = np.concatenate([X_class0, X_class1])

    # Generate labels (two classes). They are stored in `y`, the name that the
    # shuffle and train/test-split cells read; `y_l` is kept as an alias
    # because this branch used to define only that name.
    y = y_l = np.concatenate([np.zeros(n_samples // 2), np.ones(n_samples // 2)])

else:
    n_features = 12 * n_classes
    # Generate toy dataset with less order and three classes
    X_class0 = np.random.randn(n_samples // 3, n_features, n_time_steps) + 1.5  # Add a bias to Class 0
    X_class1 = np.random.randn(n_samples // 3, n_features, n_time_steps) - 1.5  # Subtract a bias from Class 1
    X_class2 = np.random.randn(n_samples // 3, n_features, n_time_steps) * 0.5  # Introduce disorder in Class 2
    X_l = np.concatenate([X_class0, X_class1, X_class2])

    # Generate labels (three classes), in the same block order as X_l
    y = np.concatenate([np.zeros(n_samples // 3), np.ones(n_samples // 3), 2 * np.ones(n_samples // 3)])


### static modalities

In [6]:
'''n_classes = 3'''

def generate_data(n_samples, lim):
    """Draw points uniformly at random from an axis-aligned box.

    Parameters
    ----------
    n_samples : int
        Number of points (rows) to draw.
    lim : array-like of shape (n_features, 2)
        One ``[low, high]`` pair per feature.

    Returns
    -------
    numpy.ndarray of shape (n_samples, n_features)
        Each column holds draws from ``np.random.random`` rescaled to that
        feature's ``[low, high)`` interval.
    """
    bounds = np.array(lim)
    lower, upper = bounds[:, 0], bounds[:, 1]
    # One unit-interval draw per (sample, feature), stretched and shifted
    # column-wise via broadcasting.
    unit_draws = np.random.random((n_samples, bounds.shape[0]))
    return (upper - lower) * unit_draws + lower

# Each of the three row blocks receives a third of the samples.
n_samples_per = int(n_samples / 3)

# Every view has two columns and is built from three row blocks stacked in
# order. Within a block the first coordinate is drawn from [0, 1], [1, 2] or
# [0, 2]; the second coordinate always comes from [0, 1]. The three views
# rotate which block gets which range.
view_0 = np.concatenate([
    generate_data(n_samples_per, box)
    for box in ([[0., 1.], [0., 1.]],
                [[1., 2.], [0., 1.]],
                [[0., 2.], [0., 1.]])
])

view_1 = np.concatenate([
    generate_data(n_samples_per, box)
    for box in ([[1., 2.], [0., 1.]],
                [[0., 2.], [0., 1.]],
                [[0., 1.], [0., 1.]])
])

view_2 = np.concatenate([
    generate_data(n_samples_per, box)
    for box in ([[0., 2.], [0., 1.]],
                [[0., 1.], [0., 1.]],
                [[1., 2.], [0., 1.]])
])

# Put the three two-column views side by side (six columns in total).
X_s = np.concatenate([view_0, view_1, view_2], axis=1)

In [7]:
# Draw a single random row order and apply it to both feature arrays and to
# the labels, so the rows of all three stay aligned after shuffling.
shuffle_indices = np.random.permutation(n_samples)
X_l, X_s, y = (array[shuffle_indices] for array in (X_l, X_s, y))

In [8]:
from sklearn.model_selection import train_test_split
# Split both feature arrays and the labels in ONE call: train_test_split
# applies the same row partition to every array it receives, so longitudinal
# rows, static rows and labels stay aligned by construction rather than by
# two separate calls that merely share random_state/stratify. This also
# avoids assigning to `_`, which IPython uses for the last cell output.
X_train_l, X_test_l, X_train_s, X_test_s, y_train, y_test = train_test_split(
    X_l, X_s, y, test_size=0.25, random_state=38, stratify=y
)

In [9]:
# Cut the feature axis (axis 1) of the longitudinal arrays into three
# modalities of 12 consecutive features each; the sample and time-step axes
# are kept whole.
X_train_l_dict = {
    name: X_train_l[:, start:start + 12, :]
    for name, start in (("Longitudinal_Modality_1", 0),
                        ("Longitudinal_Modality_2", 12),
                        ("Longitudinal_Modality_3", 24))
}

# Same partition for the held-out rows.
X_test_l_dict = {
    name: X_test_l[:, start:start + 12, :]
    for name, start in (("Longitudinal_Modality_1", 0),
                        ("Longitudinal_Modality_2", 12),
                        ("Longitudinal_Modality_3", 24))
}

In [10]:
# Cut the columns of the static arrays into three modalities: columns 0-1,
# columns 2-3, and everything from column 4 onwards.
X_train_s_dict = {
    name: X_train_s[:, cols]
    for name, cols in (("Static_Modality_1", slice(0, 2)),
                       ("Static_Modality_2", slice(2, 4)),
                       ("Static_Modality_3", slice(4, None)))
}

# Same partition for the held-out rows.
X_test_s_dict = {
    name: X_test_s[:, cols]
    for name, cols in (("Static_Modality_1", slice(0, 2)),
                       ("Static_Modality_2", slice(2, 4)),
                       ("Static_Modality_3", slice(4, None)))
}

In [11]:
# Development aid: reload the eipy.ei module (bound to `e` in the import
# cell) so that edits made to its source file are picked up without
# restarting the kernel.
import importlib
importlib.reload(e)
# NOTE(review): only needed while eipy itself is being edited; consider
# removing this cell (or using %autoreload) once the library code is stable.

<module 'eipy.ei' from '/home/opc/eipy/eipy/ei.py'>

# Time Series Training
Train the base predictors (BPs) separately at every time point, then train an LSTM on their per-time-step outputs and gather its predictions for classification.

In [12]:
# For every time step, fit the base predictors on that step's slice of each
# longitudinal modality and keep the resulting ensemble (meta) data.
# Each entry of meta_data_l is the list
#   [ensemble_training_data, ensemble_test_data,
#    ensemble_training_data_final, base_summary]
# for one time step, in time-step order.
meta_data_l = []
for i in range(X_l.shape[-1]):
    # Index i on the last axis selects one time step from every modality.
    X_train_timestep = {k: v[:, :, i] for k, v in X_train_l_dict.items()}
    EI_for_timestep = e.EnsembleIntegration(
        base_predictors=base_predictors,
        k_outer=5,
        k_inner=5,
        n_samples=1,
        sampling_strategy=None,
        sampling_aggregation="mean",
        n_jobs=-1,
        metrics=metrics,
        random_state=38,
        # f-string so the step index is actually interpolated; without the
        # prefix every step was named with the literal text "time step {i}".
        project_name=f"time step {i}",
        model_building=False,
    )
    print(f"generating metadata for timestep {i}")
    EI_for_timestep.fit_base(X_train_timestep, y_train)
    meta_data_l.append([
        EI_for_timestep.ensemble_training_data,
        EI_for_timestep.ensemble_test_data,
        EI_for_timestep.ensemble_training_data_final,
        EI_for_timestep.base_summary,
    ])

generating metadata for timestep 0
Training base predictors on None...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%




generating metadata for timestep 1
Training base predictors on None...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%




generating metadata for timestep 2
Training base predictors on None...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%




generating metadata for timestep 3
Training base predictors on None...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%




generating metadata for timestep 4
Training base predictors on None...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%




generating metadata for timestep 5
Training base predictors on None...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%




generating metadata for timestep 6
Training base predictors on None...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%




generating metadata for timestep 7
Training base predictors on None...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Rearrange the metadata so that it is grouped by fold and, within each fold, ordered by time step.

In [13]:
# Every entry of meta_data_l is a 4-item list
# [training data, test data, final training data, base summary] for one
# time step. Regroup the first two so the outer index runs over positions
# 0..4 of those per-step lists and the inner index over time steps.
LSTM_training_data = [
    [train[fold] for train, _test, _final, _summary in meta_data_l]
    for fold in range(5)
]
LSTM_test_data = [
    [test[fold] for _train, test, _final, _summary in meta_data_l]
    for fold in range(5)
]
# The final training data is kept as one item per time step.
LSTM_training_data_final = [final for _train, _test, final, _summary in meta_data_l]

In [14]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Feature width of one time step before any per-class expansion: one column
# per (modality, base predictor) pair, i.e. 3 * 10 = 30 for the dictionaries
# defined earlier in this notebook.
n_meta_columns = len(X_train_l_dict) * len(base_predictors)

model = Sequential()

if n_classes == 2:
    # Binary case: a single sigmoid output.
    # NOTE(review): assumes one metadata column per (modality, predictor)
    # pair in the binary setting, matching the previous hard-coded 30 - confirm.
    model.add(LSTM(units=50, input_shape=(n_time_steps, n_meta_columns)))  # return_sequences=True to get prediction vectors at every time step
    model.add(Dense(units=1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
else:
    # Multiclass case: the metadata holds one column per class for every
    # (modality, predictor) pair, and the softmax layer needs one unit per
    # class, so both sizes follow n_classes instead of being fixed at 3.
    model.add(LSTM(units=50, input_shape=(n_time_steps, n_meta_columns * n_classes)))  # return_sequences=True to get prediction vectors at every time step
    model.add(Dense(units=n_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])



2023-12-12 20:25:15.079765: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-12 20:25:15.108321: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-12 20:25:15.108350: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-12 20:25:15.109127: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-12 20:25:15.113876: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-12 20:25:15.114413: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [15]:
# The only ensemble predictor is the Keras model built in the previous cell.
ensembles = dict([("LSTM", model)])

# Create an EnsembleIntegration object without calling fit_base on it ...
LSTMs = e.EnsembleIntegration(
    project_name="test",
    base_predictors=base_predictors,
    metrics=metrics,
    k_outer=5,
    k_inner=5,
    n_samples=1,
    sampling_strategy=None,
    sampling_aggregation=None,
    model_building=False,
    random_state=38,
    n_jobs=-1,
)

# ... and fill in the attributes directly with the metadata that was
# regrouped from the per-time-step runs.
LSTMs.modality_names = ["stuff"]
LSTMs.ensemble_training_data = LSTM_training_data
LSTMs.ensemble_test_data = LSTM_test_data
LSTMs.ensemble_training_data_final = LSTM_training_data_final

In [16]:
# Inspect one metadata table: outer index 0 of the regrouped training data,
# then time step 0 within it.
LSTMs.ensemble_training_data[0][0]

modality,Longitudinal_Modality_1,Longitudinal_Modality_1,Longitudinal_Modality_1,Longitudinal_Modality_1,Longitudinal_Modality_1,Longitudinal_Modality_1,Longitudinal_Modality_1,Longitudinal_Modality_1,Longitudinal_Modality_1,Longitudinal_Modality_1,...,Longitudinal_Modality_3,Longitudinal_Modality_3,Longitudinal_Modality_3,Longitudinal_Modality_3,Longitudinal_Modality_3,Longitudinal_Modality_3,Longitudinal_Modality_3,Longitudinal_Modality_3,Longitudinal_Modality_3,labels
base predictor,ADAB,ADAB,ADAB,XGB,XGB,XGB,DT,DT,DT,RF,...,NB,NB,NB,MLP,MLP,MLP,SVM,SVM,SVM,Unnamed: 21_level_1
sample,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Unnamed: 21_level_2
class,0,1,2,0,1,2,0,1,2,0,...,0,1,2,0,1,2,0,1,2,Unnamed: 21_level_3
0,5.251190e-01,3.199779e-06,4.748778e-01,0.997148,0.001122,0.001729,1.0,0.0,0.0,1.00,...,1.000000e+00,7.989039e-25,1.953481e-33,9.999856e-01,1.101943e-09,0.000014,0.996735,0.002373,0.000891,0.0
1,5.251190e-01,3.199779e-06,4.748778e-01,0.981133,0.001357,0.017510,1.0,0.0,0.0,0.92,...,1.000000e+00,1.182075e-17,9.729428e-18,9.971371e-01,8.671220e-07,0.002862,0.990386,0.004573,0.005040,0.0
2,5.251190e-01,3.199779e-06,4.748778e-01,0.016918,0.004002,0.979080,1.0,0.0,0.0,0.17,...,3.325784e-10,4.983025e-07,9.999995e-01,3.793939e-03,3.283699e-02,0.963369,0.001620,0.021772,0.976608,2.0
3,6.021478e-09,9.999993e-01,6.501027e-07,0.001693,0.986599,0.011709,0.0,1.0,0.0,0.00,...,1.687843e-23,1.000000e+00,1.040526e-23,3.324525e-07,9.997490e-01,0.000251,0.005977,0.989655,0.004368,1.0
4,4.175610e-01,9.878799e-06,5.824291e-01,0.002239,0.003925,0.993836,0.0,0.0,1.0,0.01,...,4.746891e-08,4.970004e-10,1.000000e+00,2.386376e-02,3.507003e-03,0.972629,0.008536,0.003850,0.987614,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,1.000000e+00,7.882588e-13,8.014463e-12,0.997180,0.000938,0.001882,1.0,0.0,0.0,1.00,...,1.000000e+00,3.331025e-23,2.748271e-42,9.943449e-01,7.050346e-09,0.005655,0.932318,0.057637,0.010045,0.0
356,5.194164e-06,5.004942e-01,4.995006e-01,0.017686,0.002141,0.980172,0.0,0.0,1.0,0.13,...,7.790807e-08,1.463195e-07,9.999998e-01,2.121340e-02,1.279666e-02,0.965990,0.008002,0.013118,0.978880,2.0
357,1.000000e+00,2.075669e-13,4.757039e-10,0.997378,0.000873,0.001749,1.0,0.0,0.0,1.00,...,1.000000e+00,1.160302e-22,3.438703e-27,9.997978e-01,4.115177e-09,0.000202,0.994655,0.003077,0.002269,0.0
358,5.194164e-06,5.004942e-01,4.995006e-01,0.001940,0.001842,0.996218,0.0,0.0,1.0,0.03,...,1.933703e-08,9.689083e-09,1.000000e+00,5.835056e-03,3.841587e-03,0.990323,0.005713,0.005498,0.988789,2.0


In [17]:
# Pass the `ensembles` dict (its only entry is the Keras LSTM) to
# fit_ensemble; LSTMs was given the regrouped per-time-step metadata
# through its ensemble_* attributes when it was set up.
LSTMs.fit_ensemble(ensemble_predictors=ensembles)

Analyzing ensembles: |          |  0%

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Analyzing ensembles: |██████████|100%


<eipy.ei.EnsembleIntegration at 0x7f2e6dc2ca00>

In [18]:
# Display the "metrics" entry of the ensemble summary.
LSTMs.ensemble_summary["metrics"]

Unnamed: 0,LSTM
precision,1.0
recall,1.0
f1,1.0


In [19]:
# Display the stored ensemble predictions; the rendered table has one
# column for the LSTM output and one for the labels.
LSTMs.ensemble_predictions

Unnamed: 0,LSTM,labels
0,"[0.0012731026, 0.0012062319, 0.9975207]",2.0
1,"[0.99810755, 0.0008441844, 0.0010482991]",0.0
2,"[0.99809515, 0.0008402936, 0.0010645002]",0.0
3,"[0.99809074, 0.00083391333, 0.0010752946]",0.0
4,"[0.0012862621, 0.0013876104, 0.99732614]",2.0
...,...,...
445,"[0.00012395647, 0.00014904412, 0.9997269]",2.0
446,"[0.00011791702, 0.00014521994, 0.9997368]",2.0
447,"[9.010268e-05, 0.9997647, 0.00014510354]",1.0
448,"[0.9997774, 0.00010366881, 0.000118941134]",0.0


# Static Training

In [20]:
# Static modalities: a single EnsembleIntegration run over the three static
# feature blocks, using the same cross-validation and seed settings as the
# longitudinal runs.
EI = e.EnsembleIntegration(
    project_name="test",
    base_predictors=base_predictors,
    metrics=metrics,
    k_outer=5,
    k_inner=5,
    n_samples=1,
    sampling_strategy=None,
    sampling_aggregation=None,
    model_building=False,
    random_state=38,
    n_jobs=-1,
)
EI.fit_base(X_train_s_dict, y_train)

Training base predictors on None...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%






In [26]:
# Inspect the first element of the ensemble training data produced by the
# static-modality run.
# NOTE(review): this cell shows execution count In [26] directly after
# In [20], so it was run out of sequence - re-run the notebook top to bottom.
EI.ensemble_training_data[0]

modality,Static_Modality_1,Static_Modality_1,Static_Modality_1,Static_Modality_1,Static_Modality_1,Static_Modality_1,Static_Modality_1,Static_Modality_1,Static_Modality_1,Static_Modality_1,...,Static_Modality_3,Static_Modality_3,Static_Modality_3,Static_Modality_3,Static_Modality_3,Static_Modality_3,Static_Modality_3,Static_Modality_3,Static_Modality_3,labels
base predictor,ADAB,ADAB,ADAB,XGB,XGB,XGB,DT,DT,DT,RF,...,NB,NB,NB,MLP,MLP,MLP,SVM,SVM,SVM,Unnamed: 21_level_1
sample,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Unnamed: 21_level_2
class,0,1,2,0,1,2,0,1,2,0,...,0,1,2,0,1,2,0,1,2,Unnamed: 21_level_3
0,5.035709e-01,2.223164e-16,0.496429,0.568579,0.010540,0.420881,0.0,0.0,1.0,0.39,...,0.267710,0.000185,0.732106,0.324079,0.015917,0.660004,0.347970,0.003142,0.648888,0.0
1,5.035709e-01,2.223164e-16,0.496429,0.670562,0.001786,0.327652,1.0,0.0,0.0,0.74,...,0.284377,0.005334,0.710289,0.381597,0.043892,0.574510,0.305196,0.003551,0.691253,0.0
2,2.222862e-16,5.033626e-01,0.496637,0.017583,0.232240,0.750177,0.0,0.0,1.0,0.01,...,0.278408,0.004855,0.716737,0.387690,0.047340,0.564970,0.304786,0.003163,0.692052,2.0
3,2.222862e-16,5.033626e-01,0.496637,0.000732,0.949683,0.049584,0.0,1.0,0.0,0.00,...,0.196304,0.801793,0.001903,0.228024,0.743035,0.028940,0.279475,0.713706,0.006819,1.0
4,5.035709e-01,2.223164e-16,0.496429,0.531488,0.001549,0.466963,1.0,0.0,0.0,0.51,...,0.385285,0.054663,0.560052,0.429536,0.122205,0.448260,0.364585,0.022322,0.613094,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,5.030655e-01,2.222460e-16,0.496935,0.550837,0.000753,0.448411,0.0,0.0,1.0,0.70,...,0.306048,0.000049,0.693903,0.313804,0.013566,0.672630,0.544425,0.013487,0.442088,0.0
356,2.223677e-16,5.039007e-01,0.496099,0.000540,0.994551,0.004909,0.0,1.0,0.0,0.04,...,0.286277,0.001806,0.711917,0.378183,0.031623,0.590194,0.293650,0.004968,0.701382,2.0
357,5.030655e-01,2.222460e-16,0.496935,0.789586,0.003044,0.207370,1.0,0.0,0.0,0.52,...,0.228226,0.771696,0.000077,0.250476,0.730157,0.019367,0.312648,0.681683,0.005669,0.0
358,2.223677e-16,5.039007e-01,0.496099,0.001445,0.970946,0.027609,0.0,1.0,0.0,0.03,...,0.410600,0.107115,0.482285,0.425756,0.186606,0.387638,0.380765,0.053917,0.565318,2.0
