In [1]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
import pandas as pd
import eipy.ei as e
from eipy.additional_ensembles import MeanAggregation, CES

In [2]:
from eipy.metrics import fmax_score
from sklearn.metrics import roc_auc_score, matthews_corrcoef

# Scoring functions passed to EnsembleIntegration, keyed by the label
# under which each score is reported.
metrics = dict(
    f_max=fmax_score,
    auc=roc_auc_score,
    mcc=matthews_corrcoef,
)

In [3]:
# Base classifiers trained on every time step, keyed by a short label.
# All estimators use library defaults except the SVM, which needs
# probability=True so that it can emit class probabilities.
base_predictors = dict(
    # Boosted / tree-based models
    ADAB=AdaBoostClassifier(),
    XGB=XGBClassifier(),
    DT=DecisionTreeClassifier(),
    RF=RandomForestClassifier(),
    GB=GradientBoostingClassifier(),
    # Instance-based, linear and probabilistic models
    KNN=KNeighborsClassifier(),
    LR=LogisticRegression(),
    NB=GaussianNB(),
    # Neural network and kernel models
    MLP=MLPClassifier(),
    SVM=SVC(probability=True),
)

In [4]:
import numpy as np

# Fix the global NumPy seed so the synthetic data is identical on every run
np.random.seed(42)

# Dataset dimensions: total samples, time steps per sample,
# and features observed at each time step
n_samples = 400
n_time_steps = 8
n_features = 12

# Each class contributes half of the samples; arrays are laid out as
# (samples, features, time steps)
n_per_class = n_samples // 2
class_shape = (n_per_class, n_features, n_time_steps)

# Two Gaussian clouds: class 0 is shifted by +1.5, class 1 by -1.5
X_class0 = 1.5 + np.random.randn(*class_shape)
X_class1 = -1.5 + np.random.randn(*class_shape)
X = np.concatenate((X_class0, X_class1), axis=0)

# Labels: a run of zeros (class 0) followed by a run of ones (class 1)
y = np.repeat([0.0, 1.0], n_per_class)

# Apply one shared permutation so samples and labels stay aligned
shuffle_indices = np.random.permutation(n_samples)
X, y = X[shuffle_indices], y[shuffle_indices]

# Report the resulting shapes
print("X.shape:", X.shape)
print("y.shape:", y.shape)

X.shape: (400, 12, 8)
y.shape: (400,)


In [5]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the samples for testing; stratify on y so both splits
# keep the same class balance, and fix the seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

In [6]:
# Settings shared by the EnsembleIntegration instance built for each time step
base_stage_settings = dict(
    base_predictors=base_predictors,
    k_outer=5,
    k_inner=5,
    n_samples=1,
    sampling_strategy=None,
    sampling_aggregation=None,
    n_jobs=-1,
    metrics=metrics,
    random_state=38,
    project_name="diabetes",
    model_building=True,
)

# One entry per time step:
# [ensemble_training_data, ensemble_test_data, base_summary]
meta_data = []

# The last axis of X_train indexes time steps; fit a fresh set of base
# predictors on the 2-D (samples, features) slice of each one.
for i in range(X_train.shape[-1]):
    EI = e.EnsembleIntegration(**base_stage_settings)
    print(f"generating metadata for timestep {i}")
    EI.fit_base(X_train[:, :, i], y_train, modality_name="stuff")
    timestep_record = [
        EI.ensemble_training_data,
        EI.ensemble_test_data,
        EI.base_summary,
    ]
    meta_data.append(timestep_record)

generating metadata for timestep 0
Training base predictors on stuff...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%



... for final ensemble...


Generating ensemble training data: |██████████|100%
Training final base predictors: |██████████|100%




generating metadata for timestep 1
Training base predictors on stuff...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%



... for final ensemble...


Generating ensemble training data: |██████████|100%
Training final base predictors: |██████████|100%




generating metadata for timestep 2
Training base predictors on stuff...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%



... for final ensemble...


Generating ensemble training data: |██████████|100%
Training final base predictors: |██████████|100%




generating metadata for timestep 3
Training base predictors on stuff...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%



... for final ensemble...


Generating ensemble training data: |██████████|100%
Training final base predictors: |██████████|100%




generating metadata for timestep 4
Training base predictors on stuff...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%



... for final ensemble...


Generating ensemble training data: |██████████|100%
Training final base predictors: |██████████|100%




generating metadata for timestep 5
Training base predictors on stuff...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%



... for final ensemble...


Generating ensemble training data: |██████████|100%
Training final base predictors: |██████████|100%




generating metadata for timestep 6
Training base predictors on stuff...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%



... for final ensemble...


Generating ensemble training data: |██████████|100%
Training final base predictors: |██████████|100%




generating metadata for timestep 7
Training base predictors on stuff...
        
... for ensemble performance analysis...


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |██████████|100%



... for final ensemble...


Generating ensemble training data: |██████████|100%
Training final base predictors: |██████████|100%








The metadata is currently organised by time point, and by fold within each time point. We want to regroup it so that, for each fold, the time points are stacked together while the folds themselves are kept separate.

In [7]:
# Regroup the metadata from [time step][kind][fold] to [fold][time step].
# Each meta_data entry is [training data, test data, base summary], so
# index 0 selects the training data and index 1 the test data.
# NOTE(review): the 5 presumably mirrors k_outer=5 used when the metadata
# was generated - confirm and keep the two in sync.
LSTM_training_data = []
LSTM_test_data = []
for fold in range(5):
    LSTM_training_data.append([dfs[0][fold] for dfs in meta_data])
    LSTM_test_data.append([dfs[1][fold] for dfs in meta_data])

In [8]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

def build_lstm_model(time_steps, inputs_per_step, units=50):
    """Build and compile a single-layer LSTM binary classifier.

    Parameters
    ----------
    time_steps : int
        Length of each input sequence (second axis of the model input).
    inputs_per_step : int
        Number of values observed at every time step (third axis).
    units : int, default 50
        Size of the LSTM hidden state.

    Returns
    -------
    A compiled Keras ``Sequential`` model that expects input of shape
    ``(batch, time_steps, inputs_per_step)`` and outputs one sigmoid
    probability per sample.
    """
    lstm_model = Sequential()
    lstm_model.add(LSTM(units=units, input_shape=(time_steps, inputs_per_step)))
    lstm_model.add(Dense(units=1, activation='sigmoid'))
    lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return lstm_model


# The LSTM is used as the ensemble model, so its input is the base-predictor
# output at every time step, not the raw features. Keras expects
# (timesteps, features): the sequence length is the last axis of X_train and
# each step carries one value per base predictor.
# NOTE(review): the data handed to the model must still be a single
# (samples, time steps, predictors) array - a list with one frame per time
# step is rejected by Keras as multiple inputs.
lstm_time_steps = X_train.shape[2]
lstm_inputs_per_step = len(base_predictors)

# Build the LSTM model
model = build_lstm_model(lstm_time_steps, lstm_inputs_per_step)

# Save the model
model.save('lstm_model.h5')

# Load the model: rebuild the identical architecture, then restore the weights
loaded_model = build_lstm_model(lstm_time_steps, lstm_inputs_per_step)
loaded_model.load_weights('lstm_model.h5')

# Now, 'loaded_model' is ready for inference


2023-12-11 17:24:22.350227: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-11 17:24:22.378485: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-11 17:24:22.378516: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-11 17:24:22.379293: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-11 17:24:22.383929: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-11 17:24:22.384469: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [9]:
# Ensemble (meta) models to evaluate, keyed by display name
ensembles = dict(LSTM=loaded_model)

# Same configuration as used when generating the per-time-step metadata
ensemble_stage_settings = dict(
    base_predictors=base_predictors,
    k_outer=5,
    k_inner=5,
    n_samples=1,
    sampling_strategy=None,
    sampling_aggregation=None,
    n_jobs=-1,
    metrics=metrics,
    random_state=38,
    project_name="diabetes",
    model_building=True,
)
EI = e.EnsembleIntegration(**ensemble_stage_settings)

# Attach the regrouped per-fold metadata directly to the new instance
# instead of running fit_base on it.
EI.ensemble_training_data = LSTM_training_data
EI.ensemble_test_data = LSTM_test_data

In [13]:
# Development aid: re-import the `e` module (eipy.ei) so that edits made to
# its source file are picked up without restarting the kernel.
import importlib
importlib.reload(e)
# NOTE(review): importlib.reload does not update objects created before the
# reload, so an EI instance built earlier still uses the old class - rebuild
# it after reloading if the edited code is meant to run.

<module 'eipy.ei' from '/home/opc/eipy/eipy/ei.py'>

In [14]:
# Train and evaluate the ensemble models on the attached metadata.
# NOTE(review): the recorded run of this cell ends in a ValueError - Keras
# reports that the model expects 1 input but received 8 tensors of shape
# (None, 10).
EI.fit_ensemble(
    ensemble_predictors=ensembles,
)

Analyzing ensembles: |          |  0%

Analyzing ensembles: |          |  0%


ValueError: in user code:

    File "/home/opc/.venv/lib64/python3.9/site-packages/keras/src/engine/training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "/home/opc/.venv/lib64/python3.9/site-packages/keras/src/engine/training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/opc/.venv/lib64/python3.9/site-packages/keras/src/engine/training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "/home/opc/.venv/lib64/python3.9/site-packages/keras/src/engine/training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "/home/opc/.venv/lib64/python3.9/site-packages/keras/src/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/home/opc/.venv/lib64/python3.9/site-packages/keras/src/engine/input_spec.py", line 219, in assert_input_compatibility
        raise ValueError(

    ValueError: Layer "sequential_1" expects 1 input(s), but it received 8 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 10) dtype=float64>, <tf.Tensor 'IteratorGetNext:1' shape=(None, 10) dtype=float64>, <tf.Tensor 'IteratorGetNext:2' shape=(None, 10) dtype=float64>, <tf.Tensor 'IteratorGetNext:3' shape=(None, 10) dtype=float64>, <tf.Tensor 'IteratorGetNext:4' shape=(None, 10) dtype=float64>, <tf.Tensor 'IteratorGetNext:5' shape=(None, 10) dtype=float64>, <tf.Tensor 'IteratorGetNext:6' shape=(None, 10) dtype=float64>, <tf.Tensor 'IteratorGetNext:7' shape=(None, 10) dtype=float64>]
