In [57]:
import tensorflow as tf
import numpy as np
import pandas as pd
import keras
import pickle as pkl
import eipy.deep_ei as d
from scikeras.wrappers import KerasClassifier
import sys

In [82]:
from eipy.metrics import fmax_score, fmax_precision_recall_threshold
from sklearn.metrics import roc_auc_score, matthews_corrcoef

metric_funs = {
            'f_max': fmax_score,
            'auc': roc_auc_score,
            'mcc': matthews_corrcoef
            }

In [3]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [4]:
inanimate_labels=[0,1,8,9]
def binarize_array(arr):
    mask = np.isin(arr, inanimate_labels)

    arr = np.where(mask, 0, 1)

    return arr

In [5]:
y_train = binarize_array(y_train)
y_test = binarize_array(y_test)

In [None]:

with open("/home/opc/block_vol/cifar_10_resized_224/X_train.pkl", "rb") as file:
    X_train_resized=pkl.load(file=file)
file.close()
with open("/home/opc/block_vol/cifar_10_resized_224/X_test.pkl", "rb") as file:
    X_test_resized=pkl.load(file=file)
file.close()

In [9]:
from skimage.transform import resize
import pickle as pkl
new_size = (224, 224)

X_train_resized = np.empty((X_train.shape[0], new_size[0], new_size[1], 3))
X_test_resized = np.empty((X_test.shape[0], new_size[0], new_size[1], 3))

for i in range(X_train.shape[0]):
    X_train_resized[i] = resize(X_train[i], new_size, preserve_range=True, mode='reflect')
for i in range(X_test.shape[0]):
    X_test_resized[i] = resize(X_test[i], new_size, preserve_range=True, mode='reflect')

In [10]:
X_sample = X_train_resized[0:5000]
X_sample.shape

(5000, 224, 224, 3)

In [13]:
y_sample = y_train[0:5000]
y_sample.shape
pd.Series(y_sample.flatten()).value_counts()

(5000, 1)

In [30]:
X_test_sample = X_test[0:1000]
y_test_sample = y_test[0:1000]

X_test_sample.shape, pd.Series(y_test_sample.flatten()).value_counts()

((1000, 32, 32, 3),
 1    593
 0    407
 Name: count, dtype: int64)

In [None]:
# with open("/home/opc/block_vol/cifar_10_resized_224/X_train.pkl", "wb") as file:
#     pkl.dump(file=file, obj=X_train_resized)
# with open("/home/opc/block_vol/cifar_10_resized_224/X_test.pkl", "wb") as file:
#     pkl.dump(file=file, obj=X_test_resized)

In [240]:
import importlib
importlib.reload(d)

<module 'eipy.deep_ei' from '/home/opc/eipy/eipy/deep_ei.py'>

In [241]:
from keras import layers, models
from keras.applications import ResNet50
res_model = ResNet50(weights='imagenet', include_top=False)
for layer in res_model.layers:
    layer.trainable = False
averaging_layer = layers.GlobalAveragePooling2D()(res_model.output)
output_layer = layers.Dense(1, activation='sigmoid')(averaging_layer)

res_model = models.Model(inputs=res_model.input, outputs=output_layer)
res_model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])

In [242]:
from keras.applications.mobilenet import MobileNet
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D

base_model = MobileNet(weights='imagenet', include_top=False)

x = base_model.output
for layer in base_model.layers:
    layer.trainable = False
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)
net_model = Model(inputs=base_model.input, outputs=predictions)

net_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])





In [243]:
base_predictors = {
    "ResNet50" : res_model,
    "MobileNet" : net_model
}

In [244]:
EI = d.EnsembleIntegration(
                        k_outer=2,
                        k_inner=2,
                        n_samples=1,
                        sampling_aggregation=None,
                        sampling_strategy=None,
                        n_jobs=1,# -1 could be messing up for one bp
                        metrics=metric_funs,
                        random_state=38,
                        project_name="images",
                        model_building=True,
                        )

In [245]:
EI.fit_base(X_sample, y_sample, modality_name="images", base_predictors=base_predictors)

Training base predictors on images...
        
... for ensemble performance analysis...




0.0
-0.007853963
0.0
-0.010690699
0.0
-0.004308817
0.0
-0.010875642




0.0
-0.009792602


Training final base predictors: |          |  0%


0.0
-0.004804369
0.0
-0.0008395787
0.0
0.0015880498


Generating ensemble training data: |██████████|100%
Generating ensemble test data: |          |  0%

0.0
-0.007275095
0.0
-0.004608715


Generating ensemble test data: |█████     | 50%

0.0
-0.013026578
0.0
-0.007893192


Generating ensemble test data: |██████████|100%



... for final ensemble...


Generating ensemble training data: |          |  0%

0.0
-0.009899473
0.0
-0.007279485
0.0
-0.0025825612
0.0
-0.0038712122


Generating ensemble training data: |██████████|100%
Training final base predictors: |          |  0%

0.0
-0.008674028
INFO:tensorflow:Assets written to: ram://830d8a35bbed4536b5a16ecdcb5ac248/assets


INFO:tensorflow:Assets written to: ram://830d8a35bbed4536b5a16ecdcb5ac248/assets
Training final base predictors: |█████     | 50%

0.0
0.0032789656
INFO:tensorflow:Assets written to: ram://eaa7bc8b0c7c42a6a706612aa3f13d4d/assets


INFO:tensorflow:Assets written to: ram://eaa7bc8b0c7c42a6a706612aa3f13d4d/assets
Training final base predictors: |██████████|100%








In [246]:
EI.base_summary["metrics"]

modality,images,images
base predictor,MobileNet,ResNet50
f_max,0.803702,0.93701
auc,0.910865,0.988427
mcc,0.599036,0.889418


In [247]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
import pandas as pd
from eipy.ei import EnsembleIntegration
from eipy.additional_ensembles import MeanAggregation, CES
ensemble_predictors = {
                    'Mean' : MeanAggregation(),
                    'CES' : CES(scoring=lambda y_test, y_pred: fmax_score(y_test, y_pred)[0]),
                    'S.ADAB': AdaBoostClassifier(),
                    'S.XGB': XGBClassifier(),
                    'S.DT': DecisionTreeClassifier(),
                    "S.RF": RandomForestClassifier(),
                    'S.GB': GradientBoostingClassifier(),
                    'S.KNN': KNeighborsClassifier(),
                    'S.LR': LogisticRegression(),
                    'S.NB': GaussianNB(),
                    'S.MLP': MLPClassifier(),
                    'S.SVM': SVC(probability=True),
}
EI.fit_ensemble(ensemble_predictors=ensemble_predictors)

Analyzing ensembles: |██████████|100%
Training final ensemble models: |██████████|100%


<eipy.deep_ei.EnsembleIntegration at 0x7f76f85cc5b0>

In [249]:
EI.ensemble_summary["metrics"]

Unnamed: 0,Mean,CES,S.ADAB,S.XGB,S.DT,S.RF,S.GB,S.KNN,S.LR,S.NB,S.MLP,S.SVM
f_max,0.938653,0.938653,0.920273,0.893595,0.869881,0.904255,0.901618,0.906327,0.930569,0.931784,0.930639,0.926485
auc,0.984565,0.984565,0.974385,0.971574,0.890448,0.971116,0.973057,0.964481,0.981685,0.980282,0.98185,0.941156
mcc,0.897544,0.897544,0.866935,0.818672,0.789072,0.838173,0.837258,0.841436,0.881015,0.886039,0.880176,0.874358


In [250]:
X_dict = {"images": X_test_sample}

In [251]:
def model_key(metric):
    return EI.ensemble_summary["metrics"].loc[f"{metric}"].idxmax()

In [255]:
metrics = EI.ensemble_summary["metrics"].index.tolist()
y_preds = {}
for metric in metrics:
    y_preds[metric] = EI.predict(X_dict=X_dict, ensemble_model_key=model_key(metric))

2023-11-30 20:48:19.940940: W tensorflow/core/util/tensor_slice_reader.cc:98] Could not open ram://bd423544612c45cb8affce0cd1953dea: INVALID_ARGUMENT: ram://bd423544612c45cb8affce0cd1953dea is a directory.




2023-11-30 20:48:23.315051: W tensorflow/core/util/tensor_slice_reader.cc:98] Could not open ram://1f74dab6b60b47a28dcd985c65ba693d: INVALID_ARGUMENT: ram://1f74dab6b60b47a28dcd985c65ba693d is a directory.
















































































2023-11-30 20:48:30.145392: W tensorflow/core/util/tensor_slice_reader.cc:98] Could not open ram://52cbd27b395c40a38128e9b5596efb7b: INVALID_ARGUMENT: ram://52cbd27b395c40a38128e9b5596efb7b is a directory.




2023-11-30 20:48:33.479333: W tensorflow/core/util/tensor_slice_reader.cc:98] Could not open ram://896b15c12856449e81d56e05f445b8fb: INVALID_ARGUMENT: ram://896b15c12856449e81d56e05f445b8fb is a directory.




2023-11-30 20:48:37.876220: W tensorflow/core/util/tensor_slice_reader.cc:98] Could not open ram://eeb1c0d7467b47a2bbdc0fb67b0c9b30: INVALID_ARGUMENT: ram://eeb1c0d7467b47a2bbdc0fb67b0c9b30 is a directory.








































2023-11-30 20:48:43.371726: W tensorflow/core/util/tensor_slice_reader.cc:98] Could not open ram://c05fff0cb54d4b7c83edb48a09019294: INVALID_ARGUMENT: ram://c05fff0cb54d4b7c83edb48a09019294 is a directory.




In [256]:
y_test_sample

array([[1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
    

In [46]:
results={}
for k,v in y_preds.items():
    results[k]= metric_funs[k](y_test, v)

ValueError: Classification metrics can't handle a mix of binary and continuous targets

In [257]:
roc_auc_score(y_test_sample, y_preds["auc"])

0.8652253357143745

In [258]:
fmax_score(y_test_sample, y_preds["f_max"])

(0.8396551724137931, 0.3244857)