In [8]:
import pandas as pd
import numpy as np

import autorootcwd  # noqa
from hamilton import driver

from src.data import data_pipeline
from src.data.pydantic_models import BearingDataset
from functools import partial
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.model_selection import (
    RandomizedSearchCV,
    GroupKFold,
    cross_validate,
    cross_val_predict,
)
from scipy.stats import randint, loguniform
import logging
import random
from src.utils.add_signal_data import add_signal_data_paderborn, add_signal_data_ottawa, add_signal_data_cwru

In [9]:
from src.data.data_splits import train_test_split_proposed
from itertools import combinations, product
import random

In [10]:
healthy_bearing_ids = ["K001", "K002", "K003", "K004", "K005", "K006"]
outer_bearing_ids = ["KA04", "KA15", "KA16", "KA22", "KA30"]
inner_bearing_ids = ["KI04", "KI14", "KI16", "KI18", "KI21", "KI17"]


class CombinationSampler:
    def __init__(self, list1, list2, list3, random_seed=42):
        self.combinations = list(product(list1, list2, list3))
        random.seed(random_seed)
        random.shuffle(self.combinations)

    def get_combination(self, run: int):
        if not self.combinations:
            raise ValueError("No more combinations available")
        combination = self.combinations[run]

        return combination


list1 = [
    ["K001", "K003"],
    ["K001", "K002"],
    ["K001", "K004"],
    ["K001", "K005"],
    ["K001", "K006"],
    ["K002", "K003"],
    ["K002", "K004"],
    ["K002", "K005"],
    ["K002", "K006"],
    ["K003", "K004"],
    ["K003", "K005"],
    ["K003", "K006"],
    ["K004", "K005"],
    ["K004", "K006"],
    ["K005", "K006"],
]

list2 = [
    ["KA04", "KA16"],
    ["KA04", "KA15"],
    ["KA04", "KA22"],
    ["KA04", "KA30"],
    ["KA15", "KA16"],
    ["KA15", "KA22"],
    ["KA15", "KA30"],
    ["KA16", "KA22"],
    ["KA16", "KA30"],
    ["KA22", "KA30"],
]

list3 = [
    ["KI04", "KI16"],
    ["KI04", "KI14"],
    ["KI04", "KI21"],
    ["KI04", "KI18"],
    ["KI04", "KI17"],
    ["KI14", "KI16"],
    ["KI14", "KI18"],
    ["KI14", "KI21"],
    ["KI14", "KI17"],
    ["KI16", "KI18"],
    ["KI16", "KI21"],
    ["KI16", "KI17"],
    ["KI18", "KI21"],
    ["KI18", "KI17"],
    ["KI21", "KI17"],
]

sampler = CombinationSampler(list1, list2, list3)

def get_cv_splits(
    df: pd.DataFrame,
    sampler: CombinationSampler,
    run: int,
    healthy_bearing_ids: list,
    outer_bearing_ids: list,
    inner_bearing_ids: list,
    use_combined: bool = False,
    on_train: bool = False,
):

    combination = sampler.get_combination(run=run)

    train_ids = (
        list(set(healthy_bearing_ids).difference(set(combination[0])))
        + list(set(outer_bearing_ids).difference(set(combination[1])))
        + list(set(inner_bearing_ids).difference(set(combination[2])))
    )

    test_ids = list(combination[0] + combination[1] + combination[2])

    if use_combined:
        combined_bearings_ids = ["KB23", "KB24", "KB27"]
        if on_train:
            print(f"Using combined bearings on train: {combined_bearings_ids}")
            train_ids += combined_bearings_ids
        else:
            print(f"Using combined bearings on test: {combined_bearings_ids}")
            test_ids += combined_bearings_ids

    train_idx = df[df["bearing_id"].isin(train_ids)].index.values
    test_idx = df[df["bearing_id"].isin(test_ids)].index.values

    cv = (train_idx, test_idx)

    return cv

In [4]:
metadata = pd.read_pickle("/data/bearing_datasets/paderborn/processed/files_metadata.bz2")
features = pd.read_pickle("/home/joao-paulo-vieira/mestrado/leakage_paper_experiments/data/features/pd_features.pkl")

In [5]:
df = features.merge(metadata, on="waveform_id", how="left")

In [6]:
cvs = [get_cv_splits(sampler=sampler, df=df, run=run,
 healthy_bearing_ids=healthy_bearing_ids, outer_bearing_ids=outer_bearing_ids,
  inner_bearing_ids=inner_bearing_ids, use_combined=False, on_train=False)
 for run in range(5)]

In [7]:
features = ['acceleration/rms/global', 'acceleration/pk-pk/global',
       'acceleration/kurt/global', 'acceleration/skewness/global',
       'acceleration/fc/global', 'acceleration/shape_factor/global',
       'acceleration/impulse_factor/global',
       'acceleration/clearance_factor/global', 'acceleration/fcPlus/global',
       'envelope/spectralPeak/1.0x-bpfo/50-1000',
       'envelope/spectralPeak/2.0x-bpfo/50-1000',
       'envelope/spectralPeak/3.0x-bpfo/50-1000',
       'envelope/spectralPeak/4.0x-bpfo/50-1000',
       'envelope/spectralPeak/5.0x-bpfo/50-1000',
       'envelope/spectralPeak/1.0x-bpfi/50-1000',
       'envelope/spectralPeak/2.0x-bpfi/50-1000',
       'envelope/spectralPeak/3.0x-bpfi/50-1000',
       'envelope/spectralPeak/4.0x-bpfi/50-1000',
       'envelope/spectralPeak/5.0x-bpfi/50-1000',
       'envelope/spectralPeak/1.0x-bpfo/50-5000',
       'envelope/spectralPeak/2.0x-bpfo/50-5000',
       'envelope/spectralPeak/3.0x-bpfo/50-5000',
       'envelope/spectralPeak/4.0x-bpfo/50-5000',
       'envelope/spectralPeak/5.0x-bpfo/50-5000',
       'envelope/spectralPeak/1.0x-bpfi/50-5000',
       'envelope/spectralPeak/2.0x-bpfi/50-5000',
       'envelope/spectralPeak/3.0x-bpfi/50-5000',
       'envelope/spectralPeak/4.0x-bpfi/50-5000',
       'envelope/spectralPeak/5.0x-bpfi/50-5000',
       'envelope/spectralPeak/1.0x-bpfo/500-10000',
       'envelope/spectralPeak/2.0x-bpfo/500-10000',
       'envelope/spectralPeak/3.0x-bpfo/500-10000',
       'envelope/spectralPeak/4.0x-bpfo/500-10000',
       'envelope/spectralPeak/5.0x-bpfo/500-10000',
       'envelope/spectralPeak/1.0x-bpfi/500-10000',
       'envelope/spectralPeak/2.0x-bpfi/500-10000',
       'envelope/spectralPeak/3.0x-bpfi/500-10000',
       'envelope/spectralPeak/4.0x-bpfi/500-10000',
       'envelope/spectralPeak/5.0x-bpfi/500-10000',
       'envelope/spectralPeak/1.0x-bsf/500-10000',]

In [29]:
X = df[features].copy()
y = df[['inner', 'outer']].copy()

# Features (segmented)

## RF

In [31]:
random_search = RandomizedSearchCV(
    estimator=MultiOutputClassifier(RandomForestClassifier()),
    param_distributions={
        "estimator__n_estimators": [200],
        "estimator__max_features": ["sqrt", "log2"],
        "estimator__criterion": ["gini", "entropy", "log_loss"],
        "estimator__max_depth": randint(low=2, high=60),
        "estimator__min_samples_split": randint(low=2, high=20),
        "estimator__min_samples_leaf": randint(low=1, high=20),
        "estimator__ccp_alpha": loguniform(1e-5, 1),
    },
    n_iter=250,
    cv=cvs,
    scoring="roc_auc",
    verbose=2,
    n_jobs=-1,
)

random_search.fit(X, y)

Fitting 5 folds for each of 250 candidates, totalling 1250 fits


[CV] END estimator__ccp_alpha=0.0013140714793113144, estimator__criterion=log_loss, estimator__max_depth=2, estimator__max_features=log2, estimator__min_samples_leaf=12, estimator__min_samples_split=14, estimator__n_estimators=200; total time=   1.7s
[CV] END estimator__ccp_alpha=0.0013140714793113144, estimator__criterion=log_loss, estimator__max_depth=2, estimator__max_features=log2, estimator__min_samples_leaf=12, estimator__min_samples_split=14, estimator__n_estimators=200; total time=   1.8s
[CV] END estimator__ccp_alpha=0.0013140714793113144, estimator__criterion=log_loss, estimator__max_depth=2, estimator__max_features=log2, estimator__min_samples_leaf=12, estimator__min_samples_split=14, estimator__n_estimators=200; total time=   2.0s
[CV] END estimator__ccp_alpha=0.0013140714793113144, estimator__criterion=log_loss, estimator__max_depth=2, estimator__max_features=log2, estimator__min_samples_leaf=12, estimator__min_samples_split=14, estimator__n_estimators=200; total time=   2

In [36]:
results = pd.DataFrame(random_search.cv_results_).sort_values(by="rank_test_score")

In [37]:
results.head(5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_estimator__ccp_alpha,param_estimator__criterion,param_estimator__max_depth,param_estimator__max_features,param_estimator__min_samples_leaf,param_estimator__min_samples_split,param_estimator__n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
244,2.106834,0.131136,0.036123,0.012235,0.000115,entropy,2,sqrt,5,14,200,{'estimator__ccp_alpha': 0.0001151071535527357...,0.909022,0.924109,0.804226,0.417087,0.87842,0.786573,0.189302,1
146,2.167267,0.162956,0.030442,0.000663,0.000163,gini,3,sqrt,8,14,200,{'estimator__ccp_alpha': 0.0001634560999475142...,0.901383,0.864233,0.731859,0.518267,0.907011,0.78455,0.147431,2
22,5.752028,0.246037,0.030641,0.000281,0.058223,entropy,18,sqrt,6,2,200,"{'estimator__ccp_alpha': 0.0582229531287423, '...",0.893516,0.766082,0.750279,0.611505,0.875999,0.779476,0.101556,3
88,6.625231,0.329033,0.029779,0.000698,0.07724,entropy,23,sqrt,15,3,200,"{'estimator__ccp_alpha': 0.07723959535546486, ...",0.897906,0.83097,0.739866,0.543856,0.882745,0.779069,0.129943,4
13,4.548373,0.20491,0.044239,0.022174,0.02281,gini,11,sqrt,5,9,200,"{'estimator__ccp_alpha': 0.022809539117521635,...",0.866517,0.785698,0.722663,0.635726,0.880157,0.778152,0.091252,5


In [38]:
results.loc[22]

mean_fit_time                                                                  5.752028
std_fit_time                                                                   0.246037
mean_score_time                                                                0.030641
std_score_time                                                                 0.000281
param_estimator__ccp_alpha                                                     0.058223
param_estimator__criterion                                                      entropy
param_estimator__max_depth                                                           18
param_estimator__max_features                                                      sqrt
param_estimator__min_samples_leaf                                                     6
param_estimator__min_samples_split                                                    2
param_estimator__n_estimators                                                       200
params                          

In [55]:
best_params = results.loc[22]["params"]

In [56]:
best_params.values()

dict_values([np.float64(0.0582229531287423), 'entropy', 18, 'sqrt', 6, 2, 200])

In [57]:
keys = []
for i in best_params.keys():
    keys.append(i.replace("estimator__", ""))

In [58]:
best_params = {k: i for (k,i) in zip(keys, best_params.values())}

In [59]:
best_params

{'ccp_alpha': np.float64(0.0582229531287423),
 'criterion': 'entropy',
 'max_depth': 18,
 'max_features': 'sqrt',
 'min_samples_leaf': 6,
 'min_samples_split': 2,
 'n_estimators': 200}

In [None]:
from sklearn.metrics import roc_auc_score

#### WITHOUT COMBINEDS
cvs = [get_cv_splits(sampler=sampler, df=df, run=run,
 healthy_bearing_ids=healthy_bearing_ids, outer_bearing_ids=outer_bearing_ids,
  inner_bearing_ids=inner_bearing_ids, use_combined=False, on_train=False)
 for run in range(5,105)]

aucs = []
for cv in cvs:

    X_train = X.iloc[cv[0]]
    X_test = X.iloc[cv[1]]

    y_train = y.iloc[cv[0]]
    y_test = y.iloc[cv[1]]

    model = MultiOutputClassifier(RandomForestClassifier(random_state=42, **best_params), n_jobs=-1)

    model.fit(X_train, y_train)
    y_probas = model.predict_proba(X_test)
    proba_outer = y_probas[1][:, 1]
    proba_inner = y_probas[0][:, 1]

    auroc_outer = roc_auc_score(y_test["outer"], proba_outer)
    auroc_inner = roc_auc_score(y_test["inner"], proba_inner)

    macro_auc = np.mean([auroc_outer, auroc_inner])
    print(f"Macro AUC: {macro_auc:.4f} | Outer AUC: {auroc_outer:.4f} | Inner AUC: {auroc_inner:.4f}")
    aucs.append([macro_auc, auroc_outer, auroc_inner])


Macro AUC: 0.7553 | Outer AUC: 0.7540 | Inner AUC: 0.7566
Macro AUC: 0.8695 | Outer AUC: 0.7990 | Inner AUC: 0.9399
Macro AUC: 0.3801 | Outer AUC: 0.3969 | Inner AUC: 0.3633
Macro AUC: 0.8487 | Outer AUC: 0.7462 | Inner AUC: 0.9513
Macro AUC: 0.6929 | Outer AUC: 0.6347 | Inner AUC: 0.7512
Macro AUC: 0.7232 | Outer AUC: 0.7065 | Inner AUC: 0.7399
Macro AUC: 0.6385 | Outer AUC: 0.4150 | Inner AUC: 0.8620
Macro AUC: 0.2169 | Outer AUC: 0.0170 | Inner AUC: 0.4167
Macro AUC: 0.8983 | Outer AUC: 0.8223 | Inner AUC: 0.9743
Macro AUC: 0.8954 | Outer AUC: 0.8413 | Inner AUC: 0.9496
Macro AUC: 0.7095 | Outer AUC: 0.5648 | Inner AUC: 0.8543
Macro AUC: 0.4692 | Outer AUC: 0.2150 | Inner AUC: 0.7233
Macro AUC: 0.7376 | Outer AUC: 0.7319 | Inner AUC: 0.7432
Macro AUC: 0.8959 | Outer AUC: 0.8544 | Inner AUC: 0.9373
Macro AUC: 0.8395 | Outer AUC: 0.8145 | Inner AUC: 0.8646
Macro AUC: 0.8619 | Outer AUC: 0.8343 | Inner AUC: 0.8895
Macro AUC: 0.5426 | Outer AUC: 0.7402 | Inner AUC: 0.3450
Macro AUC: 0.3

In [80]:
macro_aucs = [i[0] for i in aucs]
np.mean(macro_aucs), np.std(macro_aucs)

(np.float64(0.6752505168952146), np.float64(0.20016290954185018))

In [81]:
from sklearn.metrics import roc_auc_score

#### WITH COMBINED 
cvs = [get_cv_splits(sampler=sampler, df=df, run=run,
 healthy_bearing_ids=healthy_bearing_ids, outer_bearing_ids=outer_bearing_ids,
  inner_bearing_ids=inner_bearing_ids, use_combined=True, on_train=False)
 for run in range(5,105)]

aucs = []
for cv in cvs:

    X_train = X.iloc[cv[0]]
    X_test = X.iloc[cv[1]]

    y_train = y.iloc[cv[0]]
    y_test = y.iloc[cv[1]]

    model = MultiOutputClassifier(RandomForestClassifier(random_state=42, **best_params), n_jobs=-1)

    model.fit(X_train, y_train)
    y_probas = model.predict_proba(X_test)
    proba_outer = y_probas[1][:, 1]
    proba_inner = y_probas[0][:, 1]

    auroc_outer = roc_auc_score(y_test["outer"], proba_outer)
    auroc_inner = roc_auc_score(y_test["inner"], proba_inner)

    macro_auc = np.mean([auroc_outer, auroc_inner])
    print(f"Macro AUC: {macro_auc:.4f} | Outer AUC: {auroc_outer:.4f} | Inner AUC: {auroc_inner:.4f}")
    aucs.append([macro_auc, auroc_outer, auroc_inner])


Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined

In [82]:
macro_aucs = [i[0] for i in aucs]
np.mean(macro_aucs), np.std(macro_aucs)

(np.float64(0.6816634324097492), np.float64(0.14772935620341357))

## SVM

In [83]:
cvs = [get_cv_splits(sampler=sampler, df=df, run=run,
 healthy_bearing_ids=healthy_bearing_ids, outer_bearing_ids=outer_bearing_ids,
  inner_bearing_ids=inner_bearing_ids, use_combined=False, on_train=False)
 for run in range(5)]

In [85]:
from sklearn.svm import SVC

random_search = RandomizedSearchCV(
    estimator=MultiOutputClassifier(SVC(probability=True)),
    param_distributions={
        "estimator__C": loguniform(1e-3, 1e3),
        "estimator__gamma": ["scale", "auto"],
        "estimator__kernel": ["rbf"],
    },
    n_iter=250,
    cv=cvs,
    scoring="roc_auc",
    verbose=2,
    n_jobs=-1,
)

random_search.fit(X, y)

Fitting 5 folds for each of 250 candidates, totalling 1250 fits


[CV] END estimator__C=5.67849118947293, estimator__gamma=auto, estimator__kernel=rbf; total time=   3.2s
[CV] END estimator__C=5.67849118947293, estimator__gamma=auto, estimator__kernel=rbf; total time=   3.2s
[CV] END estimator__C=5.67849118947293, estimator__gamma=auto, estimator__kernel=rbf; total time=   3.4s
[CV] END estimator__C=5.67849118947293, estimator__gamma=auto, estimator__kernel=rbf; total time=   3.7s
[CV] END estimator__C=1.7284155717424479, estimator__gamma=auto, estimator__kernel=rbf; total time=   3.8s
[CV] END estimator__C=73.8235760253899, estimator__gamma=scale, estimator__kernel=rbf; total time=   4.0s
[CV] END estimator__C=1.7284155717424479, estimator__gamma=auto, estimator__kernel=rbf; total time=   4.3s
[CV] END estimator__C=1.7284155717424479, estimator__gamma=auto, estimator__kernel=rbf; total time=   4.7s
[CV] END estimator__C=14.69716572373262, estimator__gamma=scale, estimator__kernel=rbf; total time=   5.2s
[CV] END estimator__C=73.8235760253899, estima

In [86]:
results = pd.DataFrame(random_search.cv_results_).sort_values(by="rank_test_score")

In [91]:
results.iloc[0]

mean_fit_time                                                       4.731125
std_fit_time                                                         0.68196
mean_score_time                                                     0.444534
std_score_time                                                      0.085182
param_estimator__C                                                 73.823576
param_estimator__gamma                                                 scale
param_estimator__kernel                                                  rbf
params                     {'estimator__C': 73.8235760253899, 'estimator_...
split0_test_score                                                   0.855259
split1_test_score                                                   0.685216
split2_test_score                                                   0.545223
split3_test_score                                                   0.503153
split4_test_score                                                    0.70019

In [92]:
best_params = results.iloc[0]["params"]

In [93]:
best_params.values()

dict_values([np.float64(73.8235760253899), 'scale', 'rbf'])

In [94]:
keys = []
for i in best_params.keys():
    keys.append(i.replace("estimator__", ""))

In [95]:
best_params = {k: i for (k,i) in zip(keys, best_params.values())}

In [96]:
best_params

{'C': np.float64(73.8235760253899), 'gamma': 'scale', 'kernel': 'rbf'}

In [97]:
from sklearn.metrics import roc_auc_score

#### WITHOUT COMBINEDS
cvs = [get_cv_splits(sampler=sampler, df=df, run=run,
 healthy_bearing_ids=healthy_bearing_ids, outer_bearing_ids=outer_bearing_ids,
  inner_bearing_ids=inner_bearing_ids, use_combined=False, on_train=False)
 for run in range(5,105)]

aucs = []
for cv in cvs:

    X_train = X.iloc[cv[0]]
    X_test = X.iloc[cv[1]]

    y_train = y.iloc[cv[0]]
    y_test = y.iloc[cv[1]]

    model = MultiOutputClassifier(SVC(random_state=42, **best_params, probability=True), n_jobs=-1)

    model.fit(X_train, y_train)
    y_probas = model.predict_proba(X_test)
    proba_outer = y_probas[1][:, 1]
    proba_inner = y_probas[0][:, 1]

    auroc_outer = roc_auc_score(y_test["outer"], proba_outer)
    auroc_inner = roc_auc_score(y_test["inner"], proba_inner)

    macro_auc = np.mean([auroc_outer, auroc_inner])
    print(f"Macro AUC: {macro_auc:.4f} | Outer AUC: {auroc_outer:.4f} | Inner AUC: {auroc_inner:.4f}")
    aucs.append([macro_auc, auroc_outer, auroc_inner])


Macro AUC: 0.5727 | Outer AUC: 0.7267 | Inner AUC: 0.4187
Macro AUC: 0.5464 | Outer AUC: 0.7860 | Inner AUC: 0.3068
Macro AUC: 0.2722 | Outer AUC: 0.3988 | Inner AUC: 0.1455
Macro AUC: 0.5850 | Outer AUC: 0.6949 | Inner AUC: 0.4752
Macro AUC: 0.5517 | Outer AUC: 0.5967 | Inner AUC: 0.5067
Macro AUC: 0.4587 | Outer AUC: 0.7118 | Inner AUC: 0.2057
Macro AUC: 0.3553 | Outer AUC: 0.4352 | Inner AUC: 0.2754
Macro AUC: 0.3440 | Outer AUC: 0.3594 | Inner AUC: 0.3286
Macro AUC: 0.7099 | Outer AUC: 0.8150 | Inner AUC: 0.6049
Macro AUC: 0.9022 | Outer AUC: 0.8654 | Inner AUC: 0.9389
Macro AUC: 0.6048 | Outer AUC: 0.5822 | Inner AUC: 0.6273
Macro AUC: 0.4801 | Outer AUC: 0.3592 | Inner AUC: 0.6009
Macro AUC: 0.5613 | Outer AUC: 0.6539 | Inner AUC: 0.4688
Macro AUC: 0.6462 | Outer AUC: 0.8493 | Inner AUC: 0.4430
Macro AUC: 0.8653 | Outer AUC: 0.8942 | Inner AUC: 0.8363
Macro AUC: 0.8112 | Outer AUC: 0.8237 | Inner AUC: 0.7987
Macro AUC: 0.6335 | Outer AUC: 0.8750 | Inner AUC: 0.3919
Macro AUC: 0.3

In [98]:
macro_aucs = [i[0] for i in aucs]
np.mean(macro_aucs), np.std(macro_aucs)

(np.float64(0.5564388467419942), np.float64(0.17488943549310645))

In [99]:
from sklearn.metrics import roc_auc_score

#### WITH COMBINEDS
cvs = [get_cv_splits(sampler=sampler, df=df, run=run,
 healthy_bearing_ids=healthy_bearing_ids, outer_bearing_ids=outer_bearing_ids,
  inner_bearing_ids=inner_bearing_ids, use_combined=True, on_train=False)
 for run in range(5,105)]

aucs = []
for cv in cvs:

    X_train = X.iloc[cv[0]]
    X_test = X.iloc[cv[1]]

    y_train = y.iloc[cv[0]]
    y_test = y.iloc[cv[1]]

    model = MultiOutputClassifier(SVC(random_state=42, **best_params, probability=True), n_jobs=-1)

    model.fit(X_train, y_train)
    y_probas = model.predict_proba(X_test)
    proba_outer = y_probas[1][:, 1]
    proba_inner = y_probas[0][:, 1]

    auroc_outer = roc_auc_score(y_test["outer"], proba_outer)
    auroc_inner = roc_auc_score(y_test["inner"], proba_inner)

    macro_auc = np.mean([auroc_outer, auroc_inner])
    print(f"Macro AUC: {macro_auc:.4f} | Outer AUC: {auroc_outer:.4f} | Inner AUC: {auroc_inner:.4f}")
    aucs.append([macro_auc, auroc_outer, auroc_inner])


Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined bearings on test: ['KB23', 'KB24', 'KB27']
Using combined

In [100]:
macro_aucs = [i[0] for i in aucs]
np.mean(macro_aucs), np.std(macro_aucs)

(np.float64(0.5212559482818315), np.float64(0.14057162451219973))