In [5]:
import numpy as np
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [6]:
# Load the breast-cancer dataset bundled with scikit-learn and unpack the
# pieces used by the rest of the notebook.
data = load_breast_cancer()
X = data['data']
y = data['target']
feature_names = data['feature_names']
target_names = data['target_names']

# Pipeline steps run in order: oversample -> standardise -> classify.
# TODO: swap SDV in for SMOTE here, using a wrapper that implements fit_resample.
pipeline = Pipeline(steps=[
    ('resampling', SMOTE(random_state=42)),
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='rbf', probability=True)),
])

# Hyper-parameter grid on a base-2 scale, stepping the exponent by 2:
#   C     -> 2^-5,  2^-3,  ..., 2^15
#   gamma -> 2^-15, 2^-13, ..., 2^3
c_exponents = np.arange(-5.0, 16.0, 2)
gamma_exponents = np.arange(-15.0, 4.0, 2)
grid_search_svc = {
    'svc__C': 2 ** c_exponents,
    'svc__gamma': 2 ** gamma_exponents,
}

In [7]:
# Nested cross-validation.
#   * Outer loop: splits the data into train/test folds used to score the model.
#   * Inner loop: GridSearchCV tunes svc__C / svc__gamma on the outer-training
#     split only, so the outer test fold is never seen during tuning.
SEED = 42
np.random.seed(SEED)  # kept for any code that still draws from the global NumPy RNG

num_outer_loop_folds = 5
num_inner_loop_folds = 5

# Seed the splitter explicitly: `split` is a lazy generator, so without
# `random_state` the shuffle would depend on whatever state the global RNG is
# in when the first fold is requested rather than on this cell alone.
outer_cv = KFold(n_splits=num_outer_loop_folds, shuffle=True, random_state=SEED)
folds = outer_cv.split(X, y)

results = []  # one classification_report dict per outer fold
for i, (train_index, test_index) in enumerate(folds):
    print(f"Training fold {i+1}...")

    X_train, y_train = X[train_index, :], y[train_index]
    X_test, y_test = X[test_index, :], y[test_index]

    # Inner loop: exhaustive search over the grid, parallelised over all cores.
    model = GridSearchCV(pipeline, grid_search_svc, cv=num_inner_loop_folds, n_jobs=-1)
    model.fit(X_train, y_train)

    # Score the tuned model on the held-out outer fold.
    pred_test = model.predict(X_test)
    results.append(classification_report(y_test, pred_test, output_dict=True))

Training fold 1...
Training fold 2...
Training fold 3...
Training fold 4...
Training fold 5...


In [8]:
# Show the per-fold classification-report dicts collected by the outer CV loop.
results

[{'0': {'precision': 1.0,
   'recall': 0.9534883720930233,
   'f1-score': 0.9761904761904763,
   'support': 43},
  '1': {'precision': 0.9726027397260274,
   'recall': 1.0,
   'f1-score': 0.9861111111111112,
   'support': 71},
  'accuracy': 0.9824561403508771,
  'macro avg': {'precision': 0.9863013698630136,
   'recall': 0.9767441860465116,
   'f1-score': 0.9811507936507937,
   'support': 114},
  'weighted avg': {'precision': 0.9829367940398942,
   'recall': 0.9824561403508771,
   'f1-score': 0.9823691172375383,
   'support': 114}},
 {'0': {'precision': 0.9487179487179487,
   'recall': 1.0,
   'f1-score': 0.9736842105263158,
   'support': 37},
  '1': {'precision': 1.0,
   'recall': 0.974025974025974,
   'f1-score': 0.9868421052631579,
   'support': 77},
  'accuracy': 0.9824561403508771,
  'macro avg': {'precision': 0.9743589743589743,
   'recall': 0.987012987012987,
   'f1-score': 0.9802631578947368,
   'support': 114},
  'weighted avg': {'precision': 0.9833558254610886,
   'recall': 0.

In [4]:
# Summarise the outer folds: mean and standard deviation (NumPy default,
# i.e. ddof=0) of accuracy and macro-averaged F1 across all fold reports.
fold_accuracy = np.array([fold_report['accuracy'] for fold_report in results])
fold_macro_f1 = np.array([fold_report['macro avg']['f1-score'] for fold_report in results])

acc_mean, acc_std = fold_accuracy.mean(), fold_accuracy.std()
f1_mean, f1_std = fold_macro_f1.mean(), fold_macro_f1.std()

print(f"Accuracy: {acc_mean:.2%} +- {acc_std:.2%}")
print(f"F1-score: {f1_mean:.2%} +- {f1_std:.2%}")

Accuracy: 98.07% +- 1.17%
F1-score: 97.93% +- 1.23%
