In [18]:
import os
import sys
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Path adjustment (required): expose the parent folder and the current folder
# to the import system so the later `from src.autoencoder import ...` can
# resolve (the `src` package is expected under one of these two folders).
def add_to_sys_path(path):
    """Append the absolute form of ``path`` to ``sys.path`` unless already listed.

    The membership check keeps the cell idempotent: re-running it does not
    pile up duplicate entries in ``sys.path``.
    """
    absolute = os.path.abspath(path)
    if absolute not in sys.path:
        sys.path.append(absolute)


add_to_sys_path("..")
add_to_sys_path(".")

# Load the WDBC breast-cancer dataset bundled with scikit-learn.
data = load_breast_cancer()
X, y = data.data, data.target

print("Base carregada:", X.shape)

# Hold out 20% of the samples for testing; the split is stratified on the
# labels and seeded so it is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardisation: the scaler is fitted on the training split only and the
# fitted scaler is then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

for label, matrix in (("Treino:", X_train_scaled), ("Teste :", X_test_scaled)):
    print(label, matrix.shape)


Base carregada: (569, 30)
Treino: (455, 30)
Teste : (114, 30)


In [19]:
# Sanity check: import TensorFlow and print the installed version.
import tensorflow as tf
print("TF OK:", tf.__version__)


TF OK: 2.15.0


In [20]:
# Import the project-local autoencoder helpers; the message below is only
# printed if the import did not raise.
from src.autoencoder import create_autoencoder, train_autoencoder
print("Autoencoder importado com sucesso!")


Autoencoder importado com sucesso!


In [33]:
# Reload src.autoencoder and re-import its two helpers so the names
# create_autoencoder / train_autoencoder are rebound to the reloaded module
# (presumably used while src/autoencoder.py was being edited -- TODO confirm
# whether this cell is still needed in the final notebook).
from importlib import reload
import src.autoencoder
reload(src.autoencoder)

from src.autoencoder import create_autoencoder, train_autoencoder


In [34]:
# Seed Python, NumPy and TensorFlow before building the model so the training
# run is repeatable on the same setup. Without a seed every execution yields a
# different encoder and therefore different downstream accuracies. 42 matches
# the seed used for train_test_split.
# NOTE(review): assumes create_autoencoder / train_autoencoder rely on the
# default Keras random generators -- confirm in src/autoencoder.py.
tf.keras.utils.set_random_seed(42)

# Take the input width from the data instead of hard-coding 30, so the cell
# keeps working if the feature matrix changes.
autoencoder, encoder = create_autoencoder(
    input_dim=X_train_scaled.shape[1], bottleneck_dim=3
)
autoencoder.summary()

# Train on the standardised training split only; `history` is pickled later.
history = train_autoencoder(autoencoder, X_train_scaled, epochs=40)


Model: "model_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 30)]              0         
                                                                 
 dense_24 (Dense)            (None, 16)                496       
                                                                 
 dense_25 (Dense)            (None, 8)                 136       
                                                                 
 dense_26 (Dense)            (None, 3)                 27        
                                                                 
 dense_27 (Dense)            (None, 8)                 32        
                                                                 
 dense_28 (Dense)            (None, 16)                144       
                                                                 
 dense_29 (Dense)            (None, 30)                510 

In [22]:
# Project both splits through the encoder half of the autoencoder
# (training split first, then test split).
Z_train_ae, Z_test_ae = (
    encoder.predict(split) for split in (X_train_scaled, X_test_scaled)
)

# Show the shapes of the two encoded matrices.
(Z_train_ae.shape, Z_test_ae.shape)




((455, 3), (114, 3))

In [23]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np


In [24]:
# Hyper-parameter grid explored for the random forest (3 x 4 x 3 candidates).
param_grid_rf = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 5, 10, 20],
    min_samples_split=[2, 5, 10],
)


In [25]:
def avaliar_modelo_grid(model, param_grid, X_train, y_train, X_test, y_test,
                        cv=5, scoring="accuracy", n_jobs=-1):
    """Tune ``model`` with a cross-validated grid search and score the winner.

    Parameters
    ----------
    model
        Estimator handed to ``GridSearchCV`` as the base estimator.
    param_grid
        Hyper-parameter grid handed to ``GridSearchCV``.
    X_train, y_train
        Data the grid search is fitted on.
    X_test, y_test
        Held-out data passed to the best estimator's ``score`` method.
    cv
        Forwarded to ``GridSearchCV(cv=...)``. Defaults to 5.
    scoring
        Forwarded to ``GridSearchCV(scoring=...)``; it drives model selection
        only. Defaults to ``"accuracy"``.
    n_jobs
        Forwarded to ``GridSearchCV(n_jobs=...)``. Defaults to -1.

    Returns
    -------
    tuple
        ``(acc, best_params)``: the value returned by
        ``grid.best_estimator_.score(X_test, y_test)`` and ``grid.best_params_``.
    """
    grid = GridSearchCV(model, param_grid, cv=cv, scoring=scoring, n_jobs=n_jobs)
    grid.fit(X_train, y_train)

    # The held-out figure comes from the estimator's own score(), not from
    # `scoring`; the test data plays no part in the search itself.
    best_model = grid.best_estimator_
    acc = best_model.score(X_test, y_test)

    return acc, grid.best_params_


In [26]:
from sklearn.decomposition import PCA

# PCA with 7 components (adjust the number if desired). The projection is
# fitted on the training split and then applied to the test split.
# NOTE(review): the autoencoder cell uses bottleneck_dim=3, so the PCA and
# autoencoder representations compared later do not have the same width.
pca = PCA(n_components=7)
Z_train_pca, Z_test_pca = (
    pca.fit_transform(X_train_scaled),
    pca.transform(X_test_scaled),
)

# Show the shapes of the two projected matrices.
(Z_train_pca.shape, Z_test_pca.shape)


((455, 7), (114, 7))

In [27]:
# Random forests are stochastic: fix the estimator seed so the three accuracies
# and the selected hyper-parameters are reproducible across kernel restarts.
# 42 matches the seed used for train_test_split.

# 1) Standardised original features.
acc_rf_original, params_rf_original = avaliar_modelo_grid(
    RandomForestClassifier(random_state=42), param_grid_rf,
    X_train_scaled, y_train,
    X_test_scaled, y_test
)

# 2) PCA projection.
acc_rf_pca, params_rf_pca = avaliar_modelo_grid(
    RandomForestClassifier(random_state=42), param_grid_rf,
    Z_train_pca, y_train,
    Z_test_pca, y_test
)

# 3) Autoencoder bottleneck codes.
acc_rf_ae, params_rf_ae = avaliar_modelo_grid(
    RandomForestClassifier(random_state=42), param_grid_rf,
    Z_train_ae, y_train,
    Z_test_ae, y_test
)


In [28]:
# Test-set accuracy of the tuned random forest on, in order: the scaled
# original features, the PCA projection, and the autoencoder codes.
acc_rf_original, acc_rf_pca, acc_rf_ae


(0.9473684210526315, 0.9210526315789473, 0.9385964912280702)

In [29]:
# Hyper-parameter grid shared by the three SVM runs below.
param_grid_svm = dict(
    C=[0.1, 1, 10],
    kernel=["rbf", "linear"],
    gamma=["scale", "auto"],
)

# Run the same grid search on each representation, in this order:
# scaled original features, PCA projection, autoencoder codes.
(
    (acc_svm_original, params_svm_original),
    (acc_svm_pca, params_svm_pca),
    (acc_svm_ae, params_svm_ae),
) = [
    avaliar_modelo_grid(
        SVC(), param_grid_svm, train_features, y_train, test_features, y_test
    )
    for train_features, test_features in (
        (X_train_scaled, X_test_scaled),
        (Z_train_pca, Z_test_pca),
        (Z_train_ae, Z_test_ae),
    )
]


In [30]:
# Test-set accuracy of the tuned SVM on, in order: the scaled original
# features, the PCA projection, and the autoencoder codes.
acc_svm_original, acc_svm_pca, acc_svm_ae


(0.9824561403508771, 0.956140350877193, 0.9298245614035088)

In [31]:
import os
import pickle

# Create the output folder if needed (no error when it already exists).
os.makedirs("../models", exist_ok=True)

# Serialise the encoder object to disk with pickle.
# NOTE(review): Keras documents model.save() as its persistence route; pickle
# is kept so anything that already reads encoder.pkl keeps working -- confirm
# which format downstream code expects.
with open("../models/encoder.pkl", "wb") as encoder_file:
    pickle.dump(encoder, encoder_file)

print("Encoder salvo com sucesso!")


Encoder salvo com sucesso!


In [35]:
import os
import pickle

# Create the output folder if needed (no error when it already exists).
os.makedirs("../models", exist_ok=True)

# Persist only the `.history` attribute of the object returned by
# train_autoencoder, not that object itself.
with open("../models/ae_history.pkl", "wb") as history_file:
    pickle.dump(history.history, history_file)

print("History salvo com sucesso!")


History salvo com sucesso!
