In [None]:
# Instalação de Bibliotecas
pip install pandas numpy matplotlib scikit-learn tensorflow keras seaborn optuna

In [1]:
# Importação de Bibliotecas
 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, KFold
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.neural_network import MLPClassifier

import optuna

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten, MaxPooling1D, Dropout, SpatialDropout1D, SimpleRNN, Embedding
from keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Carregar os dados
df = pd.read_csv("C:/Users/ana_v/OneDrive/Documentos/Repositórios/GSE25066.csv", low_memory=False)

# Obtém os valores únicos na coluna 'Type'
unique_types = df['type'].unique()

# Cria um dicionário mapeando cada tipo único para um número
type_to_numeric = {type_name: index for index, type_name in enumerate(unique_types)}

# Aplica a substituição usando o método map
df['type'] = df['type'].map(type_to_numeric)

# Armazena a coluna 'type' para adicioná-la de volta posteriormente
type_column = df['type']

# Prepara o DataFrame para normalização (remover colunas desnecessárias)
df_num = df.drop(columns=["sample", "type"])

# Normalizar os dados
scaler = StandardScaler()
dados_normalizados = scaler.fit_transform(df_num)

# Aplicação do PCA
pca = PCA(n_components=0.8)  
pca.fit(dados_normalizados)
dados_pca = pca.transform(dados_normalizados)

In [3]:
# Fixar a seed para garantir a reprodutibilidade
seed = 1

# Divisão de treino e teste com random_state
X_train, X_test, y_train, y_test = train_test_split(dados_pca, df['type'], test_size=0.2, random_state=seed)

In [4]:
# Função objetivo para otimização do SVM
def objective_svm(trial):
    svc_c = trial.suggest_loguniform('svc_c', 1e-5, 1e2)
    svc_kernel = trial.suggest_categorical('svc_kernel', ['linear', 'poly', 'rbf', 'sigmoid'])
    
    model_svm = SVC(C=svc_c, kernel=svc_kernel, random_state=seed)
    score = cross_val_score(model_svm, X_train, y_train, cv=10, scoring='accuracy').mean()
    return score

# Função objetivo para otimização do Random Forest
def objective_rf(trial):
    rf_n_estimators = trial.suggest_int('rf_n_estimators', 50, 200)
    rf_max_depth = trial.suggest_int('rf_max_depth', 10, 50)
    rf_min_samples_split = trial.suggest_int('rf_min_samples_split', 2, 20)
    
    model_rf = RandomForestClassifier(n_estimators=rf_n_estimators, max_depth=rf_max_depth,
                                      min_samples_split=rf_min_samples_split, random_state=seed)
    score = cross_val_score(model_rf, X_train, y_train, cv=10, scoring='accuracy').mean()
    return score

# Função objetivo para otimização da Regressão Logística
def objective_lr(trial):
    lr_c = trial.suggest_loguniform('lr_c', 1e-5, 1e2)
    lr_solver = trial.suggest_categorical('lr_solver', ['newton-cg', 'lbfgs', 'liblinear', 'saga'])
    
    model_lr = LogisticRegression(C=lr_c, solver=lr_solver, max_iter=10000, random_state=seed)
    score = cross_val_score(model_lr, X_train, y_train, cv=10, scoring='accuracy').mean()
    return score

# Estudo e otimização com Optuna para SVM
study_svm = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=seed))
study_svm.optimize(objective_svm, n_trials=10)
best_params_svm = study_svm.best_params
print("Melhores parâmetros para SVM:", best_params_svm)

# Estudo e otimização com Optuna para Random Forest
study_rf = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=seed))
study_rf.optimize(objective_rf, n_trials=10)
best_params_rf = study_rf.best_params
print("Melhores parâmetros para Random Forest:", best_params_rf)

# Estudo e otimização com Optuna para Regressão Logística
study_lr = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=seed))
study_lr.optimize(objective_lr, n_trials=10)
best_params_lr = study_lr.best_params
print("Melhores parâmetros para Regressão Logística:", best_params_lr)

[I 2024-08-30 16:30:40,743] A new study created in memory with name: no-name-d753e999-d2dd-4b25-827f-a0d6cefbf3b1
  svc_c = trial.suggest_loguniform('svc_c', 1e-5, 1e2)
[I 2024-08-30 16:30:40,874] Trial 0 finished with value: 0.7907926829268292 and parameters: {'svc_c': 0.008301451461243867, 'svc_kernel': 'linear'}. Best is trial 0 with value: 0.7907926829268292.
  svc_c = trial.suggest_loguniform('svc_c', 1e-5, 1e2)
[I 2024-08-30 16:30:41,028] Trial 1 finished with value: 0.3670731707317073 and parameters: {'svc_c': 4.429657657076251e-05, 'svc_kernel': 'sigmoid'}. Best is trial 0 with value: 0.7907926829268292.
  svc_c = trial.suggest_loguniform('svc_c', 1e-5, 1e2)
[I 2024-08-30 16:30:41,224] Trial 2 finished with value: 0.3670731707317073 and parameters: {'svc_c': 0.008597290483171273, 'svc_kernel': 'rbf'}. Best is trial 0 with value: 0.7907926829268292.
  svc_c = trial.suggest_loguniform('svc_c', 1e-5, 1e2)
[I 2024-08-30 16:30:41,420] Trial 3 finished with value: 0.42128048780487803

Melhores parâmetros para SVM: {'svc_c': 83.56561666138207, 'svc_kernel': 'rbf'}


[I 2024-08-30 16:30:47,279] Trial 0 finished with value: 0.6231707317073171 and parameters: {'rf_n_estimators': 112, 'rf_max_depth': 39, 'rf_min_samples_split': 2}. Best is trial 0 with value: 0.6231707317073171.
[I 2024-08-30 16:30:51,280] Trial 1 finished with value: 0.6333536585365854 and parameters: {'rf_n_estimators': 95, 'rf_max_depth': 16, 'rf_min_samples_split': 3}. Best is trial 1 with value: 0.6333536585365854.
[I 2024-08-30 16:30:54,484] Trial 2 finished with value: 0.6309146341463415 and parameters: {'rf_n_estimators': 78, 'rf_max_depth': 24, 'rf_min_samples_split': 9}. Best is trial 1 with value: 0.6333536585365854.
[I 2024-08-30 16:30:59,612] Trial 3 finished with value: 0.6235975609756098 and parameters: {'rf_n_estimators': 131, 'rf_max_depth': 27, 'rf_min_samples_split': 15}. Best is trial 1 with value: 0.6333536585365854.
[I 2024-08-30 16:31:03,706] Trial 4 finished with value: 0.6134756097560976 and parameters: {'rf_n_estimators': 80, 'rf_max_depth': 46, 'rf_min_sampl

Melhores parâmetros para Random Forest: {'rf_n_estimators': 182, 'rf_max_depth': 46, 'rf_min_samples_split': 3}


[I 2024-08-30 16:31:30,258] Trial 0 finished with value: 0.7859146341463414 and parameters: {'lr_c': 0.008301451461243867, 'lr_solver': 'newton-cg'}. Best is trial 0 with value: 0.7859146341463414.
  lr_c = trial.suggest_loguniform('lr_c', 1e-5, 1e2)
[I 2024-08-30 16:31:43,190] Trial 1 finished with value: 0.778719512195122 and parameters: {'lr_c': 4.429657657076251e-05, 'lr_solver': 'saga'}. Best is trial 0 with value: 0.7859146341463414.
  lr_c = trial.suggest_loguniform('lr_c', 1e-5, 1e2)
[I 2024-08-30 16:31:44,867] Trial 2 finished with value: 0.6410365853658536 and parameters: {'lr_c': 0.008597290483171273, 'lr_solver': 'liblinear'}. Best is trial 0 with value: 0.7859146341463414.
  lr_c = trial.suggest_loguniform('lr_c', 1e-5, 1e2)
[I 2024-08-30 16:31:45,049] Trial 3 finished with value: 0.7759756097560977 and parameters: {'lr_c': 0.4934834261073345, 'lr_solver': 'lbfgs'}. Best is trial 0 with value: 0.7859146341463414.
  lr_c = trial.suggest_loguniform('lr_c', 1e-5, 1e2)
[I 2024

Melhores parâmetros para Regressão Logística: {'lr_c': 0.013653836035069419, 'lr_solver': 'newton-cg'}


In [5]:
# Treinar e avaliar o modelo SVM com os melhores parâmetros
best_model_svm = SVC(C=best_params_svm['svc_c'], kernel=best_params_svm['svc_kernel'], random_state=seed)
best_model_svm.fit(X_train, y_train)
svm_predictions_train = best_model_svm.predict(X_train)
svm_predictions_test = best_model_svm.predict(X_test)

# Avaliação SVM
svm_accuracy_train = accuracy_score(y_train, svm_predictions_train)
svm_accuracy_test = accuracy_score(y_test, svm_predictions_test)
svm_report = classification_report(y_test, svm_predictions_test, zero_division=1)

# Resultados
print(f'SVM - Acurácia (Treinamento): {svm_accuracy_train}')
print(f'SVM - Acurácia (Teste): {svm_accuracy_test}')
print(f'Acurácia média na validação cruzada: {cross_val_score(best_model_svm, X_train, y_train, cv=10).mean()}')
print(f'\nClassification Report (SVM):\n{svm_report}')
print("Matriz de Confusão:\n", confusion_matrix(y_test, svm_predictions_test))


SVM - Acurácia (Treinamento): 1.0
SVM - Acurácia (Teste): 0.7745098039215687
Acurácia média na validação cruzada: 0.7933536585365853

Classification Report (SVM):
              precision    recall  f1-score   support

           0       0.70      0.94      0.80        34
           1       0.67      0.50      0.57         8
           2       1.00      0.88      0.93        40
           3       0.67      0.46      0.55        13
           4       0.33      0.29      0.31         7

    accuracy                           0.77       102
   macro avg       0.67      0.61      0.63       102
weighted avg       0.78      0.77      0.77       102

Matriz de Confusão:
 [[32  0  0  2  0]
 [ 2  4  0  0  2]
 [ 2  1 35  0  2]
 [ 7  0  0  6  0]
 [ 3  1  0  1  2]]


In [6]:
# Treinar e avaliar o modelo Random Forest com os melhores parâmetros
best_model_rf = RandomForestClassifier(
    n_estimators=best_params_rf['rf_n_estimators'],
    max_depth=best_params_rf['rf_max_depth'],
    min_samples_split=best_params_rf['rf_min_samples_split'],
    random_state=seed
)
best_model_rf.fit(X_train, y_train)
rf_predictions_train = best_model_rf.predict(X_train)
rf_predictions_test = best_model_rf.predict(X_test)

# Avaliação Random Forest
rf_accuracy_train = accuracy_score(y_train, rf_predictions_train)
rf_accuracy_test = accuracy_score(y_test, rf_predictions_test)
rf_report = classification_report(y_test, rf_predictions_test, zero_division=1)

print(f'Random Forest - Acurácia (Teste): {rf_accuracy_test}')
print(f'Acurácia média na validação cruzada: {cross_val_score(best_model_rf, X_train, y_train, cv=10).mean()}')
print(f'\nClassification Report (Random Forest):\n{rf_report}')
print("Matriz de Confusão:\n", confusion_matrix(y_test, rf_predictions_test))


Random Forest - Acurácia (Teste): 0.696078431372549
Acurácia média na validação cruzada: 0.6405487804878048

Classification Report (Random Forest):
              precision    recall  f1-score   support

           0       0.62      0.85      0.72        34
           1       1.00      0.12      0.22         8
           2       0.75      0.97      0.85        40
           3       1.00      0.15      0.27        13
           4       1.00      0.00      0.00         7

    accuracy                           0.70       102
   macro avg       0.87      0.42      0.41       102
weighted avg       0.77      0.70      0.62       102

Matriz de Confusão:
 [[29  0  5  0  0]
 [ 5  1  2  0  0]
 [ 1  0 39  0  0]
 [ 8  0  3  2  0]
 [ 4  0  3  0  0]]


In [7]:
# Treinar e avaliar o modelo Regressão Logística com os melhores parâmetros
best_model_lr = LogisticRegression(
    C=best_params_lr['lr_c'],
    solver=best_params_lr['lr_solver'],
    max_iter=10000,
    random_state=seed
)
best_model_lr.fit(X_train, y_train)
lr_predictions_train = best_model_lr.predict(X_train)
lr_predictions_test = best_model_lr.predict(X_test)

# Avaliação Regressão Logística
lr_accuracy_train = accuracy_score(y_train, lr_predictions_train)
lr_accuracy_test = accuracy_score(y_test, lr_predictions_test)
lr_report = classification_report(y_test, lr_predictions_test)

print(f'Regressão Logística - Acurácia (Teste): {lr_accuracy_test}')
print(f'Acurácia média na validação cruzada: {cross_val_score(best_model_lr, X_train, y_train, cv=10).mean()}')
print(f'\nClassification Report (Regressão Logística):\n{lr_report}')
print("Matriz de Confusão:\n", confusion_matrix(y_test, lr_predictions_test))

Regressão Logística - Acurácia (Teste): 0.803921568627451
Acurácia média na validação cruzada: 0.7883536585365853

Classification Report (Regressão Logística):
              precision    recall  f1-score   support

           0       0.80      0.82      0.81        34
           1       0.62      0.62      0.62         8
           2       1.00      0.90      0.95        40
           3       0.62      0.77      0.69        13
           4       0.43      0.43      0.43         7

    accuracy                           0.80       102
   macro avg       0.70      0.71      0.70       102
weighted avg       0.82      0.80      0.81       102

Matriz de Confusão:
 [[28  1  0  4  1]
 [ 1  5  0  0  2]
 [ 1  2 36  0  1]
 [ 3  0  0 10  0]
 [ 2  0  0  2  3]]


In [8]:
# Função objetivo para otimização do MLP
def objective_mlp(trial):
    n_layers = trial.suggest_int('n_layers', 2, 6)
    hidden_layer_sizes = [trial.suggest_int(f'n_units_l{i}', 25, 150) for i in range(n_layers)]
    activation = trial.suggest_categorical('activation', ['relu', 'tanh', 'logistic'])
    solver = trial.suggest_categorical('solver', ['adam', 'sgd'])
    alpha = trial.suggest_loguniform('alpha', 1e-5, 1e-1)
    learning_rate_init = trial.suggest_loguniform('learning_rate_init', 1e-4, 1e-1)
    
    # Criar e treinar o modelo
    model = MLPClassifier(hidden_layer_sizes=tuple(hidden_layer_sizes), activation=activation,
                          solver=solver, alpha=alpha, learning_rate_init=learning_rate_init,
                          max_iter=500, random_state=seed)
    
    # Validação cruzada
    cv = KFold(n_splits=5, shuffle=True, random_state=seed)
    score = cross_val_score(model, X_train, y_train, cv=cv, scoring='accuracy').mean()
    return score

# Estudo de otimização com Optuna para MLP
study_mlp = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=seed))
study_mlp.optimize(objective_mlp, n_trials=10)

# Melhores parâmetros
best_params_mlp = study_mlp.best_params
print("Melhores parâmetros para MLP:", best_params_mlp)

# Treinamento e avaliação do MLP com os melhores parâmetros
best_mlp_model = MLPClassifier(hidden_layer_sizes=tuple(best_params_mlp[f'n_units_l{i}'] for i in range(best_params_mlp['n_layers'])),
                               activation=best_params_mlp['activation'],
                               solver=best_params_mlp['solver'],
                               alpha=best_params_mlp['alpha'],
                               learning_rate_init=best_params_mlp['learning_rate_init'],
                               max_iter=500, random_state=seed)
best_mlp_model.fit(X_train, y_train)
mlp_predictions_train = best_mlp_model.predict(X_train)
mlp_predictions_test = best_mlp_model.predict(X_test)

# Avaliação do modelo
mlp_accuracy_train = accuracy_score(y_train, mlp_predictions_train)
mlp_accuracy_test = accuracy_score(y_test, mlp_predictions_test)
mlp_report = classification_report(y_test, mlp_predictions_test, zero_division=1)

[I 2024-08-30 16:33:13,161] A new study created in memory with name: no-name-8a5739f9-c6e9-4d57-b44b-a30621f9daf1
  alpha = trial.suggest_loguniform('alpha', 1e-5, 1e-1)
  learning_rate_init = trial.suggest_loguniform('learning_rate_init', 1e-4, 1e-1)
[I 2024-08-30 16:33:14,986] Trial 0 finished with value: 0.36663655525444144 and parameters: {'n_layers': 4, 'n_units_l0': 115, 'n_units_l1': 25, 'n_units_l2': 63, 'n_units_l3': 43, 'activation': 'logistic', 'solver': 'sgd', 'alpha': 0.00047509237210306113, 'learning_rate_init': 0.011367330868956235}. Best is trial 0 with value: 0.36663655525444144.
  alpha = trial.suggest_loguniform('alpha', 1e-5, 1e-1)
  learning_rate_init = trial.suggest_loguniform('learning_rate_init', 1e-4, 1e-1)
[I 2024-08-30 16:33:27,727] Trial 1 finished with value: 0.71162300511894 and parameters: {'n_layers': 3, 'n_units_l0': 135, 'n_units_l1': 28, 'n_units_l2': 109, 'activation': 'tanh', 'solver': 'sgd', 'alpha': 0.0746528346269155, 'learning_rate_init': 0.0008

Melhores parâmetros para MLP: {'n_layers': 2, 'n_units_l0': 27, 'n_units_l1': 110, 'activation': 'logistic', 'solver': 'sgd', 'alpha': 3.862907418494322e-05, 'learning_rate_init': 0.005860256303804338}




In [10]:
# Avaliação para MLP
print("\nResultados do MLP:")
print("Matriz de Confusão:\n", confusion_matrix(y_test, mlp_predictions_test))
print("Acurácia (Teste):", accuracy_score(y_test, mlp_predictions_test))

# Acurácia média na validação cruzada
cv_mean_score = cross_val_score(best_mlp_model, X_train, y_train, cv=10, scoring='accuracy').mean()
print(f'Acurácia média na validação cruzada (MLP): {cv_mean_score:.4f}')

# Classification Report
print(f'\nClassification Report (MLP):\n{mlp_report}')


Resultados do MLP:
Matriz de Confusão:
 [[31  0  0  2  1]
 [ 1  6  0  0  1]
 [ 1  3 36  0  0]
 [ 4  0  0  8  1]
 [ 0  2  2  2  1]]
Acurácia (Teste): 0.803921568627451




Acurácia média na validação cruzada (MLP): 0.8007

Classification Report (MLP):
              precision    recall  f1-score   support

           0       0.84      0.91      0.87        34
           1       0.55      0.75      0.63         8
           2       0.95      0.90      0.92        40
           3       0.67      0.62      0.64        13
           4       0.25      0.14      0.18         7

    accuracy                           0.80       102
   macro avg       0.65      0.66      0.65       102
weighted avg       0.80      0.80      0.80       102





In [9]:
# Definir número de classes
num_classes = len(unique_types)

def create_cnn_model(n_conv_layers=2, n_dense_layers=1, filters=32, kernel_size=3, pool_size=2, dense_units=64, dropout_rate=0.5):
    model = Sequential()

    # Adicionar camadas convolucionais conforme definido por Optuna
    for i in range(n_conv_layers):
        if i == 0:
            model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu', padding='same', input_shape=(X_train.shape[1], 1)))
        else:
            model.add(Conv1D(filters=filters * (2 ** i), kernel_size=kernel_size, activation='relu', padding='same'))
        model.add(MaxPooling1D(pool_size=pool_size))

    model.add(Flatten())

    # Adicionar camadas densas conforme definido por Optuna
    for _ in range(n_dense_layers):
        model.add(Dense(dense_units, activation='relu'))
        model.add(Dropout(dropout_rate))
    
    # Camada de saída
    model.add(Dense(num_classes, activation='softmax'))
    
    model.compile(optimizer=Adam(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Função objetivo para otimização do CNN com Optuna
def objective_cnn(trial):
    # Hiperparâmetros para CNN
    n_conv_layers = trial.suggest_int('n_conv_layers', 1, 4)  
    n_dense_layers = trial.suggest_int('n_dense_layers', 1, 3) 
    filters = trial.suggest_int('filters', 16, 64)
    kernel_size = trial.suggest_int('kernel_size', 2, 5) 
    pool_size = trial.suggest_int('pool_size', 2, 3) 
    dense_units = trial.suggest_int('dense_units', 32, 128)
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.2, 0.5)
    epochs = trial.suggest_int('epochs', 5, 20)  

    # Criação do modelo
    model = create_cnn_model(n_conv_layers=n_conv_layers,
                             n_dense_layers=n_dense_layers,
                             filters=filters,
                             kernel_size=kernel_size,
                             pool_size=pool_size,
                             dense_units=dense_units,
                             dropout_rate=dropout_rate)

    # Validação cruzada
    kf = KFold(n_splits=5, shuffle=True, random_state=seed)
    cv_scores = []
    for train_index, val_index in kf.split(X_train):
        X_fold_train, X_fold_val = X_train[train_index], X_train[val_index]
        y_fold_train, y_fold_val = y_train.iloc[train_index], y_train.iloc[val_index]
        model.fit(X_fold_train, y_fold_train, epochs=epochs, batch_size=32, verbose=0)
        val_loss, val_accuracy = model.evaluate(X_fold_val, y_fold_val, verbose=0)
        cv_scores.append(val_accuracy)
    # Média da acurácia de validação cruzada
    mean_cv_accuracy = np.mean(cv_scores)

    return mean_cv_accuracy

# Estudo de otimização com Optuna para CNN
study_cnn = optuna.create_study(direction='maximize')
study_cnn.optimize(objective_cnn, n_trials=10)

# Melhores parâmetros
best_params_cnn = study_cnn.best_params
print("Melhores parâmetros para CNN:", best_params_cnn)

# Treinamento e avaliação do CNN com melhores parâmetros
best_cnn_model = create_cnn_model(n_conv_layers=best_params_cnn['n_conv_layers'],
                                  n_dense_layers=best_params_cnn['n_dense_layers'],
                                  filters=best_params_cnn['filters'],
                                  kernel_size=best_params_cnn['kernel_size'],
                                  pool_size=best_params_cnn['pool_size'],
                                  dense_units=best_params_cnn['dense_units'],
                                  dropout_rate=best_params_cnn['dropout_rate'])

# Treinamento do modelo com o melhor número de épocas
history = best_cnn_model.fit(X_train, y_train, epochs=best_params_cnn['epochs'], batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Avaliação da CNN no conjunto de teste
cnn_loss, cnn_accuracy = best_cnn_model.evaluate(X_test, y_test)

# Predições da CNN no conjunto de teste
cnn_predictions = best_cnn_model.predict(X_test)
cnn_predictions_classes = np.argmax(cnn_predictions, axis=1)

# Classification report da CNN
cnn_report = classification_report(y_test, cnn_predictions_classes, target_names=[str(cls) for cls in unique_types], zero_division=1, digits=2)

# Matriz de confusão
conf_matrix = confusion_matrix(y_test, cnn_predictions_classes)

# Acurácia de treinamento
train_accuracy = history.history['accuracy'][-1]

# Acurácia da validação cruzada 
cv_mean_accuracy = study_cnn.best_value

[I 2024-08-30 16:34:45,303] A new study created in memory with name: no-name-781bd389-e30e-4883-976d-761e1b2fbe30
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.2, 0.5)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
[I 2024-08-30 16:34:52,264] Trial 0 finished with value: 0.7192713022232056 and parameters: {'n_conv_layers': 2, 'n_dense_layers': 2, 'filters': 33, 'kernel_size': 3, 'pool_size': 3, 'dense_units': 65, 'dropout_rate': 0.42859391001555774, 'epochs': 13}. Best is trial 0 with value: 0.7192713022232056.
[I 2024-08-30 16:35:00,636] Trial 1 finished with value: 0.7024089097976685 and parameters: {'n_conv_layers': 3, 'n_dense_layers': 3, 'filters': 26, 'kernel_size': 2, 'pool_size': 2, 'dense_units': 109, 'dropout_rate': 0.4763570259884, 'epochs': 13}. Best is trial 0 with value: 0.7192713022232056.
[I 2024-08-30 16:35:07,200] Trial 2 finished with value: 0.8230051279067994 and parameters: {'n_conv_layers': 4, 'n_dense_layers': 2, 'filters': 29,

Melhores parâmetros para CNN: {'n_conv_layers': 2, 'n_dense_layers': 2, 'filters': 20, 'kernel_size': 4, 'pool_size': 2, 'dense_units': 73, 'dropout_rate': 0.2796797209659968, 'epochs': 19}
Epoch 1/19
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.2591 - loss: 1.9726 - val_accuracy: 0.5490 - val_loss: 1.3361
Epoch 2/19
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4301 - loss: 1.4207 - val_accuracy: 0.5588 - val_loss: 1.1670
Epoch 3/19
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5608 - loss: 1.2008 - val_accuracy: 0.6373 - val_loss: 1.0447
Epoch 4/19
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6518 - loss: 0.9802 - val_accuracy: 0.6275 - val_loss: 0.9692
Epoch 5/19
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6529 - loss: 0.8979 - val_accuracy: 0.6863 - val_loss: 0.8747
Epoch 6

In [11]:
# Exibindo resultados da CNN
print(f'\nCNN - Acurácia (Treinamento): {train_accuracy:.4f}')
print(f'CNN - Acurácia (Teste): {cnn_accuracy:.4f}')
print(f'Acurácia média na validação cruzada (CNN): {cv_mean_accuracy:.4f}')
print(f'\nMatriz de Confusão (CNN):\n{conf_matrix}')
print(f'\nClassification Report (CNN):\n{cnn_report}')


CNN - Acurácia (Treinamento): 0.9680
CNN - Acurácia (Teste): 0.7157
Acurácia média na validação cruzada (CNN): 0.9339

Matriz de Confusão (CNN):
[[22  2  1  9  0]
 [ 1  4  1  1  1]
 [ 2  0 35  2  1]
 [ 4  0  0  9  0]
 [ 1  0  1  2  3]]

Classification Report (CNN):
              precision    recall  f1-score   support

        LumA       0.73      0.65      0.69        34
      Normal       0.67      0.50      0.57         8
       Basal       0.92      0.88      0.90        40
        LumB       0.39      0.69      0.50        13
        Her2       0.60      0.43      0.50         7

    accuracy                           0.72       102
   macro avg       0.66      0.63      0.63       102
weighted avg       0.75      0.72      0.72       102

