<a href="https://colab.research.google.com/github/carloscesar182/ai_advanced_course/blob/main/Notebooks/MLAdvTechniques/AutoML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# modelos
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Load the breast-cancer dataset and pull out the feature matrix (X)
# and the target vector (y).
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

In [None]:
# Wrap the feature matrix in a DataFrame labelled with the dataset's feature
# names, then preview the first five rows.
df_X = pd.DataFrame(data=X, columns=cancer.feature_names)
df_X.head(n=5)

In [None]:
# Hold out 20% of the samples as a test set; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=12,
)

In [None]:
# Standardize the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_trained_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Candidate classifiers, keyed by the display name used in the printed results.
#
# The tree-based and ensemble estimators have a stochastic component, so they
# receive a fixed seed: without it the cross-validation ranking (and therefore
# the selected "best" model) can change from one run of the notebook to the next.
SEED = 12  # same seed value used for the train/test split

models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=SEED),
    'Random Forest': RandomForestClassifier(random_state=SEED),
    'SVC': SVC(),
    'kNN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB(),
    'Gradient Boosting': GradientBoostingClassifier(random_state=SEED),
    'AdaBoost': AdaBoostClassifier(random_state=SEED),
}

In [None]:
# Score every candidate with 5-fold cross-validation on the training split and
# store the mean fold score per model name.
#
# Scaling happens inside a pipeline on the *unscaled* training data, so the
# StandardScaler is re-fitted on the training portion of each fold. Feeding
# cross_val_score data that was scaled with statistics from the whole training
# split would leak each validation fold's mean/variance into training and make
# the scores slightly optimistic.
results = {}
for name, model in models.items():
    candidate = make_pipeline(StandardScaler(), model)
    # cross_val_score fits clones of the estimator, so the entries in `models`
    # stay unfitted here.
    scores = cross_val_score(candidate, X_train, y_train, cv=5)
    results[name] = scores.mean()

In [None]:
# Report the mean cross-validation score of each candidate, one per line,
# in the order the models were evaluated.
for name in results:
    score = results[name]
    print(f'{name}: {score:.4f}')

In [None]:
# Pick the candidate with the highest mean cross-validation score (the first
# one wins on ties) and fetch its estimator for the final fit.
best_model_name = max(results.items(), key=lambda entry: entry[1])[0]
best_model = models[best_model_name]
print(f'Melhor modelo: {best_model_name}')

In [None]:
# Fit the selected model on the full (scaled) training split.
best_model.fit(X=X_trained_scaled, y=y_train)

In [None]:
# Predict labels for the held-out (scaled) test split.
y_pred = best_model.predict(X=X_test_scaled)

In [None]:
# Compare the predictions against the true test labels and report the
# resulting accuracy for the selected model.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Melhor modelo {best_model_name} - Performance: {accuracy:.4f}')