In [3]:
!pip install scikit-optimize
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from skopt import BayesSearchCV
from sklearn.metrics import accuracy_score

# Carregar a base de dados do Titanic
data = pd.read_csv("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv")

# Pré-processamento básico
data = data[['Survived', 'Pclass', 'Sex', 'Age', 'Fare']].dropna()
data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})

X = data[['Pclass', 'Sex', 'Age', 'Fare']]
y = data['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Definição do modelo e espaço de hiperparâmetros
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [3, 5, 10, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# GridSearchCV
grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# RandomizedSearchCV
random_search = RandomizedSearchCV(DecisionTreeClassifier(), param_grid, n_iter=10, cv=5, scoring='accuracy', random_state=42)
random_search.fit(X_train, y_train)

# BayesSearchCV
bayes_search = BayesSearchCV(DecisionTreeClassifier(), param_grid, n_iter=10, cv=5, scoring='accuracy', random_state=42)
bayes_search.fit(X_train, y_train)

# Avaliação
def evaluate(model, name):
    y_pred = model.best_estimator_.predict(X_test)
    print(f"{name} Accuracy: {accuracy_score(y_test, y_pred):.4f}")

evaluate(grid_search, "GridSearchCV")
evaluate(random_search, "RandomizedSearchCV")
evaluate(bayes_search, "BayesSearchCV")


GridSearchCV Accuracy: 0.7203
RandomizedSearchCV Accuracy: 0.7413
BayesSearchCV Accuracy: 0.7902
