In [None]:
pip install numpy pandas scikit-learn scikit-optimize


Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.1.0-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-25.1.0-py3-none-any.whl (26 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-25.1.0 scikit-optimize-0.10.2


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from skopt import BayesSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder


In [None]:
# Load the Titanic dataset
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"

# Keep only the modelling columns and drop rows with missing values.
# The frame is built in one chain (no inplace=True) and ends with .copy(), so `df`
# is an independent DataFrame and the column assignments below never depend on
# pandas' view-versus-copy behaviour for a frame derived by column selection.
df = (
    pd.read_csv(url)[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]
    .dropna()
    .copy()
)

# Encode the categorical columns as integers.
# Each column gets its own encoder: refitting a single shared encoder on 'Embarked'
# would overwrite the mapping learned for 'Sex', making it impossible to decode later.
sex_encoder = LabelEncoder()
embarked_encoder = LabelEncoder()
df['Sex'] = sex_encoder.fit_transform(df['Sex'])
df['Embarked'] = embarked_encoder.fit_transform(df['Embarked'])

# Backward-compatible alias: `le` used to end up fitted on 'Embarked'.
le = embarked_encoder

# Separate features and target
X = df.drop('Survived', axis=1)
y = df['Survived']

# Split the data into train and test sets (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Search space for the Random Forest (each tuple is a (low, high) integer range)
rf_search = {
    'n_estimators': (50, 500),
    'max_depth': (3, 20),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 20)
}

# Search space for the Decision Tree (same ranges, minus n_estimators)
dt_search = {
    'max_depth': (3, 20),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 20)
}

# Settings shared by both searches.
# random_state seeds the Bayesian optimizer itself: without it the sampled
# hyperparameters, and therefore the selected model and every metric reported
# downstream, change from run to run even though the estimators are seeded.
search_kwargs = dict(n_iter=30, cv=5, scoring='accuracy', n_jobs=-1, random_state=42)

# Create the optimizers
rf_opt = BayesSearchCV(RandomForestClassifier(random_state=42), rf_search, **search_kwargs)
dt_opt = BayesSearchCV(DecisionTreeClassifier(random_state=42), dt_search, **search_kwargs)

# Fit both searches on the training split
rf_opt.fit(X_train, y_train)
dt_opt.fit(X_train, y_train)


In [None]:
# Predict the held-out test set with the best estimator found by each search
# (Random Forest first, then Decision Tree).
y_pred_rf, y_pred_dt = (
    search.best_estimator_.predict(X_test) for search in (rf_opt, dt_opt)
)

# Compute the evaluation metrics
def evaluate(y_true, y_pred):
    """Score a set of predictions against the true labels.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.

    Returns:
        dict: The accuracy, precision, recall and F1 score, keyed (in that
        order) by 'Acurácia', 'Precisão', 'Recall' and 'F1-Score'.
    """
    scorers = (
        ('Acurácia', accuracy_score),
        ('Precisão', precision_score),
        ('Recall', recall_score),
        ('F1-Score', f1_score),
    )
    return {label: scorer(y_true, y_pred) for label, scorer in scorers}

rf_metrics = evaluate(y_true=y_test, y_pred=y_pred_rf)
dt_metrics = evaluate(y_true=y_test, y_pred=y_pred_dt)

# Report the two models one after the other for comparison
for heading, scores in (
    ("Métricas - Random Forest:", rf_metrics),
    ("Métricas - Decision Tree:", dt_metrics),
):
    print(heading, scores)


Métricas - Random Forest: {'Acurácia': 0.7762237762237763, 'Precisão': 0.7924528301886793, 'Recall': 0.6666666666666666, 'F1-Score': 0.7241379310344828}
Métricas - Decision Tree: {'Acurácia': 0.7482517482517482, 'Precisão': 0.7755102040816326, 'Recall': 0.6031746031746031, 'F1-Score': 0.6785714285714286}


In [None]:
def _ranked_importances(search):
    """Return the best estimator's feature importances, labelled by feature column and sorted largest first."""
    weights = search.best_estimator_.feature_importances_
    return pd.Series(weights, index=X.columns).sort_values(ascending=False)


# Feature importances of the tuned Random Forest
rf_importances = _ranked_importances(rf_opt)

# Feature importances of the tuned Decision Tree
dt_importances = _ranked_importances(dt_opt)

print("\nImportância dos atributos - Random Forest:\n", rf_importances)
print("\nImportância dos atributos - Decision Tree:\n", dt_importances)



Importância dos atributos - Random Forest:
 Sex         0.376060
Fare        0.198303
Age         0.174391
Pclass      0.144751
SibSp       0.046535
Parch       0.034696
Embarked    0.025263
dtype: float64

Importância dos atributos - Decision Tree:
 Sex         0.463733
Pclass      0.190917
Age         0.181644
Fare        0.129781
SibSp       0.018040
Parch       0.015884
Embarked    0.000000
dtype: float64
