In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the Titanic training set (path is relative to the working directory)
data = pd.read_csv('train.csv')

# Preprocessing
# Drop identifier / free-text columns that are not used as features
data = data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

# Fill missing values: median for Age, most frequent value for Embarked.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on a column selection: the chained in-place form emits
# a FutureWarning on pandas 2.x and stops modifying `data` once Copy-on-Write
# is active, which would leave the NaNs in place.
# NOTE: both statistics are computed on the full dataset, i.e. before the
# train/test split performed below.
data['Age'] = data['Age'].fillna(data['Age'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

# Encode the categorical variables as integers
data['Sex'] = data['Sex'].map({'female': 0, 'male': 1}).astype(int)
data['Embarked'] = data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2}).astype(int)

# Separate the features from the target label
X = data.drop('Survived', axis=1)
y = data['Survived']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the decision tree (fixed seed) and fit it on the training split;
# fit() returns the estimator itself, so construction and training are chained.
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = clf.predict(X_test)

# Compute every evaluation metric against the true test labels in one pass
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report the metrics, one per line
for label, value in (
    ('Accuracy:', accuracy),
    ('Precision:', precision),
    ('Recall:', recall),
    ('F1 Score:', f1),
):
    print(label, value)


Accuracy: 0.7877094972067039
Precision: 0.7368421052631579
Recall: 0.7567567567567568
F1 Score: 0.7466666666666667
