In [7]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Titanic survival baseline: download the public CSV, clean it, fit a
# logistic regression and print hold-out metrics.

# Direct link to the raw Titanic CSV.
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"

# Load the dataset (pandas is already imported at the top of the cell).
df = pd.read_csv(url)

# Report how many values are missing in each column.
print(df.isnull().sum())

# Remove the less useful columns.
df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)

# Fill missing ages with the mean age.
# NOTE(review): the mean is computed over every row before the train/test
# split below, so test rows influence the imputed value. Consider computing
# it on the training split only.
df['Age'] = df['Age'].fillna(df['Age'].mean())

# Remove rows that have no embarkation port.
df = df.dropna(subset=['Embarked'])

# Encode sex as 0/1.
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

# One-hot encode Embarked, dropping the first level. dtype=int pins the
# indicator columns to integers so the feature matrix stays purely numeric
# regardless of the pandas version's default dummy dtype.
df = pd.get_dummies(df, columns=['Embarked'], drop_first=True, dtype=int)

# Features are every remaining column except the target and the row id.
X = df.drop(['Survived', 'PassengerId'], axis=1)
y = df['Survived']

# Train/test split: 20% held out, fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the classifier on the training split.
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Predict on the held-out split and print the evaluation metrics.
y_pred = model.predict(X_test)

print("Acurácia:", accuracy_score(y_test, y_pred))
print("Matriz de Confusão:\n", confusion_matrix(y_test, y_pred))
print("Relatório de Classificação:\n", classification_report(y_test, y_pred))



PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64
Acurácia: 0.7808988764044944
Matriz de Confusão:
 [[85 24]
 [15 54]]
Relatório de Classificação:
               precision    recall  f1-score   support

           0       0.85      0.78      0.81       109
           1       0.69      0.78      0.73        69

    accuracy                           0.78       178
   macro avg       0.77      0.78      0.77       178
weighted avg       0.79      0.78      0.78       178

