**Parte 1** | Pré-processamento dos Dados

In [13]:
import pandas as pd

# 1 Carregar a base de dados no Colab
path = 'Titanic-Dataset.csv'
df = pd.read_csv(path)

# 2 Limpeza de Dados

# Verificação de valores ausentes
print(df.isnull().sum())
# Preencher valores ausentes na coluna 'Age' com a mediana
df['Age'].fillna(df['Age'].median(), inplace=True)
# Preencher valores ausentes na coluna 'Fare' com a mediana
df['Fare'].fillna(df['Fare'].median(), inplace=True)
# Remover linhas que ainda contenham valores nulos
df_clean = df.dropna()

# 3 Seleção de variáveis

# Colunas para análise
df_selected = df_clean[['Pclass', 'Sex', 'Age', 'Fare', 'Survived']]
# Converter a coluna 'Sex' em variáveis numéricas
df_selected = pd.get_dummies(df_selected, columns=['Sex'], drop_first=True)
# Exibindo as primeiras linhas após a seleção e transformação
print(df_selected.head())

# 4 Divisão dos dados
x = df_selected[['Pclass', 'Sex_male', 'Age', 'Fare']]
y = df_selected['Survived']

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64
    Pclass   Age     Fare  Survived  Sex_male
1        1  38.0  71.2833         1     False
3        1  35.0  53.1000         1     False
6        1  54.0  51.8625         0      True
10       3   4.0  16.7000         1     False
11       1  58.0  26.5500         1     False


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Fare'].fillna(df['Fare'].median(), inplace=True)


**Parte 2** | Implementação simples de um algoritmo de classificação (k-NN)

In [14]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# Dividir os dados em treino e teste (70% treino, 30% teste)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x_train, y_train)

# Fazer previsões no conjunto de teste
y_pred = knn.predict(x_test)

**Parte 3** | Avaliação de desempenho

In [15]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# 1 Avaliando acurácia
accuracy = accuracy_score(y_test, y_pred)
print(f'Acurácia: {accuracy:.2f}')

# 2 Matriz de confusão
cm = confusion_matrix(y_test, y_pred)
print(cm)

Acurácia: 0.66
[[ 3 10]
 [11 37]]
