In [None]:
import pandas as pd

# Location of the training CSV, kept in one place so it is easy to change.
DATA_PATH = '/content/train.csv'

# Load the dataset into a DataFrame.
df = pd.read_csv(DATA_PATH)

# Preview the first rows (rich display, so no print()).
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [None]:
# Print a label, then let the notebook display the (rows, columns) tuple
# as the cell's output value.
print("Shape:")
df.shape

Shape:


(891, 12)

In [None]:
# List the dtype pandas inferred for each column.
column_types = df.dtypes
print("\nTipos de dados:\n", column_types)


Tipos de dados:
 PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object


In [None]:
# Count missing values per column to decide what to drop or impute.
null_counts = df.isnull().sum()
print("\nValores nulos:\n", null_counts)


Valores nulos:
 PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64


In [None]:
# Relative frequency of each class of the target column `Survived`.
target_share = df['Survived'].value_counts(normalize=True)
print("\nDistribuição da variável alvo:\n", target_share)



Distribuição da variável alvo:
 Survived
0    0.616162
1    0.383838
Name: proportion, dtype: float64


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np

# Drop identifier, free-text and mostly-null columns. The result goes into a
# NEW frame: the raw `df` shown in earlier cells stays intact, and this cell
# can be re-run without failing on columns that were already dropped.
df_model = df.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Encode categoricals with explicit mappings. The codes are the same ones the
# previous LabelEncoder calls produced (female=0, male=1; C=0, Q=1, S=2).
# Missing `Embarked` values stay NaN here and are imputed after the split.
df_model['Sex'] = df_model['Sex'].map({'female': 0, 'male': 1})
df_model['Embarked'] = df_model['Embarked'].map({'C': 0, 'Q': 1, 'S': 2})

# Separate features and label.
X = df_model.drop(columns='Survived')
y = df_model['Survived']

# Split BEFORE computing any statistic so that nothing learned from the test
# rows (imputation values, scaling mean/std) leaks into training.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Imputation values come from the training split only and are then applied
# unchanged to both splits.
fill_values = {
    'Age': X_train['Age'].mean(),
    'Embarked': X_train['Embarked'].mode()[0],
}
X_train = X_train.fillna(fill_values)
X_test = X_test.fillna(fill_values)

# Standardize: fit on the training split, reuse the fitted scaler on test.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keras expects float labels for binary cross-entropy.
y_train = y_train.to_numpy(dtype='float32')
y_test = y_test.to_numpy(dtype='float32')


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow.keras.backend as K


In [None]:
def f1_score(y_true, y_pred):
    """Batch-wise F1 score for a binary classifier, usable as a Keras metric.

    Both inputs are flattened to 1-D before counting. Without this, labels of
    shape (batch,) multiplied by predictions of shape (batch, 1) broadcast to
    a (batch, batch) matrix and the true-positive count is wrong.

    Args:
        y_true: Tensor of ground-truth labels (0 or 1).
        y_pred: Tensor of predicted probabilities with the same number of
            elements as ``y_true``; values are rounded to 0/1.

    Returns:
        Scalar tensor with the F1 score of the given batch.

    Note:
        Keras averages this value over batches, so the number shown during
        training is only an approximation. Compute F1 over the full set of
        predictions for the final figure.
    """
    # Bring labels and predictions to the same 1-D shape and dtype.
    y_true = K.cast(K.flatten(y_true), 'float32')
    y_pred = K.cast(K.round(K.flatten(y_pred)), 'float32')

    tp = K.sum(y_true * y_pred)
    predicted_positives = K.sum(y_pred)
    possible_positives = K.sum(y_true)

    # K.epsilon() guards against division by zero when a batch has no
    # predicted or no actual positives.
    precision = tp / (predicted_positives + K.epsilon())
    recall = tp / (possible_positives + K.epsilon())
    return 2 * (precision * recall) / (precision + recall + K.epsilon())


In [None]:
# Seed Python, NumPy and TensorFlow so weight initialization and batch
# shuffling are reproducible across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Single sigmoid unit = logistic regression on the scaled features.
# The input shape is declared with an explicit Input object; passing
# `input_shape` to the first layer of a Sequential model emits a Keras warning.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy', f1_score])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train the model; `history` keeps the per-epoch metrics for later inspection.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=10,
    validation_split=0.2,  # fraction of the training data held out for validation
    verbose=1,
)


Epoch 1/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.7855 - f1_score: 0.3691 - loss: 0.4686 - val_accuracy: 0.8240 - val_f1_score: 0.3339 - val_loss: 0.4236
Epoch 2/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8014 - f1_score: 0.3765 - loss: 0.4540 - val_accuracy: 0.8240 - val_f1_score: 0.3339 - val_loss: 0.4233
Epoch 3/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8069 - f1_score: 0.3324 - loss: 0.4526 - val_accuracy: 0.8240 - val_f1_score: 0.3339 - val_loss: 0.4224
Epoch 4/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7754 - f1_score: 0.3595 - loss: 0.4800 - val_accuracy: 0.8240 - val_f1_score: 0.3339 - val_loss: 0.4223
Epoch 5/50
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7908 - f1_score: 0.3741 - loss: 0.4538 - val_accuracy: 0.8240 - val_f1_score: 0.3339 - val_lo

In [None]:
# scikit-learn's F1 is imported under an alias so it does not overwrite the
# Keras metric function `f1_score` defined earlier in the notebook; otherwise
# re-running the compile cell after this one would pick up the wrong function.
from sklearn.metrics import accuracy_score, f1_score as sk_f1_score

# Predicted probabilities come back with shape (n_samples, 1).
y_pred_probs = model.predict(X_test)
# Threshold at 0.5 and flatten to 1-D so predictions match the shape of y_test.
y_pred = (y_pred_probs > 0.5).astype(int).ravel()

# Metrics computed over the whole test set (not batch-averaged).
acc = accuracy_score(y_test, y_pred)
f1 = sk_f1_score(y_test, y_pred)

print(f"Acurácia: {acc:.4f}")
print(f"F1 Score: {f1:.4f}")


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Acurácia: 0.8097
F1 Score: 0.7606
