In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
np.set_printoptions(precision=2)
import logging
logging.getLogger("tensorflow").setLevel(logging.ERROR)
tf.autograph.set_verbosity(0)

# Leitura e Tratamento dos Dados

In [None]:
# Load the training and test sets from the local dataset folder.
train, test = pd.read_csv('./dataset/train.csv'), pd.read_csv('./dataset/test.csv')

# Show the training frame, then only the first rows of the test frame.
display(train)
display(test.head())

In [None]:
# Report the shape of both DataFrames, one per line.
print(f'Train set shape: {train.shape}\nTest set shape: {test.shape}')

In [None]:
# Count how often each distinct 'Embarked' value occurs in the training set.
train['Embarked'].value_counts()

In [None]:
def transforma_embarked(feature):
    """Encode an embarkation port as an integer code.

    Returns 0 for 'S', 1 for 'C', and 2 for every other value (so any
    remaining port and missing values all share the code 2).
    """
    for codigo, porto in enumerate(('S', 'C')):
        if feature == porto:
            return codigo
    return 2


# Store the integer port code as a new 'Embarked_b' column on both frames.
train['Embarked_b'], test['Embarked_b'] = (
    train['Embarked'].map(transforma_embarked),
    test['Embarked'].map(transforma_embarked),
)

In [None]:
# Columns selected as model features. 'Sex_b', 'Name_t' and 'Embarked_b' are the
# encoded columns this notebook adds to the frames; the rest come from the CSVs.
variaveis = ['Sex_b', 'Age', 'Name_t', 'Pclass', 'Embarked_b', 'SibSp', 'Parch', 'Fare']

In [None]:
# Count how often each distinct 'Sex' value occurs in the training set.
train['Sex'].value_counts()

In [None]:
def transforma_sexo(feature):
    """Encode sex as a binary flag: 1 for 'female', 0 for any other value."""
    return 1 if feature == 'female' else 0


# Store the binary sex flag as a new 'Sex_b' column on both frames.
train['Sex_b'], test['Sex_b'] = (
    train['Sex'].map(transforma_sexo),
    test['Sex'].map(transforma_sexo),
)

In [None]:
# Preview the first rows, then show how the encoded 'Sex_b' values are distributed.
display(train.head())
train['Sex_b'].value_counts()

In [None]:
# Count occurrences of each distinct 'Name' value in the training set.
train['Name'].value_counts()

In [None]:
# Titles searched for inside passenger names. A title's position in this list
# is the code that `split` returns for it, so the order must stay stable.
nome = ['Mr.', 'Miss.', 'Mrs.', 'Master.', 'Dr.', 'Col.', 'Major.', 'Sir.', 'Rev.', 'Mlle.', 'Capt.', 'Lady.', 'Mme.', 'Ms.', 'Jonkheer.', 'Don.']


def split(feature):
    """Return the code of the first known title contained in a passenger name.

    Titles are tried in the order they appear in `nome`; the first one that
    occurs as a substring of `feature` wins.

    Parameters
    ----------
    feature : str
        Full passenger name, e.g. ``'Braund, Mr. Owen Harris'``.

    Returns
    -------
    str or None
        The matching title's index in `nome`, formatted as a string
        (e.g. ``'0'`` for ``'Mr.'``), or ``None`` when no title matches or
        when `feature` is not a string (such as a missing value).
    """
    # Missing names arrive as NaN/None; treat them like "no title found"
    # instead of failing on the substring test.
    if not isinstance(feature, str):
        return None
    for codigo, titulo in enumerate(nome):
        if titulo in feature:
            return str(codigo)
    return None

In [None]:
# Store the title code extracted from each name as 'Name_t' on both frames.
train['Name_t'], test['Name_t'] = train['Name'].map(split), test['Name'].map(split)

# Show how the title codes are distributed in the training set.
train['Name_t'].value_counts()

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Feature matrices for both sets: keep only the model columns and replace
# missing values with the sentinel -1.
X_train, X_test = (frame[variaveis].fillna(-1) for frame in (train, test))

# Target labels (only available for the training set).
y_train = train['Survived']

In [None]:
# Hold out 40% of the labelled rows as a cross-validation set; random_state
# fixes the shuffle so the split is the same on every run.
X_train_s, X_cv, y_train_s, y_cv = train_test_split(X_train, y_train, test_size=0.4, random_state=42)
# Display the shapes of the four resulting pieces.
X_train_s.shape, X_cv.shape, y_train_s.shape, y_cv.shape

In [None]:
# Fit the scaler on the training split only and standardize that split.
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train_s)
# Display the scaled and unscaled features together for comparison.
X_train_norm, X_train_s

In [None]:
# Scale the CV and test features with the scaler already fitted on the
# training split (no refitting here).
X_cv_norm = scaler.transform(X_cv)
X_test_norm = scaler.transform(X_test)

# Rede Neural

In [None]:
# Hyperparameters: Adam step size and L2 penalty strength.
learning_rate = 1e-4
lambda_r = 1e-2

# Seed the Python, NumPy and TensorFlow generators so that building and
# training the network gives repeatable results (the data split is already
# pinned with random_state=42).
tf.keras.utils.set_random_seed(42)

# Build the model: four L2-regularized ReLU layers of decreasing width (named
# L1..L4), followed by a single linear unit (L5) that outputs a logit.
hidden_units = (250, 150, 75, 25)
model = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(units=units, activation='relu', name=f'L{position}',
                              kernel_regularizer=tf.keras.regularizers.l2(lambda_r))
        for position, units in enumerate(hidden_units, start=1)
    ]
    + [tf.keras.layers.Dense(units=1, activation='linear', name='L5')],
    name='model',
)

# Loss and optimizer: from_logits=True because the output layer is linear,
# so the sigmoid is applied inside the loss rather than in the network.
model.compile(loss=BinaryCrossentropy(from_logits=True),
              optimizer=Adam(learning_rate=learning_rate))

In [None]:
# Train the model on the standardized training split for 500 epochs, with
# per-epoch progress output turned off.
model.fit(X_train_norm, y_train_s, epochs=500, verbose=0)

In [None]:
# Decision threshold applied to the predicted probabilities.
threshold = 0.5

# Fraction of misclassified examples on the training split.
yhat = model.predict(X_train_norm)
yhat = tf.math.sigmoid(yhat)  # the model outputs logits; convert to probabilities
yhat = np.where(yhat >= threshold, 1, 0)
# Flatten the predictions so they are compared element-wise with the 1-D
# labels, then average the mismatches. (Dividing the number of predicted
# positives by the number of actual positives is not an error rate.)
train_error = np.mean(yhat.reshape(-1) != np.asarray(y_train_s))

# Fraction of misclassified examples on the cross-validation split.
yhat = model.predict(X_cv_norm)
yhat = tf.math.sigmoid(yhat)
yhat = np.where(yhat >= threshold, 1, 0)
cv_error = np.mean(yhat.reshape(-1) != np.asarray(y_cv))

# Report both error fractions.
print(f'Erro do training set: {train_error:.2f}, erro do cv set: {cv_error:.2f}')

In [None]:
# Print a summary of the model's layers.
model.summary()

In [None]:
# Compute the predictions for X_test: logits -> probabilities -> 0/1 labels.
yhat = model.predict(X_test_norm)
yhat = tf.math.sigmoid(yhat)  # the output layer is linear, so predict() yields logits
yhat = np.where(yhat >=threshold, 1, 0)
yhat

# Criando o Resultado das Previsões para Importar ao Kaggle

In [None]:
# Build the output Series: flattened 0/1 predictions, indexed by the test
# set's PassengerId and named 'Survived'.
result = pd.Series(yhat.reshape(-1), index=test['PassengerId'], name='Survived')
result

In [None]:
# Write the predictions to CSV, including the header row.
# NOTE(review): the ./yhat directory presumably has to exist beforehand — confirm.
result.to_csv('./yhat/neural_network_model.csv', header=True)