In [None]:
import numpy as np
import pandas as pd

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

In [None]:
from pandas import DataFrame as df

# First-pass preprocessing: load both CSVs, turn them into arrays, encode the
# categorical columns and print a preview of the result.
# NOTE(review): a later cell re-reads the same CSVs and rebinds pd_train,
# pd_test, np_train and np_test, so this cell appears to serve only as a
# preview -- confirm before relying on anything it leaves behind.
pd_train = pd.read_csv('/kaggle/input/titanic/train.csv', header=0)
pd_test = pd.read_csv('/kaggle/input/titanic/test.csv', header=0)

# Preprocessing Train Data

# Converting to numpy
np_train = pd_train.to_numpy()
# Shuffling before use. np.random.shuffle reorders the rows in place and is
# not seeded here, so the row order differs on every run.
np.random.shuffle(np_train)
# np.random.shuffle returns None, so assigning its result (as an earlier
# draft of this line did) would not yield a shuffled array.
np_test = pd_test.to_numpy()

# Separating Data and Labels, removing unnecessary columns.
# Column 1 of the train array is used as the label; columns 1, 3, 8 and 10
# are dropped from the features.
# (presumably Survived, Name, Ticket, Cabin -- confirm against the CSV header)
train_data = np.delete(np_train, [1, 3, 8, 10], axis=1)
# Labels come from the already-shuffled array, so they stay aligned with the
# rows of train_data.
train_labels = np_train[:,1].astype(int)

# The test array has no label column, hence the different indices.
# (presumably Name, Ticket, Cabin -- confirm against the CSV header)
test_data = np.delete(np_test, [2,7,9], axis=1)

# Male = 1 Female = 0 (any value other than the string 'male' becomes 0)
train_data[:,2] = (train_data[:,2] == 'male').astype(int)
test_data[:,2] = (test_data[:,2] == 'male').astype(int)

# C = 1, Q = 2, S = 3
# pd_train / pd_test are rebound here: they no longer hold the raw CSV frames
# but the reduced feature arrays, with default integer column labels.
# The replacement is applied to every cell of the frame that equals 'C', 'Q'
# or 'S', not to a single column.
pd_train = pd.DataFrame(data=train_data, index=None, columns=None)
pd_train.replace(['C', 'Q', 'S'],[1, 2, 3], inplace=True)

pd_test = pd.DataFrame(data=test_data, index=None, columns=None)
pd_test.replace(['C', 'Q', 'S'],[1, 2, 3], inplace=True)

# Filling in NaN as 0 (applied to every column of both frames)
pd_train.fillna(0, inplace=True)
pd_test.fillna(0, inplace=True)

# Preview the first rows of the processed frames.
print('Output after all processing \n\n',pd_train.head())
print('Output after all processing \n',pd_test.head())

In [None]:
from pandas import DataFrame as df

# Reload the raw CSVs so this cell always starts from untouched data.
pd_train = pd.read_csv('/kaggle/input/titanic/train.csv', header=0)
pd_test = pd.read_csv('/kaggle/input/titanic/test.csv', header=0)

# Apply the same cleaning to both frames:
#   1. encode C = 1, Q = 2, S = 3 (frame-wide, wherever a cell equals one of them)
#   2. fill every remaining NaN with 0
for frame in (pd_train, pd_test):
    frame.replace(['C', 'Q', 'S'], [1, 2, 3], inplace=True)
    frame.fillna(0, inplace=True)

# Hand the cleaned frames over as numpy arrays for the later cells.
np_train, np_test = pd_train.to_numpy(), pd_test.to_numpy()

# Quick sanity check: show the second training row.
print(np_train[1])

In [None]:
def preprocessing(np_train, np_test):
    """Shuffle, split and standardise the raw arrays for the network.

    Parameters
    ----------
    np_train : numpy.ndarray
        2-D array of training rows. Column 1 is used as the label; columns
        1, 3, 8 and 10 are dropped from the features.
        (presumably Survived, Name, Ticket, Cabin -- confirm against the CSV)
    np_test : numpy.ndarray
        2-D array of test rows. Columns 2, 7 and 9 are dropped.
        (presumably Name, Ticket, Cabin -- confirm against the CSV)

    Every remaining column except the sex column (index 2 after dropping)
    must already be numeric, otherwise the float conversion raises.

    Returns
    -------
    (train_data, train_labels, test_data) : tuple of float numpy.ndarray
        Features are standardised per column with the training mean and
        standard deviation. Training rows come back in a random order with
        labels aligned; test rows keep their original order.

    Notes
    -----
    The inputs are not modified: the shuffle works on a copy.
    """
    # Shuffle a copy (fancy indexing copies) so the caller's array keeps its order.
    shuffled = np_train[np.random.permutation(len(np_train))]

    # Separating data and labels, removing unnecessary columns
    train_labels = shuffled[:, 1].astype(int)
    train_data = np.delete(shuffled, [1, 3, 8, 10], axis=1)
    test_data = np.delete(np_test, [2, 7, 9], axis=1)

    # Male = 1 Female = 0
    train_data[:, 2] = (train_data[:, 2] == 'male').astype(int)
    test_data[:, 2] = (test_data[:, 2] == 'male').astype(int)

    # Convert to float before computing statistics: per-column reductions
    # such as std(axis=0) are not supported on object-dtype arrays.
    train_data = train_data.astype(float)
    test_data = test_data.astype(float)

    # Standardise each feature with statistics taken from the training set only.
    mean = train_data.mean(axis=0)
    std = train_data.std(axis=0)
    # A constant column has zero spread; divide by 1 instead of producing NaN/inf.
    std[std == 0] = 1.0

    train_data = (train_data - mean) / std
    test_data = (test_data - mean) / std

    return train_data, train_labels.astype(float), test_data

In [None]:
# Building Network

from keras import models
from keras import layers
from keras import regularizers

def build_model(input_dim=8, learning_rate=0.001):
    """Build and compile the binary classifier.

    Architecture: Dense(64, relu) -> Dense(32, relu, L1+L2 regularised)
    -> Dense(1, sigmoid), compiled with RMSprop, binary cross-entropy loss
    and the accuracy metric.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features per sample (default 8).
    learning_rate : float, optional
        RMSprop learning rate (default 0.001).

    Returns
    -------
    A compiled Sequential model.
    """
    # The cell-level imports bring in models, layers and regularizers but not
    # optimizers; import it here so the function works on a fresh kernel.
    from keras import optimizers

    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(input_dim,)))
    model.add(layers.Dense(32,
                           kernel_regularizer=regularizers.l1_l2(l1=0.002, l2=0.002),
                           activation='relu'))
    # Single sigmoid unit: the output is a probability for the positive class.
    model.add(layers.Dense(1, activation='sigmoid'))

    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    model.compile(optimizer=optimizers.RMSprop(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# Validation

# Module-level accumulators filled in by train(), one entry per fold.
# Each name is bound to its own, independent empty list.
all_mae, all_mse = [], []
all_histories, all_mae_histories = [], []
all_val_loss, all_val_acc = [], []
all_train_loss, all_train_acc = [], []
all_models = []

# K-fold evaluation
def train(np_train, np_test, num_epochs, k, batch):
    """Run k-fold cross-validation and return the last fold's model.

    Parameters
    ----------
    np_train, np_test : numpy.ndarray
        Raw arrays, passed straight to preprocessing().
    num_epochs : int
        Epochs to train each fold's model.
    k : int
        Number of folds (at least 2).
    batch : int
        Mini-batch size passed to model.fit.

    Returns
    -------
    (model, test_data)
        The model trained on the last fold and the preprocessed test features.

    Raises
    ------
    ValueError
        If k is smaller than 2.

    Side effects
    ------------
    Appends one entry per fold to the module-level lists all_histories,
    all_val_loss, all_val_acc, all_train_loss, all_train_acc, all_mse,
    all_mae and all_models.
    """
    if k < 2:
        raise ValueError("k-fold validation needs k >= 2, got %r" % (k,))

    # Preprocess (and shuffle) exactly once. Reshuffling inside the loop would
    # change the row order between folds, so validation slices would overlap
    # and the result would no longer be a k-fold split.
    # NOTE(review): scaling statistics are still computed over all training
    # rows, including each fold's validation slice.
    train_data, train_labels, test_data = preprocessing(np_train, np_test)
    num_val_sample = len(train_data) // k

    model = None
    for i in range(k):
        print('k-fold #', i)

        val_start = num_val_sample * i
        val_end = val_start + num_val_sample

        # Fold i is held out for validation...
        x_val = train_data[val_start:val_end]
        y_val = train_labels[val_start:val_end]

        # ...and everything before and after it is used for training.
        x_train = np.concatenate([train_data[:val_start], train_data[val_end:]], axis=0)
        y_train = np.concatenate([train_labels[:val_start], train_labels[val_end:]], axis=0)

        model = build_model()
        history = model.fit(x_train, y_train,
                            validation_data=(x_val, y_val),
                            epochs=num_epochs, batch_size=batch, verbose=0)

        all_histories.append(history.history)
        all_val_loss.append(history.history['val_loss'])
        all_val_acc.append(history.history['val_accuracy'])
        all_train_loss.append(history.history['loss'])
        all_train_acc.append(history.history['accuracy'])

        # evaluate() returns the compiled loss followed by the compiled
        # metrics. The list names are historical: all_mse receives the first
        # value and all_mae the second.
        val_loss, val_metric = model.evaluate(x_val, y_val, verbose=0)
        all_mse.append(val_loss)
        all_mae.append(val_metric)
        all_models.append(model)

    # Return the model from the final fold of THIS call. A fixed index into
    # all_models breaks for k < 9 and picks a stale model on repeated calls.
    return model, test_data

In [None]:
# Running the model
# Cross-validation settings: number of folds, epochs per fold, mini-batch size.
k, num_epochs, batch = 9, 350, 32

# Train across all folds; train() hands back a model and the preprocessed
# test features.
model, test_data = train(np_train, np_test, num_epochs=num_epochs, k=k, batch=batch)

In [None]:
import matplotlib.pyplot as plt

# Sequential.predict_classes is no longer available in current Keras. For a
# single sigmoid output it was equivalent to thresholding the predicted
# probability at 0.5, which is done explicitly here.
prediction = (model.predict(test_data) > 0.5).astype("int32")
col_1 = prediction[:, 0]

# Read the ids from the loaded test set instead of hard-coding their range, so
# the submission always has one id per predicted row.
# assumes column 0 of np_test is PassengerId -- confirm against the CSV header
col_0 = np_test[:, 0].astype(int)

output = pd.DataFrame({"PassengerId": col_0, "Survived": col_1})
# to_csv returns None when given a path, so its result is not kept.
output.to_csv("submission.csv", index=False)
print(output)

def _epoch_mean(fold_histories):
    """Average a per-epoch metric across folds, giving one value per epoch."""
    return [np.mean([fold[i] for fold in fold_histories]) for i in range(num_epochs)]


avg_train_loss = _epoch_mean(all_train_loss)
avg_train_acc = _epoch_mean(all_train_acc)
avg_val_acc = _epoch_mean(all_val_acc)
avg_val_loss = _epoch_mean(all_val_loss)

# train() stores the two values returned by model.evaluate() in all_mse and
# all_mae. The model is compiled with binary cross-entropy loss and the
# accuracy metric, so despite the names these hold per-fold validation loss
# and validation accuracy.
mse = list(all_mse)
mae = list(all_mae)  # previously copied all_mse, so both plots showed the same data

epochs = range(1, len(avg_train_loss) + 1)

# Loss per epoch, averaged over folds
plt.plot(epochs, avg_train_loss, 'bo', label='Training loss')
plt.plot(epochs, avg_val_loss, 'b', label='Validation loss')
plt.title('Loss per epoch (mean over folds)')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

# Accuracy per epoch, averaged over folds
plt.plot(epochs, avg_train_acc, 'bo', label='Training accuracy')
plt.plot(epochs, avg_val_acc, 'b', label='Validation accuracy')
plt.title('Accuracy per epoch (mean over folds)')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

# Final validation loss of each fold
plt.plot(range(1, len(mse) + 1), mse, 'b', label='Validation loss (binary cross-entropy)')
plt.title('Final validation loss per fold')
plt.xlabel('K-fold')
plt.ylabel('Loss')
plt.legend()

plt.show()

# Final validation accuracy of each fold
plt.plot(range(1, len(mae) + 1), mae, 'b', label='Validation accuracy')
plt.title('Final validation accuracy per fold')
plt.xlabel('K-fold')
plt.ylabel('Accuracy')
plt.legend()

plt.show()