In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Sequential, layers
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from time import time
from keras.models import load_model

In [2]:
# GPU memory growth fix: turn on memory growth for every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        # The memory-growth flag has to be the same on all GPUs, so set it on each one.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        physical_count = len(gpus)
        logical_count = len(logical_gpus)
        print(physical_count, "Physical GPUs,", logical_count, "Logical GPUs")
    except RuntimeError as err:
        # Memory growth can only be configured before the GPUs are initialized;
        # report the problem instead of aborting the notebook.
        print(err)

1 Physical GPUs, 1 Logical GPUs


In [3]:
# Loading data
# Raw strings keep the Windows backslashes literal; sequences such as "\P" or
# "\d" in a normal string are invalid escapes and trigger a warning on newer Pythons.
train_data = pd.read_csv(r'E:\Python\data\Titanic\prs_train.csv')
test_data = pd.read_csv(r'E:\Python\data\Titanic\prs_test.csv')

# Split the raw arrays into features and label.
# train: column 1 is used as the label, columns 2+ as features (column 0 is skipped).
# test:  columns 1+ are used as features (column 0 is skipped).
train_X, train_y = train_data.values[:, 2:], train_data.values[:, 1]
test_X = test_data.values[:, 1:]

# One hot encoding
# The encoder is left at its default (sparse) output and densified explicitly:
# the keyword that requests dense output was renamed between scikit-learn
# releases, whereas `.toarray()` behaves the same on all of them.
enc = OneHotEncoder(handle_unknown='ignore')
train_X_enc = enc.fit_transform(train_X).toarray()
# Categories unseen during fit are encoded as all-zero rows (handle_unknown='ignore').
test_X_enc = enc.transform(test_X).toarray()

# Cast everything to float32 so the arrays can be fed to the network.
train_X_enc = np.asarray(train_X_enc).astype(np.float32)
train_y_enc = np.asarray(train_y).astype(np.float32)
test_X_enc = np.asarray(test_X_enc).astype(np.float32)

In [4]:
from sklearn.model_selection import KFold
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.stats` has been loaded.
import scipy.stats

# Fixed seed so the fold assignment and weight initialisation are repeatable
# after a kernel restart.
SEED = 42
N_SPLITS = 7
tf.random.set_seed(SEED)


def build_model(n_features):
    """Build and compile the classifier trained on each fold.

    Args:
        n_features: number of columns in the encoded feature matrix.

    Returns:
        A compiled `Sequential` model: Dense(512) -> BatchNormalization ->
        ELU -> Dropout(0.5) -> Dense(2, softmax), optimised with Adam on
        sparse categorical cross-entropy and reporting accuracy.
    """
    model = Sequential()
    model.add(layers.Dense(512, kernel_initializer='he_normal', input_shape=(n_features,)))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('elu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(2, activation='softmax'))
    model.compile(optimizer='adam',
                  loss=keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])
    return model


# The input width is the same for every fold, so compute it once.
n_features = train_X_enc.shape[1]

# One vector of predicted test labels per fold; combined by majority vote below.
fold_predictions = []

cross_fold = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
for train_index, test_index in cross_fold.split(train_X_enc):
    # Fold-local names: the `train_X` / `train_y` created by the data-loading
    # cell are deliberately left untouched.
    validation_X, validation_y = train_X_enc[test_index], train_y_enc[test_index]
    fold_train_X, fold_train_y = train_X_enc[train_index], train_y_enc[train_index]

    model = build_model(n_features)

    # callbacks
    # Stop once validation accuracy has not improved for 30 epochs.
    es = EarlyStopping(monitor='val_accuracy', mode='max', min_delta=0.0001, verbose=1, patience=30)
    # Keep only the best epoch of the current fold on disk (overwritten every fold).
    mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
    tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
    # NOTE(review): this watches *training* accuracy while the other callbacks
    # watch val_accuracy - confirm that is intended.
    rlrop = ReduceLROnPlateau(monitor='accuracy', factor=0.5, patience=5, verbose=1)

    # fit model and save the best
    history = model.fit(x=fold_train_X, y=fold_train_y,
                        batch_size=8,
                        epochs=300,
                        validation_data=(validation_X, validation_y),
                        shuffle=True,
                        callbacks=[tensorboard, es, mc, rlrop])

    # Reload the best checkpoint of this fold with the same Keras implementation
    # (tf.keras) that wrote it.
    saved_model = keras.models.load_model('best_model.h5')

    probs = saved_model.predict(test_X_enc)
    predict = probs.argmax(axis=1)
    fold_predictions.append(predict)

# Majority vote across folds: one column per fold, mode taken along each row.
# `pr_values` is the result object of scipy.stats.mode; index 0 holds the modes.
pr_values = scipy.stats.mode(np.column_stack(fold_predictions), axis=1)

Epoch 1/300
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00001: val_accuracy improved from -inf to 0.80315, saving model to best_model.h5
Epoch 2/300
Epoch 00002: val_accuracy did not improve from 0.80315
Epoch 3/300
Epoch 00003: val_accuracy did not improve from 0.80315
Epoch 4/300
Epoch 00004: val_accuracy did not improve from 0.80315
Epoch 5/300
Epoch 00005: val_accuracy did not improve from 0.80315
Epoch 6/300
Epoch 00006: val_accuracy improved from 0.80315 to 0.81890, saving model to best_model.h5
Epoch 7/300
Epoch 00007: val_accuracy did not improve from 0.81890
Epoch 8/300
Epoch 00008: val_accuracy did not improve from 0.81890
Epoch 9/300
Epoch 00009: val_accuracy did not improve from 0.81890
Epoch 10/300
Epoch 00010: val_accuracy did not improve from 0.81890
Epoch 11/300
Epoch 00011: val_accuracy did not improve from 0.81890
Epoch 12/300
Epoch 00012: val_accuracy did not improve from 0.81890
Epoch 13/300
Epoch 00013: val_accuracy did not improve

Epoch 28/300
Epoch 00028: val_accuracy did not improve from 0.81890
Epoch 29/300
Epoch 00029: val_accuracy did not improve from 0.81890
Epoch 30/300
Epoch 00030: val_accuracy did not improve from 0.81890
Epoch 31/300
Epoch 00031: val_accuracy did not improve from 0.81890

Epoch 00031: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 32/300
Epoch 00032: val_accuracy did not improve from 0.81890
Epoch 33/300
Epoch 00033: val_accuracy did not improve from 0.81890
Epoch 34/300
Epoch 00034: val_accuracy did not improve from 0.81890
Epoch 35/300
Epoch 00035: val_accuracy did not improve from 0.81890
Epoch 36/300
Epoch 00036: val_accuracy did not improve from 0.81890

Epoch 00036: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 00036: early stopping
Epoch 1/300
Epoch 00001: val_accuracy improved from -inf to 0.74803, saving model to best_model.h5
Epoch 2/300
Epoch 00002: val_accuracy improved from 0.74803 to 0.75591, saving model to best_model.

Epoch 19/300
Epoch 00019: val_accuracy did not improve from 0.82677
Epoch 20/300
Epoch 00020: val_accuracy did not improve from 0.82677
Epoch 21/300
Epoch 00021: val_accuracy did not improve from 0.82677
Epoch 22/300
Epoch 00022: val_accuracy did not improve from 0.82677
Epoch 23/300
Epoch 00023: val_accuracy did not improve from 0.82677
Epoch 24/300
Epoch 00024: val_accuracy did not improve from 0.82677
Epoch 25/300
Epoch 00025: val_accuracy did not improve from 0.82677
Epoch 26/300
Epoch 00026: val_accuracy did not improve from 0.82677
Epoch 27/300
Epoch 00027: val_accuracy did not improve from 0.82677
Epoch 28/300
Epoch 00028: val_accuracy did not improve from 0.82677
Epoch 29/300
Epoch 00029: val_accuracy did not improve from 0.82677
Epoch 30/300
Epoch 00030: val_accuracy did not improve from 0.82677
Epoch 31/300
Epoch 00031: val_accuracy did not improve from 0.82677
Epoch 32/300
Epoch 00032: val_accuracy did not improve from 0.82677

Epoch 00032: ReduceLROnPlateau reducing learnin

Epoch 2/300
Epoch 00002: val_accuracy improved from 0.77953 to 0.82677, saving model to best_model.h5
Epoch 3/300
Epoch 00003: val_accuracy improved from 0.82677 to 0.83465, saving model to best_model.h5
Epoch 4/300
Epoch 00004: val_accuracy did not improve from 0.83465
Epoch 5/300
Epoch 00005: val_accuracy did not improve from 0.83465
Epoch 6/300
Epoch 00006: val_accuracy did not improve from 0.83465
Epoch 7/300
Epoch 00007: val_accuracy did not improve from 0.83465
Epoch 8/300
Epoch 00008: val_accuracy improved from 0.83465 to 0.84252, saving model to best_model.h5
Epoch 9/300
Epoch 00009: val_accuracy did not improve from 0.84252
Epoch 10/300
Epoch 00010: val_accuracy did not improve from 0.84252
Epoch 11/300
Epoch 00011: val_accuracy did not improve from 0.84252
Epoch 12/300
Epoch 00012: val_accuracy did not improve from 0.84252
Epoch 13/300
Epoch 00013: val_accuracy did not improve from 0.84252
Epoch 14/300
Epoch 00014: val_accuracy did not improve from 0.84252
Epoch 15/300
Epoch 

Epoch 31/300
Epoch 00031: val_accuracy did not improve from 0.84252
Epoch 32/300
Epoch 00032: val_accuracy did not improve from 0.84252
Epoch 33/300
Epoch 00033: val_accuracy did not improve from 0.84252
Epoch 34/300
Epoch 00034: val_accuracy did not improve from 0.84252
Epoch 35/300
Epoch 00035: val_accuracy did not improve from 0.84252

Epoch 00035: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 36/300
Epoch 00036: val_accuracy did not improve from 0.84252
Epoch 37/300
Epoch 00037: val_accuracy did not improve from 0.84252
Epoch 38/300
Epoch 00038: val_accuracy did not improve from 0.84252
Epoch 00038: early stopping
Epoch 1/300
Epoch 00001: val_accuracy improved from -inf to 0.76378, saving model to best_model.h5
Epoch 2/300
Epoch 00002: val_accuracy improved from 0.76378 to 0.83465, saving model to best_model.h5
Epoch 3/300
Epoch 00003: val_accuracy did not improve from 0.83465
Epoch 4/300
Epoch 00004: val_accuracy did not improve from 0.83465
Epoch 5/300


Epoch 00020: val_accuracy did not improve from 0.83465
Epoch 21/300
Epoch 00021: val_accuracy did not improve from 0.83465
Epoch 22/300
Epoch 00022: val_accuracy did not improve from 0.83465
Epoch 23/300
Epoch 00023: val_accuracy did not improve from 0.83465
Epoch 24/300
Epoch 00024: val_accuracy did not improve from 0.83465
Epoch 25/300
Epoch 00025: val_accuracy did not improve from 0.83465
Epoch 26/300
Epoch 00026: val_accuracy did not improve from 0.83465
Epoch 27/300
Epoch 00027: val_accuracy did not improve from 0.83465
Epoch 28/300
Epoch 00028: val_accuracy did not improve from 0.83465

Epoch 00028: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 29/300
Epoch 00029: val_accuracy did not improve from 0.83465
Epoch 30/300
Epoch 00030: val_accuracy did not improve from 0.83465
Epoch 31/300
Epoch 00031: val_accuracy did not improve from 0.83465
Epoch 32/300
Epoch 00032: val_accuracy did not improve from 0.83465
Epoch 00032: early stopping
Epoch 1/300
Epoch 00

Epoch 16/300
Epoch 00016: val_accuracy did not improve from 0.83465
Epoch 17/300
Epoch 00017: val_accuracy did not improve from 0.83465
Epoch 18/300
Epoch 00018: val_accuracy did not improve from 0.83465
Epoch 19/300
Epoch 00019: val_accuracy did not improve from 0.83465

Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 20/300
Epoch 00020: val_accuracy did not improve from 0.83465
Epoch 21/300
Epoch 00021: val_accuracy did not improve from 0.83465
Epoch 22/300
Epoch 00022: val_accuracy did not improve from 0.83465
Epoch 23/300
Epoch 00023: val_accuracy did not improve from 0.83465
Epoch 24/300
Epoch 00024: val_accuracy did not improve from 0.83465
Epoch 25/300
Epoch 00025: val_accuracy did not improve from 0.83465

Epoch 00025: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 26/300
Epoch 00026: val_accuracy did not improve from 0.83465
Epoch 27/300
Epoch 00027: val_accuracy did not improve from 0.83465
Epoch 28/300
Epoch 000

Epoch 00010: val_accuracy did not improve from 0.81890
Epoch 11/300
Epoch 00011: val_accuracy did not improve from 0.81890
Epoch 12/300
Epoch 00012: val_accuracy did not improve from 0.81890
Epoch 13/300
Epoch 00013: val_accuracy did not improve from 0.81890
Epoch 14/300
Epoch 00014: val_accuracy did not improve from 0.81890
Epoch 15/300
Epoch 00015: val_accuracy did not improve from 0.81890
Epoch 16/300
Epoch 00016: val_accuracy did not improve from 0.81890
Epoch 17/300
Epoch 00017: val_accuracy did not improve from 0.81890
Epoch 18/300
Epoch 00018: val_accuracy did not improve from 0.81890
Epoch 19/300
Epoch 00019: val_accuracy did not improve from 0.81890
Epoch 20/300
Epoch 00020: val_accuracy did not improve from 0.81890
Epoch 21/300
Epoch 00021: val_accuracy did not improve from 0.81890
Epoch 22/300
Epoch 00022: val_accuracy did not improve from 0.81890

Epoch 00022: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 23/300
Epoch 00023: val_accuracy did not i

Epoch 6/300
Epoch 00006: val_accuracy improved from 0.79528 to 0.81102, saving model to best_model.h5
Epoch 7/300
Epoch 00007: val_accuracy improved from 0.81102 to 0.82677, saving model to best_model.h5
Epoch 8/300
Epoch 00008: val_accuracy did not improve from 0.82677
Epoch 9/300
Epoch 00009: val_accuracy did not improve from 0.82677
Epoch 10/300
Epoch 00010: val_accuracy improved from 0.82677 to 0.83465, saving model to best_model.h5
Epoch 11/300
Epoch 00011: val_accuracy did not improve from 0.83465
Epoch 12/300
Epoch 00012: val_accuracy did not improve from 0.83465
Epoch 13/300
Epoch 00013: val_accuracy did not improve from 0.83465
Epoch 14/300
Epoch 00014: val_accuracy did not improve from 0.83465
Epoch 15/300
Epoch 00015: val_accuracy did not improve from 0.83465
Epoch 16/300
Epoch 00016: val_accuracy did not improve from 0.83465
Epoch 17/300
Epoch 00017: val_accuracy did not improve from 0.83465
Epoch 18/300
Epoch 00018: val_accuracy did not improve from 0.83465
Epoch 19/300
Ep

Epoch 35/300
Epoch 00035: val_accuracy did not improve from 0.85039
Epoch 36/300
Epoch 00036: val_accuracy did not improve from 0.85039
Epoch 37/300
Epoch 00037: val_accuracy did not improve from 0.85039
Epoch 38/300
Epoch 00038: val_accuracy did not improve from 0.85039
Epoch 39/300
Epoch 00039: val_accuracy did not improve from 0.85039
Epoch 40/300
Epoch 00040: val_accuracy did not improve from 0.85039
Epoch 41/300
Epoch 00041: val_accuracy did not improve from 0.85039

Epoch 00041: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 42/300
Epoch 00042: val_accuracy did not improve from 0.85039
Epoch 43/300
Epoch 00043: val_accuracy did not improve from 0.85039
Epoch 44/300
Epoch 00044: val_accuracy did not improve from 0.85039
Epoch 45/300
Epoch 00045: val_accuracy did not improve from 0.85039
Epoch 46/300
Epoch 00046: val_accuracy did not improve from 0.85039
Epoch 47/300
Epoch 00047: val_accuracy improved from 0.85039 to 0.85827, saving model to best_model.h5


Epoch 00062: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 63/300
Epoch 00063: val_accuracy did not improve from 0.85827
Epoch 64/300
Epoch 00064: val_accuracy did not improve from 0.85827
Epoch 65/300
Epoch 00065: val_accuracy did not improve from 0.85827
Epoch 66/300
Epoch 00066: val_accuracy did not improve from 0.85827
Epoch 67/300
Epoch 00067: val_accuracy did not improve from 0.85827

Epoch 00067: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 68/300
Epoch 00068: val_accuracy did not improve from 0.85827
Epoch 69/300
Epoch 00069: val_accuracy did not improve from 0.85827
Epoch 70/300
Epoch 00070: val_accuracy did not improve from 0.85827
Epoch 71/300
Epoch 00071: val_accuracy did not improve from 0.85827
Epoch 72/300
Epoch 00072: val_accuracy did not improve from 0.85827

Epoch 00072: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.
Epoch 73/300
Epoch 00073: val_accuracy did not improve from 0.85827
Epoch 74/

In [5]:
# Build the submission table: the first column of the test file as the id and
# the majority-vote label as the prediction.
# Depending on the SciPy version the mode array is shaped (n, 1) or (n,);
# ravel() yields one label per row in both cases.
survived = np.asarray(pr_values[0]).ravel()
data3 = pd.DataFrame({'PassengerId': test_data.iloc[:, 0].values, 'Survived': survived})
data3

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0


In [6]:
# Write the submission table to disk, leaving out the DataFrame index column.
submission_path = 'E:/Python/data/Titanic/gender_submission19.csv'
data3.to_csv(submission_path, index=False)