### 1. Importing Libraries

In [49]:
import pandas as pd
import numpy as np

import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

### 2. Importing Dataset Titacnic

In [5]:
df_train = pd.read_csv('titanic/train.csv')
df_test = pd.read_csv('titanic/test.csv')

In [6]:
df_train.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [7]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


### 3. Data Preprocessing

In [8]:
df_train = df_train.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

In [9]:
df_train.head(10)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,male,22.0,1,0,7.25,S
1,1,1,female,38.0,1,0,71.2833,C
2,1,3,female,26.0,0,0,7.925,S
3,1,1,female,35.0,1,0,53.1,S
4,0,3,male,35.0,0,0,8.05,S
5,0,3,male,,0,0,8.4583,Q
6,0,1,male,54.0,0,0,51.8625,S
7,0,3,male,2.0,3,1,21.075,S
8,1,3,female,27.0,0,2,11.1333,S
9,1,2,female,14.0,1,0,30.0708,C


### 4. Encoding Categorical Data

In [10]:
labelencoder = LabelEncoder()

df_train['Sex'] = labelencoder.fit_transform(df_train['Sex'])
df_train['Embarked'] = labelencoder.fit_transform(df_train['Embarked'])

df_train.head(10)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,0,3,1,22.0,1,0,7.25,2
1,1,1,0,38.0,1,0,71.2833,0
2,1,3,0,26.0,0,0,7.925,2
3,1,1,0,35.0,1,0,53.1,2
4,0,3,1,35.0,0,0,8.05,2
5,0,3,1,,0,0,8.4583,1
6,0,1,1,54.0,0,0,51.8625,2
7,0,3,1,2.0,3,1,21.075,2
8,1,3,0,27.0,0,2,11.1333,2
9,1,2,0,14.0,1,0,30.0708,0


### 5. Spliting Train to Train and Validation

In [11]:
X_train, X_validation, y_train, y_validation = train_test_split(df_train.drop(['Survived'], axis=1), df_train['Survived'], test_size=0.1, random_state=0)

### 6. Feature Scaling

In [12]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

In [13]:
sc.fit(X_train)

X_train = sc.transform(X_train)
X_validation = sc.transform(X_validation)

In [14]:
X_train

array([[-1.60703355,  0.73510857,         nan, ..., -0.4680658 ,
        -0.63579689,  0.57180976],
       [ 0.80803938,  0.73510857, -0.74697379, ..., -0.4680658 ,
        -0.47501012,  0.57180976],
       [-0.39949708,  0.73510857, -1.84760272, ...,  0.765226  ,
        -0.10634377,  0.57180976],
       ...,
       [ 0.80803938,  0.73510857,         nan, ..., -0.4680658 ,
        -0.47831921, -0.7040109 ],
       [ 0.80803938, -1.36034327,  0.42244445, ..., -0.4680658 ,
        -0.28147057,  0.57180976],
       [-0.39949708,  0.73510857,  2.07338784, ...,  0.765226  ,
         0.15838279,  0.57180976]])

In [15]:
X_validation

array([[ 8.08039385e-01,  7.35108574e-01,             nan,
        -4.67354299e-01, -4.68065802e-01, -3.41374120e-01,
        -1.97983156e+00],
       [ 8.08039385e-01,  7.35108574e-01,             nan,
        -4.67354299e-01, -4.68065802e-01, -4.82051851e-01,
         5.71809759e-01],
       [ 8.08039385e-01,  7.35108574e-01, -1.57244549e+00,
         3.12354061e+00,  7.65225998e-01, -4.27075786e-02,
        -7.04010901e-01],
       [-1.60703355e+00, -1.36034327e+00,             nan,
         4.30369427e-01, -4.68065802e-01,  2.34789137e+00,
        -1.97983156e+00],
       [ 8.08039385e-01, -1.36034327e+00, -5.90807083e-02,
        -4.67354299e-01,  1.99851780e+00, -3.25337800e-01,
        -1.97983156e+00],
       [-1.60703355e+00,  7.35108574e-01,             nan,
        -4.67354299e-01, -4.68065802e-01, -1.47076540e-02,
         5.71809759e-01],
       [-1.60703355e+00, -1.36034327e+00,  6.97601681e-01,
        -4.67354299e-01, -4.68065802e-01,  2.48924924e+00,
         5.7180975

### Neural Network Model

In [45]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')
])

In [46]:
model.compile(optimizer='RMSPROP', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Training Model using Early Stopping

In [47]:
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

In [48]:
model.fit(X_train, y_train, epochs=10, batch_size=4, callbacks=[early_stopping])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x185f9455660>

### Using K-Fold Cross Validation

In [50]:
k_outer = 5
k_inner = 5

kfold_outer = KFold(n_splits=k_outer, shuffle=True, random_state=42)
for train_index, validation_index in kfold_outer.split(X_train):
    X_train_outer, X_validation_outer = X_train[train_index], X_train[validation_index]
    y_train_outer, y_validation_outer = y_train[train_index], y_train[validation_index]

    bestAccuracy = 0
    BestOcultLayer = 0
    BestNeuron = 0

    ocultLayer = [1,2,3]
    neuron = [32,64,128]

    for layer, neuron in zip(ocultLayer, neuron):
        kfold_inner = KFold(n_splits=k_inner, shuffle=True, random_state=42)
        for train_index, validation_index in kfold_inner.split(X_train_outer):
            X_train_inner, X_validation_inner = X_train_outer[train_index], X_train_outer[validation_index]
            y_train_inner, y_validation_inner = y_train_outer[train_index], y_train_outer[validation_index]

            model = tf.keras.models.Sequential()
            model.add(tf.keras.layers.Dense(neuron, activation='relu'))
            for i in range(layer):
                model.add(tf.keras.layers.Dense(neuron, activation='relu'))
            model.add(tf.keras.layers.Dense(3, activation='softmax'))

            model.compile(optimizer='RMSPROP', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

            model.fit(X_train_inner, y_train_inner, epochs=10, batch_size=4, callbacks=[early_stopping])

            loss, accuracy = model.evaluate(X_validation_inner, y_validation_inner)
            if accuracy > bestAccuracy:
                bestAccuracy = accuracy
                BestOcultLayer = layer
                BestNeuron = neuron

    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Dense(BestNeuron, activation='relu'))
    for i in range(BestOcultLayer):
        model.add(tf.keras.layers.Dense(BestNeuron, activation='relu'))
    model.add(tf.keras.layers.Dense(3, activation='softmax'))

    model.compile(optimizer='RMSPROP', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train_outer, y_train_outer, epochs=10, batch_size=4, callbacks=[early_stopping])

    loss, accuracy = model.evaluate(X_validation_outer, y_validation_outer)
    print('Accuracy: %.2f' % (accuracy*100))




KeyError: '[8, 14, 27, 34, 55, 60, 141, 144, 145, 181, 196, 200, 202, 230, 252, 255, 262, 270, 278, 285, 298, 301, 310, 311, 312, 317, 318, 319, 350, 380, 397, 458, 474, 477, 484, 489, 495, 496, 505, 519, 542, 566, 567, 587, 614, 627, 632, 642, 648, 655, 666, 676, 686, 704, 708, 712, 726, 740, 750, 764, 766, 768] not in index'