# Deep Learning with TensorFlow 1.x

<br>

## 1. Data loading & preprocessing

In [2]:
import tensorflow as tf
tf.__version__

'1.13.1'

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [4]:
# Read the raw Titanic passenger table from the working directory and
# preview its first rows.
csv_path = "titanic.csv"
titanic_df = pd.read_csv(csv_path)
titanic_df.head(n=3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [4]:
# Feature engineering for the Titanic frame.
# NOTE: this cell consumes the raw columns (Cabin, Name, SibSp, ...), so it
# must be run exactly once after the CSV is loaded; re-running it on the
# already-processed frame raises a KeyError.

# --- Drop columns that are not used as features -----------------------------
# Cabin: too many missing values.
# PassengerId: a serial number with no real meaning.
# Ticket: no pattern was found in the ticket numbers.
titanic_df = titanic_df.drop(columns=['Cabin', 'PassengerId', 'Ticket'])

# --- Title -------------------------------------------------------------------
# Keep only the honorific (the word immediately before a '.') from the name.
# A raw string is used so that '\.' is not treated as an invalid escape.
titanic_df['Title'] = titanic_df['Name'].str.extract(r'([A-Za-z]+)\.', expand=False)
titanic_df = titanic_df.drop(columns=['Name'])

# Titles that occur fewer than 10 times are grouped into a single 'Rare' bucket.
title_counts = titanic_df['Title'].value_counts()
rare_title = title_counts[title_counts < 10].index.tolist()

# Merge equivalent titles first (Mlle = Mademoiselle, Mme = Madame), then
# collapse whatever is still rare.
titanic_df['Title'] = titanic_df['Title'].replace({'Mlle': 'Miss', 'Ms': 'Miss', 'Mme': 'Mrs'})
titanic_df['Title'] = titanic_df['Title'].replace(rare_title, 'Rare')

title_mapping = {"Master": 1, "Miss": 2, "Mr": 3, "Mrs": 4, "Rare": 5}
# Unmapped / missing titles become 0. The integer cast is assigned back
# (the original call discarded its result).
titanic_df['Title'] = titanic_df['Title'].map(title_mapping).fillna(0).astype(int)

# --- Sex ---------------------------------------------------------------------
sex_mapping = {"male": 0, "female": 1}
titanic_df['Sex'] = titanic_df['Sex'].map(sex_mapping)

# --- Embarked ----------------------------------------------------------------
# Missing ports are filled with 'S' before encoding.
titanic_df['Embarked'] = titanic_df['Embarked'].fillna('S')
mapping_data = {"S": 0, "Q": 1, "C": 2}
titanic_df["Embarked"] = titanic_df["Embarked"].map(mapping_data)

# --- Fare --------------------------------------------------------------------
# Bin the fare into right-closed intervals (<=102, (102, 204], (204, 307], >307)
# labelled 0..3. pd.cut bins the original values in one pass, so a bin label
# can never be re-binned by a later rule, and the labels are consecutive
# (the original assigned 4 to the last bin, skipping 3).
fare_bins = [-np.inf, 102, 204, 307, np.inf]
titanic_df['Fare'] = pd.cut(titanic_df['Fare'], bins=fare_bins, labels=False).astype(float)

# --- Family size -------------------------------------------------------------
# Siblings/spouses + parents/children + the passenger themselves.
titanic_df["FamilySize"] = titanic_df["SibSp"] + titanic_df["Parch"] + 1
titanic_df = titanic_df.drop(columns=['SibSp', 'Parch'])

# Flag passengers travelling without any family member.
titanic_df['isAlone'] = (titanic_df['FamilySize'] == 1).astype(int)

# Rescale the family size (1..11) onto the 0..4 range in steps of 0.4.
family_mapping = {1: 0, 2: 0.4, 3: 0.8, 4: 1.2, 5: 1.6, 6: 2, 7: 2.4, 8: 2.8, 9: 3.2, 10: 3.6, 11: 4}
titanic_df['FamilySize'] = titanic_df['FamilySize'].map(family_mapping)

# --- Age ---------------------------------------------------------------------
# Fill missing ages with the median age of passengers sharing the same title.
# Assigning the result back avoids calling an in-place method on a column
# selection, which does not reliably update the parent frame.
title_median_age = titanic_df.groupby("Title")["Age"].transform("median")
titanic_df["Age"] = titanic_df["Age"].fillna(title_median_age)

# Bin the age into right-closed 16-year bands labelled 0..4.
age_bins = [-np.inf, 16, 32, 48, 64, np.inf]
titanic_df['Age'] = pd.cut(titanic_df['Age'], bins=age_bins, labels=False).astype(float)

titanic_df.head(3)

Unnamed: 0,Survived,Pclass,Sex,Age,Fare,Embarked,Title,FamilySize,isAlone
0,0,3,0,1.0,0.0,0,3,0.4,0
1,1,1,1,2.0,0.0,2,4,0.4,0
2,1,3,1,1.0,0.0,0,2,0.0,1


<br>

## 2. Divide the dataframe into X & Y -> Train X / Test X / Train Y / Test Y

In [5]:
from sklearn import model_selection

# Separate the target column ('Survived') from the feature columns.
titanic_target = titanic_df[['Survived']].copy()
titanic_data = titanic_df.drop(columns=['Survived'])

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
split = model_selection.train_test_split(
    titanic_data,
    titanic_target,
    test_size=0.3,
    random_state=0,
)
train_data, test_data, train_label, test_label = split

# Report the shape of each part: train X, test X, train Y, test Y.
for part in (train_data, test_data, train_label, test_label):
    print(part.shape)

(623, 8)
(268, 8)
(623, 1)
(268, 1)


<br>

## 3. Convert integer labels to one-hot labels

In [6]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the 'Survived' labels.
# NOTE: this cell overwrites train_label / test_label with their encoded
# versions, so it must be run exactly once after the train/test split.
enc = OneHotEncoder(categories='auto')

# Learn the label categories from the training labels only ...
enc.fit(train_label)
train_label = enc.transform(train_label).toarray()

# ... and reuse the same fitted encoder for the test labels, so both arrays
# share one column layout. (Re-fitting on the test labels, as before, could
# silently produce a different layout if the test split lacked a class.)
test_label = enc.transform(test_label).toarray()

print(train_label.shape)
print(test_label.shape)

(623, 2)
(268, 2)


<br>

## 4. Build & Train the model 

In [7]:
import tensorflow as tf
from tensorflow import layers

import os
# Limit TensorFlow's Python-side logger to ERROR-level messages.
tf.logging.set_verbosity(tf.logging.ERROR)
# Set the environment variable that controls TensorFlow's native log level.
# NOTE(review): this variable is commonly documented as needing to be set
# before TensorFlow is first imported — confirm it still takes effect here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [8]:
# Derive the input/output widths from the prepared data instead of
# hard-coding them, so the graph keeps matching the data if the feature
# engineering changes.
n_features = train_data.shape[1]  # number of X_data's columns
n_classes = train_label.shape[1]  # number of Y_data's columns (one-hot vector)

X = tf.placeholder(tf.float32, [None, n_features])
Y = tf.placeholder(tf.float32, [None, n_classes])

# layers.dropout() takes a True/False flag that selects training or testing
# behaviour; this placeholder feeds that flag at run time.
dropout_sign = tf.placeholder(tf.bool)

In [9]:
# Four identical hidden blocks: a 256-unit ReLU dense layer with He-normal
# initialisation, followed by dropout.
# The "rate" argument of layers.dropout() is the dropping rate, not the
# keeping rate (its default is 0.5); dropout is only active while
# dropout_sign is fed as True.
hidden_layers = []
layer_input = X
for _ in range(4):
    dense_out = layers.dense(layer_input, 256, activation=tf.nn.relu,
                             kernel_initializer=tf.keras.initializers.he_normal())
    layer_input = layers.dropout(dense_out, rate=0.2, training=dropout_sign)
    hidden_layers.append(layer_input)

# Keep the per-layer names available for inspection.
L1, L2, L3, L4 = hidden_layers

# Output layer: raw logits, one per class (2 == number of Y_data's columns).
model = layers.dense(L4, 2, activation=None)

In [10]:
# Softmax cross-entropy between the one-hot targets and the network's logits.
cost = tf.losses.softmax_cross_entropy(onehot_labels=Y, logits=model)

# Adam optimizer with a learning rate of 2e-3 (== 0.002) minimising that loss.
adam = tf.train.AdamOptimizer(learning_rate=2e-3)
optimizer = adam.minimize(cost)

In [11]:
# Compare the index of the largest logit with the index of the hot entry in
# the label; the mean of the matches is the accuracy.
predicted_class = tf.argmax(model, axis=1)
true_class = tf.argmax(Y, axis=1)
is_correct = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(is_correct, dtype=tf.float32))

In [12]:
# Open a session and run the initializer op for the graph's global variables.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [17]:
total_epoch = 100
log_every = 10   # report metrics once every `log_every` epochs
total_cost = 0   # running sum of the per-epoch training cost

for epoch in range(total_epoch):
    # One full-batch optimisation step with dropout enabled.
    _, cost_val = sess.run([optimizer, cost],
                           feed_dict={X: train_data, Y: train_label, dropout_sign: True})
    total_cost += cost_val

    if epoch % log_every == 0:
        # Evaluate only when the numbers are actually reported. dropout_sign
        # must be False here so that dropout is switched off for evaluation.
        train_accuracy = sess.run(accuracy,
                                  feed_dict={X: train_data, Y: train_label, dropout_sign: False})
        test_accuracy = sess.run(accuracy,
                                 feed_dict={X: test_data, Y: test_label, dropout_sign: False})

        print('Epoch:', '%04d' % (epoch + 1),
              '|| Avg. cost =', '{:.3f}'.format(total_cost / (epoch + 1)),  # mean cost so far
              '|| Training accuracy : {:.3f}'.format(float(train_accuracy)),
              '|| Test accuracy : {:.3f}'.format(float(test_accuracy)))

print('Learning process is completed!')

Epoch: 0001 || Avg. cost = 0.358 || Training accuracy : 0.857 || Test accuracy : 0.821
Epoch: 0011 || Avg. cost = 0.359 || Training accuracy : 0.862 || Test accuracy : 0.817
Epoch: 0021 || Avg. cost = 0.359 || Training accuracy : 0.862 || Test accuracy : 0.821
Epoch: 0031 || Avg. cost = 0.358 || Training accuracy : 0.860 || Test accuracy : 0.825
Epoch: 0041 || Avg. cost = 0.359 || Training accuracy : 0.860 || Test accuracy : 0.817
Epoch: 0051 || Avg. cost = 0.359 || Training accuracy : 0.862 || Test accuracy : 0.821
Epoch: 0061 || Avg. cost = 0.358 || Training accuracy : 0.862 || Test accuracy : 0.817
Epoch: 0071 || Avg. cost = 0.358 || Training accuracy : 0.864 || Test accuracy : 0.817
Epoch: 0081 || Avg. cost = 0.357 || Training accuracy : 0.864 || Test accuracy : 0.821
Epoch: 0091 || Avg. cost = 0.357 || Training accuracy : 0.860 || Test accuracy : 0.821
Learning process is completed!


**`sess.run(optimizer, feed_dict={ })` 가 실행되어야 process가 진행된다.**    
**`optimizer`와 연관된 변수들은 계속해서 update 된다.**

In [None]:
# Print the test accuracy. dropout_sign must be fed as False so that dropout
# is switched off during evaluation.
eval_feed = {X: test_data, Y: test_label, dropout_sign: False}
final_test_accuracy = sess.run(accuracy, feed_dict=eval_feed)
print('Test accuracy : {}'.format(final_test_accuracy))

In [None]:
# Print the class the model actually predicts for each test row.
# Only the features are fed; test_label is not needed for prediction.
predict_op = tf.argmax(model, 1)
predict_feed = {X: test_data, dropout_sign: False}
predicted_labels = sess.run(predict_op, feed_dict=predict_feed)
print(predicted_labels)

# Release the session's resources now that the notebook is finished with it.
sess.close()