[View in Colaboratory](https://colab.research.google.com/github/isaacyaf/example/blob/master/titanic_survival_prediction_DNN_lite.ipynb)

# Predict Survival on the Titanic


![ref. kaggle](https://kaggle2.blob.core.windows.net/competitions/kaggle/3136/logos/header.png)

The sinking of the RMS Titanic is one of the most infamous shipwrecks in history.

ref. kaggle-Titanic

In [0]:
# Download the three CSV files this notebook reads into the working directory:
#   titanic_train.csv - loaded below as titanic_df (training data)
#   titanic_test.csv  - loaded below as test_df (rows looked up by passenger name)
#   feature.csv       - loaded below as feature_df (searched by its 'C_Name' column)
# curl flags: -L follows redirects, -o sets the local output file name.
!curl -L -o titanic_train.csv "https://drive.google.com/uc?export=download&id=1Ao0T6DKiCweVleaGt4rOw625gf7qmzou"
!curl -L -o titanic_test.csv "https://drive.google.com/uc?export=download&id=1ZaMHDxISV101MgK9-hor15MN33AB3LE0"
!curl -L -o feature.csv "https://drive.google.com/uc?export=download&id=1vTwPIASmt99oKUhHNGp6_i9Hi6R2BA_J"

  
# standard library
import io
import re as re
import urllib.request

# pandas, numpy, matplotlib, seaborn
import matplotlib.gridspec as gridspec
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

#keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import RMSprop

# Notebook display configuration.
from IPython.display import display, HTML, Image, clear_output
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Let pandas show up to 200 characters per cell before truncating the text.
pd.set_option('max_colwidth',200)
# Use matplotlib's 'ggplot' style sheet for every figure drawn afterwards.
plt.style.use('ggplot')

def plot_train_history(history, train_metrics, val_metrics):
    """Plot a training metric and its validation counterpart against epochs.

    history       -- object exposing a ``history`` mapping from metric name to
                     a sequence of per-epoch values (e.g. the value returned
                     by ``model.fit``).
    train_metrics -- key of the training curve; also used as the y-axis label.
    val_metrics   -- key of the validation curve.

    Draws on the current matplotlib axes and returns nothing.
    """
    recorded = history.history
    # Training curve first, validation second, so the legend order matches.
    for metric_key in (train_metrics, val_metrics):
        plt.plot(recorded.get(metric_key), '-o')
    plt.xlabel('Epochs')
    plt.ylabel(train_metrics)
    plt.legend(['train', 'validation'])
    
def find_data(df,col,data):
  """Return the rows of ``df`` whose ``col`` entry matches ``data``.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame to search.
  col : column label
      Column whose values are inspected.
  data : str or None
      ``None`` selects the rows where ``col`` is missing (NaN/None).
      Otherwise ``data`` is searched as a case-sensitive, literal substring
      of ``str(value)`` for every value in ``col``.

  Returns
  -------
  pandas.DataFrame
      The matching rows in their original order, keeping the original index
      labels and column dtypes. When nothing matches, an empty frame with the
      same columns is returned.
  """
  if data is None:
    return df[df[col].isna()]
  # One boolean mask instead of concatenating row by row: linear time, and
  # the selected rows keep the dtypes of ``df``. The astype(bool) keeps the
  # mask boolean even when ``df`` has no rows.
  mask = df[col].map(lambda value: data in str(value)).astype(bool)
  return df[mask]

def pick_fl(df, sl):
  """Gather the rows of ``df`` whose 'C_Name' column matches any entry of ``sl``.

  Each entry of ``sl`` is looked up with ``find_data(df, 'C_Name', entry)``
  and the per-entry results are stacked in the order of ``sl``.
  """
  # Start from an empty frame carrying df's columns so that an empty ``sl``
  # still produces a frame with those columns.
  pieces = [pd.DataFrame(columns=df.columns.values)]
  pieces.extend(find_data(df, 'C_Name', wanted) for wanted in sl)
  return pd.concat(pieces)

def pretty_print(df):
  """Display ``df`` as an HTML table with left-aligned cell text.

  The two-character sequence backslash + n found in the generated HTML is
  turned into a ``<br>`` tag, and every ``<td>`` cell is wrapped in a
  left-aligned paragraph. Returns whatever ``display`` returns.
  """
  markup = df.to_html()
  substitutions = (
    ("\\n", "<br>"),
    ('<td>', '<td><p style="text-align: left;">'),
    ('</td>', '</p></td>'),
  )
  for old, new in substitutions:
    markup = markup.replace(old, new)
  return display(HTML(markup))

# Showcase data: portrait URLs and name search strings for the passengers whose
# predictions are displayed after each training round. The lists are zipped
# together later, so the i-th name belongs to the i-th image of the same round.
r1_images = ["https://i.imgur.com/NVHBRth.png",
  "https://i.imgur.com/DqsXI6v.png",
  "https://i.imgur.com/v0jepSR.png",
  "https://i.imgur.com/g0Nt7BQ.png",
  "https://i.imgur.com/eXkn9ap.png"]

r2_images = ["https://i.imgur.com/lqd1URe.png",
  "https://i.imgur.com/8dNd5LI.png",
  "https://i.imgur.com/OEHcMW1.png",
  "https://i.imgur.com/kI72W83.png",
  "https://i.imgur.com/VzeD1lF.png"]

# Each entry is used as a substring to search for in test_df['Name'] (via find_data),
# so it does not have to be a passenger's complete name.
r1_names = ['Duane, Mr. Frank',
  'Svensson, Mr. Johan',
  'Turkula, Mrs. (Hedwig)',
  'Herman, Miss. Alice',
  'Braund, Mr. Owen Harris']

r2_names = ['Guggenheim, Mr. Benjamin',
  'Byles, Rev. Thomas Roussel Davids',
  'Rosalie Ida Blun',
  'Lines, Miss. Mary Conover',
  'Blank, Mr. Henry']

# Wipe the output printed so far by this cell (e.g. the curl progress lines).
clear_output()

# Load the downloaded CSV files as DataFrames.
titanic_df = pd.read_csv('titanic_train.csv')
test_df    = pd.read_csv('titanic_test.csv')
feature_df = pd.read_csv('feature.csv')

# NOTE(review): full_data is not referenced by any other cell visible in this notebook.
full_data = [titanic_df, test_df]

# Preview the first rows of the training data (last expression, so it is displayed).
titanic_df.head()


|Col. Name|Col. Explanation| Data Explanation |
|---	|---	|--- |
|PassengerId|乘客編號||
|Pclass         |社會經濟地位 |1/2/3  = 一等/二等/三等 |
|Name           |乘客姓名||
|Name_length           |乘客姓名長度||
|Title           |頭銜| 1: Mr <br> 2: Miss   <br> 3: Mrs <br> 4: Master  <br> 5: Rare <br><br> Rare = 'Capt', 'Col', 'Countess',	'Don', 'Dr',<br> 'Jonkheer', 'Lady', 'Major', 'Rev', 'Sir'|
|Sex            |性別|0: Female <br> 1: Male|
|Age            |年齡||
|Age_Categories           |年齡分類|0: ≤ 16 <br> 1: > 16 &  ≤ 32  <br> 2: > 32 &  ≤ 48  <br> 3: > 48 & ≤ 64  <br> 4: > 64 |
|SibSp          |手足或配偶也在船上的數量||
|Parch          |父母子女也在船上的數量||
|FamilySize          |整個家族在船上的數量|SibSp + Parch + 1|
|IsAlone          |是否獨自搭船 (FamilySize = 1)|0: 否 <br> 1: 是|
|Ticket         |船票資訊||
|Fare           |票價 (單位: 英鎊)||
|Fare_Categories |票價分類|0: ≤ 7.91 <br> 1: > 7.91 &  ≤ 14.454  <br> 2: > 14.454 &  ≤ 31  <br> 3: > 31  |
|Cabin          |客艙資訊||
|Has_Cabin          |是否有記載客艙資訊|0: 否 <br> 1: 是|
|Embarked       |登船港口 |  0 = Southampton (英格蘭-南安普敦) <br> 1 = Cherbourg (法國-瑟堡)  <br> 2 = Queenstown (愛爾蘭-皇后鎮，今科夫)  |
|Survived       |是否存活 |0 = 否, 1 =是|


# Feature List & Model Building (Round 1)
'Pclass', 'Name_length', 'Title', 'Sex', 'Age', 'Age_Categories', 'SibSp', 'Parch', 'FamilySize', 'IsAlone', 'Fare', 'Fare_Categories', 'Has_Cabin', 'Embarked'

* Pick features 

```
# selected_list_r1= ['SibSp', 'Parch', 'FamilySize']
```




In [0]:
# Round 1: train a small dense network on the chosen feature columns, then
# show the predicted survival probability for each round-1 showcase passenger.
#
# Available feature columns:
#'Pclass', 'Name_length', 'Title', 'Sex', 'Age', 'Age_Categories', 'SibSp', 
#'Parch', 'FamilySize', 'IsAlone', 'Fare', 'Fare_Categories', 'Has_Cabin', 'Embarked'

#================ Select your features ========================
selected_list_r1 = []

#============================================================

# Stop with a readable message instead of failing deep inside Keras when the
# feature list above has not been filled in yet.
if not selected_list_r1:
  raise ValueError(
      "selected_list_r1 is empty: choose at least one feature column before training.")

train_tmp = titanic_df[selected_list_r1].values
train_DLy = titanic_df[['Survived']].values
# Number of input features = number of selected columns.
inputdims = train_tmp.shape[1]
batch_size = 10
nb_classes = 1
nb_epoch = 30


#================ Build your model ========================
# Three hidden ReLU layers followed by a single sigmoid unit, so the network
# outputs one value in [0, 1] that is shown below as a survival probability.
model = Sequential()
model.add(Dense(16, input_dim=inputdims, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.summary()
model.compile(loss='binary_crossentropy',
              optimizer='Adam',
              metrics=['accuracy'])
#============================================================


#Learning (the last 10% of the training rows are held out for validation)
history = model.fit(train_tmp, 
                    train_DLy,
                    batch_size=batch_size, 
                    epochs=nb_epoch,
                    verbose=2, 
                    validation_split=0.1)

# Drop the model summary and per-epoch log so only the predictions remain.
clear_output()
print("Learning is completed.")
print("The prediction result of the AI model is:")

for nametmp, image_url in zip(r1_names, r1_images):
  # Look the passenger up once and reuse the rows for every output below.
  person_df = find_data(test_df, 'Name', nametmp)
  if person_df.empty:
    print("No passenger matching '{0}' was found in test_df.\n".format(nametmp))
    continue
  display(Image(url= image_url, width=150))
  person_features = person_df[selected_list_r1]
  # First matching row, single sigmoid output, scaled to a percentage.
  p_result = model.predict(person_features.astype(np.float32))[0][0]*100
  # Take the scalar of the first match; formatting the whole Series would
  # print its index/dtype repr instead of the number.
  a_nums = person_df["Nums"].astype(np.float32).iloc[0]
  print("編號: {2:g}\t姓名: {0}\n存活率: {1:.2f}%\n特徵:".format(
      nametmp, p_result, a_nums))
  display(person_features)
  print("\n")

In [0]:
# Round 1 summary figure: describe the selected features, then draw each
# showcase passenger's portrait with the predicted survival probability and a
# table of that passenger's feature values.
# NOTE: this cell uses the `model` currently in memory, so run it right after
# the round-1 training cell (the round-2 cell rebinds `model`).
pd_tmp = pick_fl(feature_df, selected_list_r1)
pretty_print(pd_tmp)

plt.figure(figsize=(21,14))
gs = gridspec.GridSpec(2, 3)
gs.update(wspace=0.025, hspace=0.4)


for nametmp, image_url, tmpi in zip(r1_names, r1_images, range(5)):
  # Look the passenger up once and reuse the rows for every output below.
  person_df = find_data(test_df, 'Name', nametmp)
  if person_df.empty:
    # Nothing to predict for an unknown name; leave this grid slot blank.
    print("No passenger matching '{0}' was found in test_df.".format(nametmp))
    continue
  # Recent Matplotlib releases no longer accept a URL in imread, so download
  # the bytes first and decode them from an in-memory (seekable) buffer.
  with urllib.request.urlopen(image_url) as response:
    img = mpimg.imread(io.BytesIO(response.read()))
  tmpdf = person_df[selected_list_r1]
  # First matching row, single sigmoid output, scaled to a percentage.
  p_result = model.predict(tmpdf.astype(np.float32))[0][0]*100
  # Scalar of the first match; formatting the whole Series would print its repr.
  a_nums = person_df["Nums"].astype(np.float32).iloc[0]
  texttmp = " Number: {2:g}\n Name: {0}\n Survivability: {1:.2f}%\n".format(
    nametmp, p_result, a_nums)
  ax = plt.subplot(gs[tmpi])
  ax.imshow(img)
  ax.axis('off')
  # Caption goes below the image, in axes-relative coordinates.
  ax.text(0,-0.4, texttmp, size=16, ha="left", 
           transform=ax.transAxes)
  ax_t = ax.table(cellText = tmpdf.values, colLabels=tmpdf.columns)
  ax_t.scale(1.25, 2)
  ax_t.auto_set_font_size(False)
  ax_t.set_fontsize(14)

plt.show()

# Feature List & Model Building (Round 2)
'Pclass', 'Name_length', 'Title', 'Sex', 'Age', 'Age_Categories', 'SibSp', 'Parch', 'FamilySize', 'IsAlone', 'Fare', 'Fare_Categories', 'Has_Cabin', 'Embarked'

* Pick features 

```
# selected_list_r2= ['SibSp', 'Parch', 'FamilySize']
```

In [0]:
# Round 2: train a small dense network on the chosen feature columns, then
# show the predicted survival probability for each round-2 showcase passenger.
#
# Available feature columns:
#'Pclass', 'Name_length', 'Title', 'Sex', 'Age', 'Age_Categories', 'SibSp', 
#'Parch', 'FamilySize', 'IsAlone', 'Fare', 'Fare_Categories', 'Has_Cabin', 'Embarked'

#================ Select your features ========================
selected_list_r2 = []

#============================================================

# Stop with a readable message instead of failing deep inside Keras when the
# feature list above has not been filled in yet.
if not selected_list_r2:
  raise ValueError(
      "selected_list_r2 is empty: choose at least one feature column before training.")

train_tmp = titanic_df[selected_list_r2].values
train_DLy = titanic_df[['Survived']].values
# Number of input features = number of selected columns.
inputdims = train_tmp.shape[1]
batch_size = 10
nb_classes = 1
nb_epoch = 30


#================ Build your model ========================
# Three hidden ReLU layers followed by a single sigmoid unit, so the network
# outputs one value in [0, 1] that is shown below as a survival probability.
model = Sequential()
model.add(Dense(16, input_dim=inputdims, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.summary()
model.compile(loss='binary_crossentropy',
              optimizer='Adam',
              metrics=['accuracy'])
#============================================================


#Learning (the last 10% of the training rows are held out for validation)
history = model.fit(train_tmp, 
                    train_DLy,
                    batch_size=batch_size, 
                    epochs=nb_epoch,
                    verbose=2, 
                    validation_split=0.1)

# Drop the model summary and per-epoch log so only the predictions remain.
clear_output()
print("Learning is completed.")
print("The prediction result of the AI model is:")

for nametmp, image_url in zip(r2_names, r2_images):
  # Look the passenger up once and reuse the rows for every output below.
  person_df = find_data(test_df, 'Name', nametmp)
  if person_df.empty:
    print("No passenger matching '{0}' was found in test_df.\n".format(nametmp))
    continue
  display(Image(url= image_url, width=150))
  person_features = person_df[selected_list_r2]
  # First matching row, single sigmoid output, scaled to a percentage.
  p_result = model.predict(person_features.astype(np.float32))[0][0]*100
  # Take the scalar of the first match; formatting the whole Series would
  # print its index/dtype repr instead of the number.
  a_nums = person_df["Nums"].astype(np.float32).iloc[0]
  print("編號: {2:g}\t姓名: {0}\n存活率: {1:.2f}%\n特徵:".format(
      nametmp, p_result, a_nums))
  display(person_features)
  print("\n")

In [0]:
# Round 2 summary figure: describe the selected features, then draw each
# showcase passenger's portrait with the predicted survival probability and a
# table of that passenger's feature values.
# NOTE: this cell uses the `model` currently in memory, so run it right after
# the round-2 training cell.
pd_tmp = pick_fl(feature_df, selected_list_r2)
pretty_print(pd_tmp)

plt.figure(figsize=(21,14))
gs = gridspec.GridSpec(2, 3)
gs.update(wspace=0.025, hspace=0.4)


for nametmp, image_url, tmpi in zip(r2_names, r2_images, range(5)):
  # Look the passenger up once and reuse the rows for every output below.
  person_df = find_data(test_df, 'Name', nametmp)
  if person_df.empty:
    # Nothing to predict for an unknown name; leave this grid slot blank.
    print("No passenger matching '{0}' was found in test_df.".format(nametmp))
    continue
  # Recent Matplotlib releases no longer accept a URL in imread, so download
  # the bytes first and decode them from an in-memory (seekable) buffer.
  with urllib.request.urlopen(image_url) as response:
    img = mpimg.imread(io.BytesIO(response.read()))
  tmpdf = person_df[selected_list_r2]
  # First matching row, single sigmoid output, scaled to a percentage.
  p_result = model.predict(tmpdf.astype(np.float32))[0][0]*100
  # Scalar of the first match; formatting the whole Series would print its repr.
  a_nums = person_df["Nums"].astype(np.float32).iloc[0]
  texttmp = " Number: {2:g}\n Name: {0}\n Survivability: {1:.2f}%\n".format(
    nametmp, p_result, a_nums)
  ax = plt.subplot(gs[tmpi])
  ax.imshow(img)
  ax.axis('off')
  # Caption goes below the image, in axes-relative coordinates.
  ax.text(0,-0.4, texttmp, size=16, ha="left", 
           transform=ax.transAxes)
  ax_t = ax.table(cellText = tmpdf.values, colLabels=tmpdf.columns)
  ax_t.scale(1.25, 2)
  ax_t.auto_set_font_size(False)
  ax_t.set_fontsize(14)

plt.show()

In [0]:
# Two hand-written passenger rows ("Jack Dawson" and "Rose DeWitt Bukater")
# encoded with the same feature columns used for training. "x" is a
# placeholder for the Ticket and Cabin text fields.
JandR_df = pd.DataFrame(
  [ 
  [3, "Jack Dawson", 11, 1, 1, 20.0, 1, 0, 0, 1, 1, "x", 7.0, 0, "x", 0, 0, 0],
  [1, "Rose DeWitt Bukater", 19, 2, 0, 17.0, 1, 0, 0, 1, 1, "x", 20.0, 2, "x", 1, 0, 1]
  ]
, columns=['Pclass', 'Name', 'Name_length', 'Title', 'Sex', 'Age', 
           'Age_Categories', 'SibSp', 'Parch', 'FamilySize', 'IsAlone', 
           'Ticket', 'Fare', 'Fare_Categories', 'Cabin', 'Has_Cabin', 
           'Embarked','Survived'])

# Print the predicted class (1 = survived, 0 = did not) from the round-2 model.
# Sequential.predict_classes no longer exists in current Keras releases; for a
# single sigmoid output it was equivalent to thresholding predict() at 0.5.
for jr_name in ('Jack', 'Rose'):
  # Cast to float32 so the model receives a numeric array rather than an
  # object-typed one.
  jr_features = find_data(JandR_df, 'Name', jr_name)[selected_list_r2].astype(np.float32).values
  print((model.predict(jr_features) > 0.5).astype("int32"))
    