In [None]:
# Reference: http://melonicedlatte.com/datascience/2019/10/19/140600.html

In [1]:
# 파이썬 ≥3.5 필수
import sys
assert sys.version_info >= (3, 5)

# 사이킷런 ≥0.20 필수
import sklearn
assert sklearn.__version__ >= "0.20"

try:
    # %tensorflow_version은 코랩 명령입니다.
    %tensorflow_version 2.x
    !pip install -q -U tfx
    print("패키지 호환 에러는 무시해도 괜찮습니다.")
except Exception:
    pass

# 텐서플로 ≥2.0 필수
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

# 공통 모듈 임포트
import numpy as np
import pandas as pd
import os

# 노트북 실행 결과를 동일하게 유지하기 위해
np.random.seed(40)

# 깔끔한 그래프 출력을 위해
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

In [2]:
# Read the Titanic train/test splits from the local datasets folder.
train, test = (
    pd.read_csv(csv_path, sep=',')
    for csv_path in ("datasets/titanic/train.csv", "datasets/titanic/test.csv")
)

# Preview the first rows of the training split.
train.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [None]:
# Independent (deep) copy of the raw training frame.
# NOTE(review): `df` is not read by any later cell visible here.
df = train.copy(deep=True)


In [5]:
# Import the model classes through TensorFlow's bundled Keras so they come from
# the same package as the `keras` module bound by `from tensorflow import keras`
# (whose `keras.backend.clear_session()` is called before each model is built).
# The former `keras.layers.embeddings` module path is not available in newer
# Keras releases, so `Embedding` is imported from the public `layers` namespace.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding

def process_titanic(train, dropna_subset=None):
    """Encode a raw Titanic passenger table as integer-coded features and labels.

    The caller's frame is never modified; all work happens on a copy.

    Derived / re-encoded columns:
      * ``Ageten``   - ``Age`` divided by 20 and truncated (20-year bins,
                       despite the column name).
      * ``Embarked`` - 0 for 'S', 1 for 'C', 2 for anything else.
      * ``Fare``     - ``Fare`` divided by 10 and truncated, capped at 10.
      * ``Title``    - honorific parsed from ``Name`` (the word before the
                       first '.'), with uncommon titles merged into 'Rare' and
                       'Mlle'/'Ms'/'Mme' folded into 'Miss'/'Mrs', then
                       replaced by its index among the sorted distinct titles.
      * ``Sex``      - index among the sorted distinct values.

    ``Title`` and ``Sex`` codes are derived from the values present in the
    frame passed in, so two different frames can yield different codes for
    the same label.

    Args:
        train: DataFrame with at least the columns ``Survived``, ``Name``,
            ``Sex``, ``Age``, ``Embarked`` and ``Fare``.
        dropna_subset: Optional list of column names. Only rows missing a
            value in one of these columns are dropped. The default ``None``
            keeps the original behaviour of dropping every row that has a
            missing value in *any* column (including ones not used as
            features, e.g. ``Cabin``). ``Age``, ``Fare`` and ``Name`` must be
            non-null in the rows that remain.

    Returns:
        ``(train_x, train_y)``: the processed frame without ``Survived`` and a
        1-D array holding the ``Survived`` values of the kept rows.
    """
    rare_titles = ['Lady', 'Countess', 'Capt', 'Col', 'Don',
                   'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona']
    title_aliases = {'Mlle': 'Miss', 'Ms': 'Miss', 'Mme': 'Mrs'}
    port_codes = {'S': 0, 'C': 1}

    train_x = train.copy().dropna(subset=dropna_subset)

    # Missing ages were removed by dropna above, so no NaN guard is needed.
    # (The old `a != np.NaN` guard was always true, and `np.NaN` no longer
    # exists in NumPy 2.0.) astype(int) truncates toward zero like int().
    train_x['Ageten'] = (train_x['Age'] / 20).astype(int)
    train_x['Embarked'] = (
        train_x['Embarked'].map(lambda port: port_codes.get(port, 2)).astype(int)
    )
    # Equivalent to "10 if fare > 100 else int(fare / 10)".
    train_x['Fare'] = (train_x['Fare'] / 10).astype(int).clip(upper=10)

    # Raw string: '\.' is not a valid escape in a normal string literal.
    title = train_x['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)
    title = title.replace(rare_titles, 'Rare').replace(title_aliases)

    # np.unique sorts the distinct labels; the inverse index is the integer code.
    train_x['Title'] = np.unique(title, return_inverse=True)[1]
    train_x['Sex'] = np.unique(train_x['Sex'], return_inverse=True)[1]

    train_y = np.ravel(train_x['Survived'])  # labels as a flat 1-D array
    train_x = train_x.drop(columns=['Survived'])
    return train_x, train_y

def train_general_model(train_x, train_y, epochs=50, batch_size=4, seed=40, verbose=1):
    """Train a small dense binary classifier directly on the encoded features.

    The network is Dense(16, relu) -> Dense(8, relu) -> Dense(1, sigmoid),
    compiled with binary cross-entropy, the Adam optimizer and an accuracy
    metric.

    Args:
        train_x: 2-D feature table; ``train_x.shape[1]`` sets the input width.
        train_y: Binary labels, one per row of ``train_x``.
        epochs: Number of training epochs passed to ``fit``.
        batch_size: Mini-batch size passed to ``fit``.
        seed: Seed applied to both NumPy and TensorFlow before building the
            model.
        verbose: Verbosity level passed to ``fit``.

    Returns:
        The fitted Keras model. (Previously nothing was returned, so the
        trained model could not be evaluated or used for prediction.)
    """
    # Reset Keras global state and re-seed so repeated calls start identically.
    keras.backend.clear_session()
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # Build from the same `keras` module that clear_session() belongs to, so
    # the reset above and the model below share one backend.
    model = keras.Sequential()
    model.add(keras.Input(shape=(train_x.shape[1],)))
    model.add(keras.layers.Dense(16, activation='relu'))
    model.add(keras.layers.Dense(8, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
    return model

def train_embedding_model(train_x, train_y, vocab_size=50, embed_dim=10,
                          epochs=50, batch_size=4, seed=40, verbose=1):
    """Train a binary classifier that embeds integer-coded features first.

    The network is Embedding(vocab_size, embed_dim) -> Flatten ->
    Dense(16, relu) -> Dense(8, relu) -> Dense(1, sigmoid), compiled with
    binary cross-entropy, the Adam optimizer and an accuracy metric.

    A single embedding table is shared by every feature column, so equal
    integer codes in different columns map to the same vector, and every
    value in ``train_x`` must be an index in ``[0, vocab_size)``.

    Args:
        train_x: 2-D table of integer category codes; ``train_x.shape[1]`` is
            the number of features per row.
        train_y: Binary labels, one per row of ``train_x``.
        vocab_size: Number of rows in the embedding table.
        embed_dim: Length of each embedding vector.
        epochs: Number of training epochs passed to ``fit``.
        batch_size: Mini-batch size passed to ``fit``.
        seed: Seed applied to both NumPy and TensorFlow before building the
            model.
        verbose: Verbosity level passed to ``fit``.

    Returns:
        The fitted Keras model. (Previously nothing was returned, so the
        trained model could not be evaluated or used for prediction.)
    """
    # Reset Keras global state and re-seed so repeated calls start identically.
    keras.backend.clear_session()
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # An explicit Input fixes the feature count; the Embedding `input_length`
    # argument used before is deprecated in newer Keras releases.
    model = keras.Sequential()
    model.add(keras.Input(shape=(train_x.shape[1],)))
    model.add(keras.layers.Embedding(vocab_size, embed_dim))
    model.add(keras.layers.Flatten())
    # No input_shape here: Flatten yields n_features * embed_dim values, and
    # Keras only honours input_shape on the first layer of a Sequential model.
    model.add(keras.layers.Dense(16, activation='relu'))
    model.add(keras.layers.Dense(8, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
    return model

In [16]:
# 1. Dense baseline: no embedding layer
cols = ['Pclass', 'Sex', 'Ageten', 'Title']
all_features, train_y = process_titanic(train.copy())
train_x = all_features[cols]
print(train_x.head())

# The trailing ';' keeps the cell from echoing any return value.
train_general_model(train_x, train_y);

    Pclass  Sex  Ageten  Title
1        1    0       1      3
3        1    0       1      3
6        1    1       2      2
10       3    0       0      1
11       1    0       2      1
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [15]:
# 2. Same features as the dense baseline, fed through the embedding model
cols = ['Pclass', 'Sex', 'Ageten', 'Title']
all_features, train_y = process_titanic(train.copy())
train_x = all_features[cols]
print(train_x.head())

# The trailing ';' keeps the cell from echoing any return value.
train_embedding_model(train_x, train_y);

    Pclass  Sex  Ageten  Title
1        1    0       1      3
3        1    0       1      3
6        1    1       2      2
10       3    0       0      1
11       1    0       2      1
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [14]:
# 3. Embedding model with Ageten left out
cols = ['Pclass', 'Sex', 'Title']
all_features, train_y = process_titanic(train.copy())
train_x = all_features[cols]
print(train_x.head())

# The trailing ';' keeps the cell from echoing any return value.
train_embedding_model(train_x, train_y);

    Pclass  Sex  Title
1        1    0      3
3        1    0      3
6        1    1      2
10       3    0      1
11       1    0      1
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [13]:

# 4. Embedding model on a wider feature set
#    (adds SibSp, Parch, Embarked and Fare; Title is not included)
cols = ['Pclass', 'Sex', 'Ageten', 'SibSp', 'Parch', 'Embarked', 'Fare']
all_features, train_y = process_titanic(train.copy())
train_x = all_features[cols]
print(train_x.head())

# The trailing ';' keeps the cell from echoing any return value.
train_embedding_model(train_x, train_y);

    Pclass  Sex  Ageten  SibSp  Parch  Embarked  Fare
1        1    0       1      1      0         1     7
3        1    0       1      1      0         0     5
6        1    1       2      0      0         0     5
10       3    0       0      1      1         0     1
11       1    0       2      0      0         0     2
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
def _one_hot_column(column, missing_token=None, num_oov_buckets=2):
    """One-hot encode a column through a TensorFlow vocabulary lookup table.

    The vocabulary is the column's distinct values in order of first
    appearance. Values not in the vocabulary are hashed into one of
    ``num_oov_buckets`` extra slots, so the one-hot depth is
    ``len(vocabulary) + num_oov_buckets``.

    Args:
        column: pandas Series of category values (strings or integers).
        missing_token: When given, missing values are excluded from the
            vocabulary and replaced by this token before lookup, which sends
            them to an out-of-vocabulary bucket (assuming the token is not
            itself a category). When ``None`` the column is used as-is.
        num_oov_buckets: Number of out-of-vocabulary buckets.

    Returns:
        A 2-D tensor with one one-hot row per element of ``column``.
    """
    vocab = column.drop_duplicates()
    if missing_token is not None:
        vocab = vocab.dropna()
        column = column.fillna(missing_token)

    indices = tf.range(len(vocab), dtype=tf.int64)
    table_init = tf.lookup.KeyValueTensorInitializer(vocab, indices)
    table = tf.lookup.StaticVocabularyTable(table_init, num_oov_buckets)

    examples_indices = table.lookup(tf.constant(column))
    return tf.one_hot(examples_indices, depth=len(vocab) + num_oov_buckets)


# One-hot encode the three categorical columns (raw, un-processed frame).
one_hot = _one_hot_column(train['Sex'])
one_hot2 = _one_hot_column(train['Pclass'])
one_hot3 = _one_hot_column(train['Embarked'], missing_token="None")

x_train = tf.concat([one_hot, one_hot2, one_hot3], axis=1)
y_train = np.ravel(train.Survived)

# Reset Keras global state and re-seed for a repeatable run.
keras.backend.clear_session()
np.random.seed(40)
tf.random.set_seed(40)

# NOTE(review): x_train holds one-hot 0/1 values, so the Embedding layer below
# only ever looks up rows 0 and 1 of its 50-row table. If learned embeddings
# are the goal, feed the integer category indices instead; if one-hot input is
# the goal, the Embedding/Flatten pair can be dropped. The architecture is left
# as written pending confirmation of the intent.
model = keras.Sequential()
# An explicit Input fixes the feature count; the Embedding `input_length`
# argument used before is deprecated in newer Keras releases.
model.add(keras.Input(shape=(x_train.shape[1],)))
model.add(keras.layers.Embedding(50, 10))
model.add(keras.layers.Flatten())
# No input_shape here: Keras only honours it on the first layer of a
# Sequential model, and (10,) did not match the flattened width anyway.
model.add(keras.layers.Dense(16, activation='relu'))
model.add(keras.layers.Dense(8, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=50, batch_size=4, verbose=1)