In [None]:
import numpy as np
import pandas as pd
from pandas import Series
import matplotlib.pyplot as plt
import seaborn as sns
# Matplotlib 3.6 renamed its bundled 'seaborn' style sheet to 'seaborn-v0_8' and later
# releases no longer accept the old name, so use whichever one this install provides.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
sns.set(font_scale=2.5)

import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls

import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
encodings = []  # every value->code mapping built by number_encoding, in call order


def number_encoding(df, list_col, print_map=True):
    """Overwrite the given columns of ``df`` with integer codes, in place.

    For each column named in ``list_col`` the distinct values are sorted and
    numbered 0, 1, 2, ...; the column is then replaced by those codes.
    Each value->code dict is appended to the module-level ``encodings`` list
    and, when ``print_map`` is true, printed. Returns ``None``.
    """
    for column in list_col:
        codes = {value: code for code, value in enumerate(sorted(df[column].unique()))}
        df[column] = df[column].map(codes)

        if print_map:
            print(codes)
        encodings.append(codes)
        
# Load the train and test CSV files from the local ./data directory.
train = pd.read_csv('./data/train.csv')
test = pd.read_csv('./data/test.csv')

In [None]:
def _log_fare(value):
    """Natural log of a positive fare; anything else maps to 0."""
    return np.log(value) if value > 0 else 0


# The same three steps are applied to both frames independently.
for frame in (train, test):
    # Family size = siblings/spouses + parents/children + the passenger.
    frame['Family'] = frame['SibSp'] + frame['Parch'] + 1
    # Missing fares get the mean fare of the frame they belong to.
    frame.loc[frame.Fare.isnull(), 'Fare'] = frame['Fare'].mean()
    # Log-transform the fare.
    frame['Fare'] = frame['Fare'].map(_log_fare)

In [None]:
# Preview the first rows of the test frame.
test.head()

## Filling null

### Filling age

In [None]:
# Pull the title out of each name: the run of letters immediately before a period
# (e.g. "Mr" in "Smith, Mr. John"). Raw string avoids the invalid "\." escape warning;
# expand=False yields a Series instead of a one-column DataFrame.
title_pattern = r'([A-Za-z]+)\.'

train['Initial'] = train.Name.str.extract(title_pattern, expand=False)
display(train['Initial'].head())
# The test titles must come from test.Name. Reading train.Name here would assign each
# test row the title of an unrelated training passenger (aligned by row index).
test['Initial'] = test.Name.str.extract(title_pattern, expand=False)
display(test['Initial'].head())

In [None]:
# Count of each extracted title per sex, transposed so the titles run across the columns.
pd.crosstab(train['Initial'], train['Sex']).T.style.background_gradient(cmap='summer_r')

In [None]:
# Collapse the rarer titles into the groups 'Miss', 'Mr', 'Mrs' and 'Other'.
# NOTE(review): 'Dona' is mapped to 'Mr' exactly as in the original lists, although it
# looks like a female title - confirm whether 'Mrs' was intended.
title_groups = {
    'Mlle': 'Miss', 'Mme': 'Miss', 'Ms': 'Miss',
    'Dr': 'Mr', 'Major': 'Mr',
    'Lady': 'Mrs', 'Countess': 'Mrs',
    'Jonkheer': 'Other', 'Col': 'Other', 'Rev': 'Other',
    'Capt': 'Mr', 'Sir': 'Mr', 'Don': 'Mr', 'Dona': 'Mr',
}

# Assign the result back instead of calling replace(inplace=True) on the selected column:
# that chained in-place call triggers a warning on recent pandas and leaves the frame
# untouched when Copy-on-Write is enabled.
train['Initial'] = train['Initial'].replace(title_groups)
test['Initial'] = test['Initial'].replace(title_groups)

In [None]:
# Mean age per title group in the training data
# (presumably the source of the fill values used in the next cell - verify).
train[['Age', 'Initial']].groupby('Initial').mean()

In [None]:
# Age assigned to passengers whose age is missing, keyed by title group.
age_by_initial = {
    'Mr': 32.7,
    'Miss': 21.9,
    'Master': 4.5,
    'Mrs': 36.0,
    'Other': 45.9,
}

# Both frames use the same constants.
for frame in (train, test):
    for title, fill_age in age_by_initial.items():
        missing_for_title = frame.Age.isnull() & (frame.Initial == title)
        frame.loc[missing_for_title, 'Age'] = fill_age

### Filling embarked

In [None]:
# Count the training rows with a missing embarkation port.
train['Embarked'].isnull().sum()
# Plan: fill the missing values with 'S'.

In [None]:
# Fill missing embarkation ports with 'S'. The result is assigned back because calling
# fillna(inplace=True) on a selected column is a chained in-place operation: recent pandas
# warns about it and it has no effect on the frame when Copy-on-Write is enabled.
train['Embarked'] = train['Embarked'].fillna('S')
test['Embarked'] = test['Embarked'].fillna('S')
# Number of missing values left in the training column after the fill.
train['Embarked'].isnull().sum()

## String to numerical

In [None]:
# Show two rows before the string columns are converted to numbers.
train.head(2)

In [None]:
# Integer-encode the string columns in place; each mapping is printed and appended to `encodings`.
# NOTE(review): train and test are encoded independently, so the codes only agree when
# both frames contain the same set of values in each column - confirm.
number_encoding(train, ['Sex', 'Initial', 'Embarked'])
number_encoding(test, ['Sex', 'Initial', 'Embarked'])

In [None]:
# Show the same two rows after encoding.
train.head(2)

In [None]:
# Pairwise correlation of the label and the selected features, drawn as an annotated heatmap.
heatmap = train[['Survived', 'Pclass', 'Sex', 'Fare', 'Embarked', 'Family', 'Initial', 'Age']]
sns.heatmap(heatmap.astype(float).corr(), cmap=plt.cm.RdBu, annot=True)

### Dropping Columns

In [None]:
# Columns removed from both frames before modelling.
unused_columns = ['PassengerId', 'Name', 'SibSp', 'Parch', 'Ticket', 'Cabin']

for frame in (train, test):
    frame.drop(unused_columns, axis=1, inplace=True)

In [None]:
# Show the remaining columns.
train.head(2)

## Building model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data_utils
from torchvision import transforms

### Datasets

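Earlier draft that encoded the labels as two-element one-hot lists (superseded by the next cell):

```python
train_X = train
train_Y = tmp_Y = train['Survived'].values
died = [0, 1]
survived = [1, 0]
is_died = lambda x: [1, 0] if x == 1 else [0, 1]
train_Y = [is_died(i) for i in tmp_Y]
del train['Survived']
```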

In [None]:

# Split labels from features without mutating `train`, so the cell can be re-run.
# (Aliasing `train_X = train` and then `del train['Survived']` removed the column from
# `train` itself, and a second run failed with a KeyError.)
# Note: `train` therefore still contains the 'Survived' column after this cell.
train_Y = train['Survived']
train_X = train.drop(columns=['Survived'])

# float32 tensors: train_X has one row per passenger, train_Y is 1-D with one label per row.
train_X = torch.tensor(train_X.values, dtype=torch.float32)
train_Y = torch.tensor(train_Y.values, dtype=torch.float32)

In [None]:
#train_Y = train_Y.reshape(-1, 1)

In [None]:
# Sanity-check the tensor shapes.
print(train_X.shape, train_Y.shape)

### Model

In [None]:
class Survived(nn.Module):
    """Logistic-regression classifier: a single linear layer followed by a sigmoid.

    Args:
        in_features: number of input features per sample. Defaults to 7, the width
            this notebook was written for.
    """

    def __init__(self, in_features=7):
        super(Survived, self).__init__()
        # One output unit: the sigmoid of it is compared against 0.5 downstream.
        self.linear = nn.Linear(in_features, 1)

    def forward(self, x):
        """Return sigmoid(linear(x)); values lie in (0, 1) and the last dimension is 1."""
        # torch.sigmoid is the canonical spelling; F.sigmoid has emitted a deprecation
        # warning in several PyTorch releases.
        return torch.sigmoid(self.linear(x))


survived = Survived()
survived

In [None]:
# Pair features with labels and serve them in shuffled mini-batches of 64.
train_set = data_utils.TensorDataset(train_X, train_Y)
train_loader = data_utils.DataLoader(train_set, batch_size = 64, shuffle = True)

In [None]:
# Adam over the model's parameters, binary cross-entropy as the loss,
# and a list that the training loop appends each batch loss to.
optimizer = optim.Adam(survived.parameters(), lr=0.01)
get_loss = nn.BCELoss()  
loss_graph = []

In [None]:
from torch.autograd import Variable  # not used in this cell; kept in case other cells rely on it
num_epoch = 10
for epoch in range(num_epoch):
    for x, y in train_loader:
        optimizer.zero_grad()
        out = survived(x)  # one probability per row: shape (batch, 1)
        # BCELoss requires prediction and target to have the same shape, so give the
        # labels a trailing dimension (a no-op if they already are (batch, 1)).
        loss = get_loss(out, y.reshape(-1, 1))
        loss.backward()
        optimizer.step()
        # Store a plain Python float rather than a tensor.
        loss_graph.append(loss.item())
    # Loss of the last batch in this epoch.
    print('[{}], loss : {}'.format(epoch, loss.item()))

In [None]:
# Plot every recorded batch loss and report the most recent one.
plt.plot(loss_graph)
print('Last Loss Value : {}'.format(loss_graph[-1]))

## Accuracy

In [None]:
# Accuracy on the training data (train_loader is the only data evaluated here).
correct = 0
total = 0
with torch.no_grad():  # evaluation only: no gradients needed
    for x, y in train_loader:
        # Threshold the predicted probability at 0.5 to get a 0/1 class.
        predicted = (survived(x) > 0.5).float()
        # Labels are reshaped to a column so the comparison is element-wise, not broadcast.
        correct += (predicted == y.reshape(-1, 1)).sum().item()
        total += x.size(0)
print('Accuracy : {:.2f}%'.format(100 * correct / float(total)))

In [None]:
train.head(0)

# Collect one passenger's features interactively, using the integer codes printed by
# number_encoding for the categorical fields.
pclass = int(input('What is your Pclass? : '))
sex = int(input('What is your sex? male: 1, female : 0 :'))
age = float(input('What is your age? in float format : '))
embarked = int(input('Embarked (0, 1, 2) : '))
initial = int(input('Your initial : (0, 1, 2, 3, 4) : '))
fare = float(input('Fare in float : '))
family = int(input('Number of family in titanic : '))

# The training fares were log-transformed (non-positive fares mapped to 0), so the raw
# fare entered here must go through the same transform before it reaches the model.
fare = float(np.log(fare)) if fare > 0 else 0.0

# Feature order is assumed to match the training feature columns - verify against
# train.head(0). The explicit dtype keeps the input float32 like the training tensors.
x = torch.tensor([pclass, sex, age, fare, embarked, family, initial], dtype=torch.float32)

with torch.no_grad():  # inference only
    out = survived(x)
# 1 when the predicted probability exceeds 0.5, otherwise 0.
int((out > 0.5))