<a href="https://colab.research.google.com/github/Vecheslav-Eremeev/sklearn_projects/blob/main/Titanic_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

import torch
import torch.nn as nn

Load Data

In [None]:
# Load seaborn's 'titanic' example dataset into a DataFrame and show its
# (rows, columns) dimensions as a quick sanity check.
data = sns.load_dataset('titanic')
data.shape

(891, 15)

In [None]:
# Preview the first rows of the raw data to inspect column names and value formats.
data.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


Data preparation

In [None]:
# Columns removed before modelling. Several visibly restate other columns in
# the preview above (`class` ~ `pclass`, `embark_town` ~ `embarked`,
# `alive` ~ `survived`), and `alive` would leak the target.
drop_col = [
    'class',
    'who',
    'adult_male',
    'embark_town',
    'alive',
    'alone',
]

In [None]:
# Remove the columns listed in `drop_col`; `drop` returns a new frame that is
# rebound to `data`.
data = data.drop(columns=drop_col)

In [None]:
# Count missing values per column to decide how each one should be handled.
data.isna().sum()

survived      0
pclass        0
sex           0
age         177
sibsp         0
parch         0
fare          0
embarked      2
deck        688
dtype: int64

In [None]:
# Replace `deck` with a binary indicator: 1 where the deck value is missing,
# 0 where it is known. Note this cell is not idempotent: re-running it on the
# already-converted column yields all zeros.
data['deck'] = data['deck'].isna().astype(int)

In [None]:
# Fill missing ages with the mean of the observed ages.
mean_age = data['age'].mean()
data['age'] = data['age'].fillna(mean_age)

In [None]:
# Fill the missing embarkation ports with the most frequent port.
most_common_port = data['embarked'].mode().iloc[0]
data['embarked'] = data['embarked'].fillna(most_common_port)

In [None]:
# Discretize age into 8 equal-width bins; `labels=False` keeps the integer
# bin index instead of an interval label.
data['age'] = pd.cut(data['age'], bins=8, labels=False)

In [None]:
# Discretize fare into quartiles (4 quantile-based bins), stored as the
# integer bin index.
data['fare'] = pd.qcut(data['fare'], q=4, labels=False)

In [None]:
# Preview the frame after dropping, imputing and binning to confirm the result.
data.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,2,1,0,0,S,1
1,1,1,female,3,1,0,3,C,0
2,1,3,female,2,0,0,1,S,1
3,1,1,female,3,1,0,3,S,0
4,0,3,male,3,0,0,1,S,1


In [None]:
# Separate the target column from the feature columns.
target_col = 'survived'
y = data[target_col]
X = data.drop(columns=[target_col])

In [None]:
# One-hot encode the remaining non-numeric columns; `drop_first=True` drops one
# level per encoded column so the dummies are not perfectly collinear.
# NOTE(review): the model's first layer is hard-coded to 9 input features, so
# the encoded frame is presumably expected to have 9 columns - confirm.
X = pd.get_dummies(X, drop_first=True)

In [None]:
# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# test rows influence the mean/std used to transform the training rows
# (data leakage), which makes the held-out score optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.25)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to both partitions. `transform` returns NumPy arrays, which
# the tensor-conversion and row-indexing code further down relies on.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Convert the training partition to float32 tensors. The target is reshaped to
# a column vector (N, 1) so it matches the model's single-unit output.
X_train = torch.tensor(np.array(X_train), dtype=torch.float32)
y_train = torch.tensor(np.array(y_train), dtype=torch.float32).reshape(-1, 1)

In [None]:
# Pair features with targets and serve them in shuffled mini-batches of 16.
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
)

Train Model

In [None]:
class Model_NN(nn.Module):
    """Feed-forward binary classifier that outputs a probability in [0, 1].

    Each hidden block is ``Linear -> ReLU -> Dropout``. The ReLU is essential:
    without a non-linearity between them, stacked ``Linear`` layers collapse
    into a single affine map and the network is no more expressive than
    logistic regression.

    Args:
        in_features: Number of input features per sample (default 9).
        hidden_sizes: Width of each hidden layer, in order (default (32, 16)).
        dropout: Dropout probability applied after every hidden activation
            (default 0.3).

    Note:
        Inserting activations shifts the layer indices inside ``network``, so
        state dicts saved from the activation-free version will not load.
    """

    def __init__(self, in_features=9, hidden_sizes=(32, 16), dropout=0.3):
        super().__init__()
        layers = []
        prev_width = in_features
        for width in hidden_sizes:
            layers += [nn.Linear(prev_width, width), nn.ReLU(), nn.Dropout(dropout)]
            prev_width = width
        # Single sigmoid unit: the output is P(class == 1), as nn.BCELoss expects.
        layers += [nn.Linear(prev_width, 1), nn.Sigmoid()]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return probabilities of shape (..., 1) for inputs of shape (..., in_features)."""
        return self.network(x)

In [None]:
# Instantiate the network with its default layer sizes and display its layers.
model = Model_NN()
model

Model_NN(
  (network): Sequential(
    (0): Linear(in_features=9, out_features=32, bias=True)
    (1): Dropout(p=0.3, inplace=False)
    (2): Linear(in_features=32, out_features=16, bias=True)
    (3): Dropout(p=0.3, inplace=False)
    (4): Linear(in_features=16, out_features=1, bias=True)
    (5): Sigmoid()
  )
)

In [None]:
# Binary cross-entropy on probabilities; this pairs with the Sigmoid output
# layer of Model_NN (BCELoss expects inputs already in [0, 1]).
loss_fn = nn.BCELoss()

In [None]:
# Adam over all model parameters.
# NOTE(review): lr=1e-5 is 100x smaller than Adam's default of 1e-3; with only
# a few dozen epochs the weights may barely move - confirm this is intentional.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

In [None]:
# Run on the GPU when one is present; otherwise leave the model on the CPU.
model = model.cuda() if torch.cuda.is_available() else model

In [None]:
# Number of full passes over the training set.
epochs = 30

In [None]:
# Train the model and log the mean loss and accuracy of every epoch.
#
# Fixes relative to the previous version of this cell:
# * Accuracy: the model emits one probability per sample with shape (N, 1), so
#   `torch.max(out, axis=1)` always returned index 0, and comparing that (N,)
#   vector with the (N, 1) targets broadcast to an N x N matrix. That is why
#   the reported "accuracy" exceeded 1. Predictions are now obtained by
#   thresholding the probability at 0.5.
# * The batch variables no longer overwrite the notebook-level `X` and `y`.
# * The reported loss is the sample-weighted mean over the epoch rather than
#   the loss of whichever batch happened to come last.
use_cuda = torch.cuda.is_available()
n_train = len(train_loader.dataset)

for epoch in range(epochs):
    model.train()  # make sure dropout is active while training
    score = 0
    loss_sum = 0.0
    for X_batch, y_batch in train_loader:
        if use_cuda:
            X_batch = X_batch.cuda()
            y_batch = y_batch.cuda()
        out = model(X_batch)
        y_loss = loss_fn(out, y_batch)
        optimizer.zero_grad()
        y_loss.backward()
        optimizer.step()
        # Same shape as y_batch, (batch, 1), so the comparison is element-wise.
        pred = (out >= 0.5).float()
        score += (pred == y_batch).sum().item()
        # Weight by batch size because the final batch may be smaller.
        loss_sum += y_loss.item() * y_batch.size(0)
    accuracy = score / n_train
    print('{} / {} Epochs | Train loss: {:.4f} | Accuracy: {:.4f}'.format(epoch + 1, epochs, loss_sum / n_train, accuracy))

1 / 30 Epochs | Train loss: 0.5146 | Accuracy: 9.8982
2 / 30 Epochs | Train loss: 0.6015 | Accuracy: 9.9042
3 / 30 Epochs | Train loss: 0.6403 | Accuracy: 9.9042
4 / 30 Epochs | Train loss: 0.6039 | Accuracy: 9.9042
5 / 30 Epochs | Train loss: 0.5717 | Accuracy: 9.9102
6 / 30 Epochs | Train loss: 0.5429 | Accuracy: 9.8982
7 / 30 Epochs | Train loss: 0.5392 | Accuracy: 9.8922
8 / 30 Epochs | Train loss: 0.7063 | Accuracy: 9.8922
9 / 30 Epochs | Train loss: 0.5607 | Accuracy: 9.8982
10 / 30 Epochs | Train loss: 0.6380 | Accuracy: 9.8802
11 / 30 Epochs | Train loss: 0.7237 | Accuracy: 9.9042
12 / 30 Epochs | Train loss: 0.5763 | Accuracy: 9.8862
13 / 30 Epochs | Train loss: 0.6721 | Accuracy: 9.8982
14 / 30 Epochs | Train loss: 0.5315 | Accuracy: 9.9042
15 / 30 Epochs | Train loss: 0.6316 | Accuracy: 9.8862
16 / 30 Epochs | Train loss: 0.6158 | Accuracy: 9.8802
17 / 30 Epochs | Train loss: 0.5203 | Accuracy: 9.8922
18 / 30 Epochs | Train loss: 0.6734 | Accuracy: 9.8982
19 / 30 Epochs | Tr

In [None]:
# Predict the class of every test row in one batched forward pass.
#
# Fixes relative to the previous version of this cell:
# * The result is moved back to the CPU before `.numpy()`; calling `.numpy()`
#   directly on a CUDA tensor raises, so the old cell failed on GPU runtimes.
# * Inference runs under `torch.no_grad()` so no autograd graph is built.
# * `model.eval()` is called once instead of once per row, and the rows are
#   scored together rather than in a Python loop.
model.eval()  # disable dropout for deterministic predictions
X_test_tensor = torch.from_numpy(np.asarray(X_test)).float()
if torch.cuda.is_available():
    X_test_tensor = X_test_tensor.cuda()
with torch.no_grad():
    output = model(X_test_tensor)
# Threshold the probabilities at 0.5 and flatten (N, 1) -> list of N ints.
pred = (output >= 0.5).int().cpu().numpy().ravel().tolist()

In [None]:
# Collect the per-row predictions into a NumPy array and display it.
y_pred = np.asarray(pred)
y_pred

array([0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0,
       0, 1, 0])

In [None]:
# Fraction of held-out test rows whose predicted class matches the true label.
accuracy_score(y_test, y_pred)

0.7354260089686099