In [34]:
import torch
import torch.nn as nn
import seaborn as sns
import pandas as pd
import statistics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

## Preparing the variables

In [58]:
df = sns.load_dataset('titanic')
print(df.head())
numerical_features = ['pclass', 'age', 'sibsp', 'parch']
df['age'].fillna(statistics.mean(df['age'].dropna()), inplace=True)

# Filling NaN's
for column in df[numerical_features].columns:
    print(f'column: {column}. na: {df[column].isna().sum()}')

features = df[numerical_features].values
labels = df['survived'].values

# Splitting
features_train, features_test, labels_train, labels_test = train_test_split(features, labels)

# Scaling features
features_train = StandardScaler().fit_transform(features_train)
features_test = StandardScaler().fit_transform(features_test)

# Transforming
dt = torch.float32
features_train, features_test, labels_train, labels_test = \
    torch.tensor(features_train, dtype=dt), torch.tensor(features_test, dtype=dt), \
        torch.tensor(labels_train, dtype=dt), torch.tensor(labels_test, dtype=dt)

# Reshaping labels
labels_train = labels_train.view(-1, 1)
labels_test = labels_test.view(-1, 1)
print(features_train.shape, labels_train.shape)
print(features_test.shape, labels_test.shape)
print(features_train.dtype)

   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male deck  embark_town alive  alone  
0    man        True  NaN  Southampton    no  False  
1  woman       False    C    Cherbourg   yes  False  
2  woman       False  NaN  Southampton   yes   True  
3  woman       False    C  Southampton   yes  False  
4    man        True  NaN  Southampton    no   True  
column: pclass. na: 0
column: age. na: 0
column: sibsp. na: 0
column: parch. na: 0
torch.Size([668, 4]) torch.Size([668, 1])
torch.Size([223, 4]) torch.Size([223, 1])
torch.float32


## Logistic regression

In [78]:
num_epochs = 1000
learning_rate = 0.5

# Step 1 (model)
class Model(nn.Module):
    def __init__(self, features_len):
        super(Model, self).__init__()
        self.linear = nn.Linear(features_len, 1)
    
    def forward(self, features):
        labels_pred = torch.sigmoid(self.linear(features))
        return(labels_pred)
    
model = Model(features_train.shape[1])

# Step 2 (loss function and optimization)
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Step 3 (fitting)
for epoch in range(num_epochs):
    labels_pred = model(features_train)
    loss = criterion(labels_pred, labels_train)
    loss.backward()
    optimizer.step()

    optimizer.zero_grad()

with torch.no_grad():
    labels_pred = model(features_test)
    labels_pred_cls = labels_pred.round()
    acc = labels_pred_cls.eq(labels_test).sum() / float(labels_test.shape[0])
    print(f'accuracy: {acc.item():.4f}')

accuracy: 0.7265
