In [63]:
%%capture
%pip install numpy
!pip install torch
%pip install pandas

In [64]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd

from torch.utils.data import Dataset, random_split
from torch.utils.data import DataLoader

In [65]:
full_dataset = pd.read_csv('./titanic/train.csv')

# Preprocess the data
y = full_dataset['Survived']
X_unprocessed = full_dataset[['Pclass', 'Sex', 'SibSp', 'Parch', 'Age', 'Fare', 'Embarked']].copy()

# Fill missing values
X_unprocessed['Fare'] = X_unprocessed['Fare'].fillna(X_unprocessed['Fare'].median())
X_unprocessed['Age'] = X_unprocessed['Age'].fillna(X_unprocessed['Age'].median())
X_unprocessed['Embarked'] = X_unprocessed['Embarked'].fillna(X_unprocessed['Embarked'].mode()[0])

# Standardize numerical columns
num_cols = ['Age', 'Fare', 'SibSp', 'Parch']
X_unprocessed[num_cols] = (X_unprocessed[num_cols] - X_unprocessed[num_cols].mean()) / X_unprocessed[num_cols].std()

# One-hot encode categorical columns
X = pd.get_dummies(X_unprocessed).astype(float)

X = torch.tensor(X.values).float()
y = torch.tensor(y).long()


In [66]:

train_size = int(len(X) * 0.8)
test_size = len(X) - train_size 


seed = torch.Generator().manual_seed(42)
train_dataset, test_dataset= random_split(torch.utils.data.TensorDataset(X,y), [train_size, test_size], generator=seed)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

input_dim = X.shape[1]
y_train_t = y[train_dataset.indices]
num_classes = len(torch.unique(y_train_t))


In [67]:
def accuracy(model, loader):
    model.eval()
    correct = 0
    total = 0

    for X, y in loader:
        outputs = model(X)
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == y).sum().item()
        total += y.size(0)

    return correct / total

In [68]:
def train_model(model, train_loader, test_loader, epochs=100):
    loss_fn = torch.nn.CrossEntropyLoss()
    lr = 1e-2
    
    opt = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=0.001)

    for epoch in range(epochs):
        model.train()

        for X_train, y_train in train_loader:
            opt.zero_grad()
            z = model(X_train) 
            loss = loss_fn(z,y_train) 
            loss.backward() 
            opt.step() 

        train_acc = accuracy(model, train_loader)
        test_acc = accuracy(model, test_loader)

        print(f"Epoch: {epoch+1}")
        print(f"Loss: {loss.item():.4f} | Train Accuracy: {train_acc:.4f} | Test Accuracy: {test_acc:.4f}\n")
       

In [69]:
linear_model = torch.nn.Linear(input_dim, num_classes)

train_model(linear_model, train_loader, test_loader)

Epoch: 1
Loss: 0.6390 | Train Accuracy: 0.3469 | Test Accuracy: 0.2514

Epoch: 2
Loss: 0.7345 | Train Accuracy: 0.5843 | Test Accuracy: 0.6034

Epoch: 3
Loss: 0.4150 | Train Accuracy: 0.6419 | Test Accuracy: 0.6760

Epoch: 4
Loss: 0.5621 | Train Accuracy: 0.6531 | Test Accuracy: 0.7039

Epoch: 5
Loss: 0.5634 | Train Accuracy: 0.6685 | Test Accuracy: 0.7095

Epoch: 6
Loss: 0.7664 | Train Accuracy: 0.6756 | Test Accuracy: 0.7095

Epoch: 7
Loss: 0.5892 | Train Accuracy: 0.6882 | Test Accuracy: 0.7263

Epoch: 8
Loss: 0.5168 | Train Accuracy: 0.6994 | Test Accuracy: 0.7318

Epoch: 9
Loss: 0.4888 | Train Accuracy: 0.7093 | Test Accuracy: 0.7430

Epoch: 10
Loss: 0.5818 | Train Accuracy: 0.7107 | Test Accuracy: 0.7486

Epoch: 11
Loss: 0.4542 | Train Accuracy: 0.7191 | Test Accuracy: 0.7430

Epoch: 12
Loss: 0.4835 | Train Accuracy: 0.7163 | Test Accuracy: 0.7486

Epoch: 13
Loss: 0.3773 | Train Accuracy: 0.7261 | Test Accuracy: 0.7598

Epoch: 14
Loss: 0.6821 | Train Accuracy: 0.7303 | Test Accur

In [70]:
sgd_model = torch.nn.Sequential(
    torch.nn.Linear(input_dim, 100),
    torch.nn.ReLU(),
    torch.nn.Linear(100, num_classes)
)

train_model(sgd_model, train_loader, test_loader)

Epoch: 1
Loss: 0.6043 | Train Accuracy: 0.6166 | Test Accuracy: 0.7151

Epoch: 2
Loss: 0.6625 | Train Accuracy: 0.6882 | Test Accuracy: 0.7486

Epoch: 3
Loss: 0.6070 | Train Accuracy: 0.6966 | Test Accuracy: 0.7709

Epoch: 4
Loss: 0.6313 | Train Accuracy: 0.7022 | Test Accuracy: 0.7709

Epoch: 5
Loss: 0.5130 | Train Accuracy: 0.7037 | Test Accuracy: 0.7654

Epoch: 6
Loss: 0.6388 | Train Accuracy: 0.7388 | Test Accuracy: 0.7877

Epoch: 7
Loss: 0.5305 | Train Accuracy: 0.7444 | Test Accuracy: 0.7989

Epoch: 8
Loss: 0.5470 | Train Accuracy: 0.7500 | Test Accuracy: 0.8045

Epoch: 9
Loss: 0.8402 | Train Accuracy: 0.7612 | Test Accuracy: 0.8101

Epoch: 10
Loss: 0.4407 | Train Accuracy: 0.7654 | Test Accuracy: 0.8101

Epoch: 11
Loss: 0.5084 | Train Accuracy: 0.7683 | Test Accuracy: 0.8212

Epoch: 12
Loss: 0.7356 | Train Accuracy: 0.7739 | Test Accuracy: 0.8268

Epoch: 13
Loss: 0.5998 | Train Accuracy: 0.7753 | Test Accuracy: 0.8436

Epoch: 14
Loss: 0.4800 | Train Accuracy: 0.7767 | Test Accur