SETUP

In [1]:
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import os

train_path = './train.csv'
test_path = './test.csv'

titanic_data = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)
titanic_data.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [2]:
train_data_frame = pd.DataFrame(titanic_data)
i=0
for sex in train_data_frame.Sex:
    if sex=='male':
        train_data_frame.loc[i, ['Sex']]=[1]
        i+=1
    elif sex == 'female':
        train_data_frame.loc[i, ['Sex']]=[2]
        i+=1

os.remove(train_path)
train_data_frame.to_csv('./train.csv', index=False)
titanic_data = titanic_data.dropna(axis=0)

test_data_frame = pd.DataFrame(test_data)
i=0
for sex in test_data_frame.Sex:
    if sex=='male':
        test_data_frame.loc[i, ['Sex']]=[1]
        i+=1
    elif sex == 'female':
        test_data_frame.loc[i, ['Sex']]=[2]
        i+=1


os.remove(test_path)
test_data_frame.to_csv('./test.csv', index=False)
test_data = test_data.dropna(axis=0)

In [3]:
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch']

test_X = test_data[features]
y = titanic_data.Survived
X = titanic_data[features]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
import torch.optim as optim
import torch.nn as nn

class TitanicModel(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(TitanicModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, 1)  # Single output neuron for binary classification

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Initialize the model
input_size = len(X.columns)
hidden_size = 64
model = TitanicModel(input_size, hidden_size)


In [5]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [7]:
# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)


In [8]:
# Create a data loader
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

print (train_loader)

<torch.utils.data.dataloader.DataLoader object at 0x0000026BC1B53750>


In [9]:
num_epochs = 100
batch_size = 32

# Training loop
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(inputs)
        
        # Reshape the target tensor to match the size of the model's output
        labels = labels.view(-1, 1)

        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 10 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

In [10]:
# Convert the test data to PyTorch tensors
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)

# Forward pass on the test data
model.eval()
with torch.no_grad():
    outputs = model(X_test_tensor)
    _, predicted = torch.max(outputs.data, 1)

# Convert the predicted tensor to numpy array
predicted = predicted.numpy()

# Convert the test labels to numpy array
y_test_array = y_test.values

# Calculate accuracy
accuracy = (predicted == y_test_array).sum().item() / len(y_test_array)

print('Accuracy on the test set: {:.2f}%'.format(accuracy * 100))


Accuracy on the test set: 37.84%
