In [14]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [15]:
# Load the Titanic dataset
train_data = pd.read_csv('train.csv')  # Make sure 'train.csv' is in the correct path
train_data.head()


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [16]:
# Fill missing 'Age' values with the median
train_data['Age'] = train_data['Age'].fillna(train_data['Age'].median())

# Fill missing 'Embarked' values with the mode (most frequent)
train_data['Embarked'] = train_data['Embarked'].fillna(train_data['Embarked'].mode()[0])

# Drop the 'Cabin' column (too many missing values)
train_data.drop('Cabin', axis=1, inplace=True)


In [17]:
# Encode 'Sex' column: male -> 0, female -> 1
train_data['Sex'] = train_data['Sex'].map({'male': 0, 'female': 1})

# Encode 'Embarked' column: C -> 0, Q -> 1, S -> 2
train_data['Embarked'] = train_data['Embarked'].map({'C': 0, 'Q': 1, 'S': 2})


In [18]:
# Scale 'Age' and 'Fare' columns using StandardScaler
scaler = StandardScaler()
train_data[['Age', 'Fare']] = scaler.fit_transform(train_data[['Age', 'Fare']])


In [19]:
# Define features (X) and target (y)
X = train_data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]
y = train_data['Survived']

# Split the data into training (70%), validation (15%), and test (15%) sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


In [20]:
# Custom Dataset for Titanic data
class TitanicDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X.values, dtype=torch.float32)  # Features
        self.y = torch.tensor(y.values, dtype=torch.long)    # Target (Survived)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Create DataLoader instances
train_dataset = TitanicDataset(X_train, y_train)
val_dataset = TitanicDataset(X_val, y_val)
test_dataset = TitanicDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)


In [21]:
# Neural Network Architecture
class TitanicNN(nn.Module):
    def __init__(self):
        super(TitanicNN, self).__init__()
        self.fc1 = nn.Linear(7, 64)  # 7 input features
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 2)  # Output layer: 2 classes (Survived, Not Survived)

    def forward(self, x):
        x = F.relu(self.fc1(x))  # ReLU activation
        x = F.relu(self.fc2(x))  # ReLU activation
        x = self.fc3(x)          # Output layer
        return x

# Initialize the model
model = TitanicNN()


In [22]:
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()  # For binary classification (0 or 1)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [24]:
# Training Loop
def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=10):
    train_losses = []
    val_accuracies = []

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()  # Zero the gradients
            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Compute loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update the weights

            running_loss += loss.item()

            # Get predictions
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Calculate training loss and accuracy
        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total
        train_losses.append(train_loss)

        # Calculate validation accuracy
        val_accuracy = evaluate_model(model, val_loader)
        val_accuracies.append(val_accuracy)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, Val Accuracy: {val_accuracy:.2f}%")

    return train_losses, val_accuracies

# Evaluation on validation set
def evaluate_model(model, val_loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return 100 * correct / total

# Train the model
train_losses, val_accuracies = train_model(model, train_loader, val_loader, criterion, optimizer, epochs=30)


Epoch 1/30, Loss: 0.3700, Train Accuracy: 83.79%, Val Accuracy: 84.33%
Epoch 2/30, Loss: 0.3733, Train Accuracy: 84.11%, Val Accuracy: 84.33%
Epoch 3/30, Loss: 0.3752, Train Accuracy: 84.11%, Val Accuracy: 78.36%
Epoch 4/30, Loss: 0.3826, Train Accuracy: 83.79%, Val Accuracy: 85.07%
Epoch 5/30, Loss: 0.3670, Train Accuracy: 85.23%, Val Accuracy: 83.58%
Epoch 6/30, Loss: 0.3745, Train Accuracy: 84.91%, Val Accuracy: 85.07%
Epoch 7/30, Loss: 0.3727, Train Accuracy: 85.07%, Val Accuracy: 83.58%
Epoch 8/30, Loss: 0.3651, Train Accuracy: 85.23%, Val Accuracy: 83.58%
Epoch 9/30, Loss: 0.3654, Train Accuracy: 85.23%, Val Accuracy: 82.84%
Epoch 10/30, Loss: 0.3685, Train Accuracy: 84.75%, Val Accuracy: 84.33%
Epoch 11/30, Loss: 0.3619, Train Accuracy: 84.59%, Val Accuracy: 80.60%
Epoch 12/30, Loss: 0.3622, Train Accuracy: 84.27%, Val Accuracy: 83.58%
Epoch 13/30, Loss: 0.3638, Train Accuracy: 85.23%, Val Accuracy: 82.84%
Epoch 14/30, Loss: 0.3644, Train Accuracy: 84.43%, Val Accuracy: 83.58%
E

In [25]:
# Test accuracy
def test_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    test_accuracy = 100 * correct / total
    print(f"Test Accuracy: {test_accuracy:.2f}%")

# Evaluate on test set
test_model(model, test_loader)


Test Accuracy: 75.37%
