# RMSprop Optimizer

After testing different values for each parameter, we identified the best configuration:

- `learning_rate`: The best results were recorded with a learning rate of 0.01.
- `eps` and `alpha`: The best-performing combinations were `eps=0.0001` with `alpha=0.3`, or `eps=1e-08` with `alpha=0.6`.

## Results

After testing various parameter combinations, the RMSprop optimizer achieved a maximum accuracy of `0.83` locally and `0.76` on Kaggle.

In [1]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [2]:
class TitanicDataset(Dataset):
    """In-memory dataset built from a features CSV and a labels CSV.

    Both files are read in full with ``pandas.read_csv`` when the object is
    created and are kept as ``float32`` tensors in ``self.data`` and
    ``self.labels``.
    """

    def __init__(self, data_file, labels_file):
        """Read both CSV files and convert their values to tensors.

        Args:
            data_file: Location of the features CSV, passed to ``pd.read_csv``.
            labels_file: Location of the labels CSV, passed to ``pd.read_csv``.
        """
        features = pd.read_csv(data_file)
        targets = pd.read_csv(labels_file)
        self.data = torch.tensor(features.values, dtype=torch.float32)
        self.labels = torch.tensor(targets.values, dtype=torch.float32)

    def __len__(self):
        """Return the number of rows in the feature tensor."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        sample = self.data[idx]
        target = self.labels[idx]
        return sample, target

# Locations of the CSV files read below.
data_path = "../Titanic_data/train_data.csv"
labels_path = "../Titanic_data/train_data_labels.csv"
test_path = "../Titanic_data/train_test.csv"
test_labels_path = "../Titanic_data/train_test_labels.csv"
predict_path = "../Titanic_data/processed_test_data.csv"

# Datasets. The prediction file is passed as its own "labels" file, so
# predictset.labels is just a second copy of the features (a placeholder).
dataset = TitanicDataset(data_path, labels_path)
testset = TitanicDataset(test_path, test_labels_path)
predictset = TitanicDataset(predict_path, predict_path)

# Batch loaders: only the training loader shuffles.
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(testset, batch_size=32, shuffle=False)

# Report tensor shapes and batch counts, one message per line.
print(
    f"Training data shape: {dataset.data.shape}",
    f"Training labels shape: {dataset.labels.shape}",
    f"Test data shape: {testset.data.shape}",
    f"Test labels shape: {testset.labels.shape}",
    f"Predict data shape: {predictset.data.shape}",
    sep="\n",
)
print(
    f"Number of batches {len(data_loader)} of size {data_loader.batch_size}",
    f"Number of test batches {len(test_loader)} of size {test_loader.batch_size}",
    sep="\n",
)

Training data shape: torch.Size([784, 3])
Training labels shape: torch.Size([784, 1])
Test data shape: torch.Size([107, 3])
Test labels shape: torch.Size([107, 1])
Predict data shape: torch.Size([418, 3])
Number of batches 25 of size 32
Number of test batches 4 of size 32


In [3]:
# Device that the models and the batches are moved to with `.to(device)`.
device = "cpu"

class NeuralNetwork(nn.Module):
    """Fully connected classifier: three hidden ReLU layers and a linear output.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of each of the three hidden layers. Defaults to 150.
        num_classes: Number of output logits. Defaults to 2.
    """

    def __init__(self, input_size, hidden_size=150, num_classes=2):
        super().__init__()
        # The attribute name and the layer order are kept fixed so that the
        # state_dict keys ("linear_relu_stack.0.weight", ...) stay stable.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, x):
        """Return the raw (unnormalised) class logits for ``x``."""
        return self.linear_relu_stack(x)

In [4]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass (one epoch) over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(features, labels)`` batches.
        model: Module to optimise; it is switched to training mode.
        loss_fn: Callable taking ``(logits, class_indices)`` and returning a
            scalar loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
    """
    model.train()
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        # Flatten the labels to shape (batch,). A bare squeeze() would turn a
        # batch holding a single sample into a 0-dim tensor, which no longer
        # lines up with the (1, num_classes) logits.
        y = y.reshape(-1).long()

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

def test(dataloader, model):
    """Return the fraction of ``dataloader.dataset`` that ``model`` classifies correctly.

    The model is put in evaluation mode and gradients are disabled. A sample
    counts as correct when the arg-max over dim 1 of the model output equals
    its label.
    """
    model.eval()
    hits = 0
    with torch.no_grad():
        for features, targets in dataloader:
            features = features.to(device)
            targets = targets.to(device).squeeze().long()
            predicted = model(features).argmax(1)
            matches = (predicted == targets).type(torch.float)
            hits += matches.sum().item()

    # Normalise by the dataset size, not by the number of batches.
    return hits / len(dataloader.dataset)

In [5]:
# Grid search over the RMSprop hyperparameters: every (lr, eps, alpha)
# combination gets a freshly initialised model trained for `epochs` epochs.
lr_ranges = [0.001, 0.01]
alpha_ranges = [0.3, 0.6, 0.8, 0.99]
eps_ranges = [1e-2, 1e-4, 1e-6, 1e-8]

loss_fn = nn.CrossEntropyLoss()
epochs = 300
max_accuracy = 0
params = {}

# Visiting order: lr varies slowest, then eps, then alpha.
for lr, eps, alpha in (
    (lr_value, eps_value, alpha_value)
    for lr_value in lr_ranges
    for eps_value in eps_ranges
    for alpha_value in alpha_ranges
):
    model = NeuralNetwork(dataset.data.shape[1]).to(device)
    optimizer = torch.optim.RMSprop(model.parameters(), lr=lr, alpha=alpha, eps=eps)

    # Highest accuracy reported by test() after any single epoch of this run.
    model_acc = 0
    for t in range(epochs):
        train(data_loader, model, loss_fn, optimizer)
        epoch_acc = test(test_loader, model)
        if epoch_acc > model_acc:
            model_acc = epoch_acc

    # Remember the configuration with the best accuracy so far.
    if model_acc > max_accuracy:
        max_accuracy, params = model_acc, {'lr': lr, 'eps': eps, 'alpha': alpha}
    print(f"Model accuracy: {model_acc:.3f}    Params: lr: {lr}, eps: {eps}, alpha: {alpha}")

print(f"Final max accuracy: {max_accuracy:.3f}    Params: {params}")

Model accuracy: 0.813    Params: lr: 0.001, eps: 0.01, alpha: 0.3
Model accuracy: 0.813    Params: lr: 0.001, eps: 0.01, alpha: 0.6
Model accuracy: 0.813    Params: lr: 0.001, eps: 0.01, alpha: 0.8
Model accuracy: 0.813    Params: lr: 0.001, eps: 0.01, alpha: 0.99
Model accuracy: 0.813    Params: lr: 0.001, eps: 0.0001, alpha: 0.3
Model accuracy: 0.822    Params: lr: 0.001, eps: 0.0001, alpha: 0.6
Model accuracy: 0.822    Params: lr: 0.001, eps: 0.0001, alpha: 0.8
Model accuracy: 0.813    Params: lr: 0.001, eps: 0.0001, alpha: 0.99
Model accuracy: 0.813    Params: lr: 0.001, eps: 1e-06, alpha: 0.3
Model accuracy: 0.822    Params: lr: 0.001, eps: 1e-06, alpha: 0.6
Model accuracy: 0.822    Params: lr: 0.001, eps: 1e-06, alpha: 0.8
Model accuracy: 0.822    Params: lr: 0.001, eps: 1e-06, alpha: 0.99
Model accuracy: 0.813    Params: lr: 0.001, eps: 1e-08, alpha: 0.3
Model accuracy: 0.822    Params: lr: 0.001, eps: 1e-08, alpha: 0.6
Model accuracy: 0.822    Params: lr: 0.001, eps: 1e-08, alp

In [6]:
# train with the best hyperparameters
model = NeuralNetwork(dataset.data.shape[1]).to(device)
optimizer = torch.optim.RMSprop(
    model.parameters(), lr=params['lr'], alpha=params['alpha'], eps=params['eps']
)
for _ in range(epochs):
    # Accuracy is not evaluated per epoch here because nothing consumes it.
    train(data_loader, model, loss_fn, optimizer)

# Predict every row of the submission set in one forward pass. Only the
# feature tensor is used; predictset.labels is a placeholder.
model.eval()
with torch.no_grad():
    logits = model(predictset.data.to(device))
    # One class index (0 or 1) per row, as plain Python ints.
    predictions = logits.argmax(dim=1).tolist()

# save results
submission = pd.DataFrame({
    "PassengerId": range(892, 892 + len(predictions)),  # PassengerId starts from 892
    "Survived": predictions
})
submission.to_csv("Results_RMS.csv", index=False)
print("Predictions saved")

Predictions saved
