In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
class SimpleNN(nn.Module):
    def __init__(self, activation_function='relu'):
        super(SimpleNN, self).__init__()

        if activation_function == 'relu':
            self.activation = nn.ReLU()
        elif activation_function == 'tanh':
            self.activation = nn.Tanh()

        self.layer1 = nn.Linear(1, 2)
        self.layer2 = nn.Linear(2, 1)

    def forward(self, x):
        x = self.activation(self.layer1(x))
        x = self.layer2(x)
        return x

In [3]:
def train_model(model, X_train_tensor, y_train_tensor, lr, momentum):
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for epoch in range(100):
        model.train()
        optimizer.zero_grad()
        output = model(X_train_tensor)
        loss = criterion(output, y_train_tensor)
        loss.backward()
        optimizer.step()

    return model

In [4]:
def optimize_hyperparameters(X_train_tensor, y_train_tensor):
    best_rmse = float('inf')
    best_params = {}

    learning_rates = [0.01, 0.1, 0.001]
    momenta = [0.5, 0.9]
    activations = ['relu', 'tanh']

    for lr in learning_rates:
        for momentum in momenta:
            for activation in activations:
                print(f"Training with lr={lr}, momentum={momentum}, activation={activation}")

                model = SimpleNN(activation_function=activation)
                trained_model = train_model(model, X_train_tensor, y_train_tensor, lr, momentum)

                with torch.no_grad():
                    model.eval()
                    output = trained_model(X_train_tensor)
                    rmse = torch.sqrt(nn.MSELoss()(output, y_train_tensor))

                print(f"RMSE: {rmse.item():.4f}")

                if rmse.item() < best_rmse:
                    best_rmse = rmse.item()
                    best_params = {
                        'learning_rate': lr,
                        'momentum': momentum,
                        'activation_function': activation
                    }

    return best_params, best_rmse

In [5]:
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([[2], [4], [6], [8], [10]])

In [6]:
X_train_tensor = torch.tensor(X, dtype=torch.float32)
y_train_tensor = torch.tensor(y, dtype=torch.float32)

In [7]:
best_params, best_rmse = optimize_hyperparameters(X_train_tensor, y_train_tensor)

Training with lr=0.01, momentum=0.5, activation=relu
RMSE: 0.0298
Training with lr=0.01, momentum=0.5, activation=tanh
RMSE: 0.4697
Training with lr=0.01, momentum=0.9, activation=relu
RMSE: 0.0040
Training with lr=0.01, momentum=0.9, activation=tanh
RMSE: 0.3392
Training with lr=0.1, momentum=0.5, activation=relu
RMSE: 2.8284
Training with lr=0.1, momentum=0.5, activation=tanh
RMSE: 2.8117
Training with lr=0.1, momentum=0.9, activation=relu
RMSE: 2.8296
Training with lr=0.1, momentum=0.9, activation=tanh
RMSE: 2.8286
Training with lr=0.001, momentum=0.5, activation=relu
RMSE: 0.7101
Training with lr=0.001, momentum=0.5, activation=tanh
RMSE: 3.4813
Training with lr=0.001, momentum=0.9, activation=relu
RMSE: 0.0386
Training with lr=0.001, momentum=0.9, activation=tanh
RMSE: 1.0178


In [8]:
print("\nBest Hyperparameters:")
print(f"Learning Rate: {best_params['learning_rate']}")
print(f"Momentum: {best_params['momentum']}")
print(f"Activation Function: {best_params['activation_function']}")
print(f"Best RMSE: {best_rmse:.4f}")


Best Hyperparameters:
Learning Rate: 0.01
Momentum: 0.9
Activation Function: relu
Best RMSE: 0.0040
