In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split

In [19]:
# Set random seed for reproducibility
# One named constant feeds every RNG the notebook relies on, so changing the
# seed in a single place keeps the PyTorch and NumPy generators in step.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)


In [20]:
# Define the Neural Network
class SimpleNN(nn.Module):
    """Minimal 2-2-1 feed-forward network with a sigmoid output.

    Two input features go through a 2-unit linear layer and a selectable
    non-linearity, then through a 1-unit linear layer whose result is
    squashed by a sigmoid.

    Args:
        activation_function: Name of the hidden-layer non-linearity,
            either ``'relu'`` (default) or ``'tanh'``.

    Raises:
        ValueError: If ``activation_function`` is neither of the two names.
    """

    def __init__(self, activation_function='relu'):
        super().__init__()
        self.layer1 = nn.Linear(2, 2)

        # Compare by equality against each supported name; the for/else falls
        # through to the error only when no name matched.
        for supported_name, module_class in (('relu', nn.ReLU), ('tanh', nn.Tanh)):
            if activation_function == supported_name:
                self.activation = module_class()
                break
        else:
            raise ValueError("Unsupported activation function")

        self.output_layer = nn.Linear(2, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run the forward pass; the last dimension of ``x`` must be 2."""
        hidden = self.layer1(x)
        hidden = self.activation(hidden)
        pre_sigmoid = self.output_layer(hidden)
        return self.sigmoid(pre_sigmoid)

In [21]:
# Train the model
def train_model(model, X_train, y_train, lr, momentum, epochs=100):
    """Fit ``model`` with full-batch SGD on a binary cross-entropy loss.

    Each epoch performs exactly one optimizer step computed from the whole
    ``X_train`` / ``y_train`` pair; there is no mini-batching or shuffling.

    Args:
        model: Module whose output is scored against ``y_train`` with
            ``nn.BCELoss``.
        X_train: Input tensor passed to ``model``.
        y_train: Target tensor passed to the loss.
        lr: Learning rate for ``optim.SGD``.
        momentum: Momentum factor for ``optim.SGD``.
        epochs: Number of full-batch updates to perform.

    Returns:
        The same ``model`` object, with its parameters updated in place.
    """
    loss_fn = nn.BCELoss()
    sgd = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for _ in range(epochs):
        # Set inside the loop: with epochs == 0 the model's mode is untouched.
        model.train()
        sgd.zero_grad()
        batch_loss = loss_fn(model(X_train), y_train)
        batch_loss.backward()
        sgd.step()

    return model

In [22]:
# Hyperparameter tuning
def optimize_hyperparameters(X_train, y_train, learning_rates=None, momenta=None,
                             activations=None):
    """Grid-search learning rate, momentum and activation for ``SimpleNN``.

    For every combination a fresh ``SimpleNN`` is trained with
    ``train_model`` and scored by thresholding its output at 0.5. The score
    is the accuracy on the same ``X_train`` / ``y_train`` used for training;
    no held-out data is involved.

    Args:
        X_train: Input tensor used both for training and for scoring.
        y_train: Target tensor used both for training and for scoring.
        learning_rates: Iterable of learning rates to try.
            Defaults to ``[0.01, 0.1]``.
        momenta: Iterable of momentum values to try.
            Defaults to ``[0.5, 0.9]``.
        activations: Iterable of activation names understood by
            ``SimpleNN``. Defaults to ``['relu', 'tanh']``.

    Returns:
        ``(best_params, best_accuracy)`` where ``best_params`` is a dict with
        keys ``'learning_rate'``, ``'momentum'`` and ``'activation_function'``
        and ``best_accuracy`` is the matching accuracy as a Python float.
        On ties the configuration tried first wins.

    Raises:
        ValueError: If any of the three grids is empty.
    """
    learning_rates = [0.01, 0.1] if learning_rates is None else list(learning_rates)
    momenta = [0.5, 0.9] if momenta is None else list(momenta)
    activations = ['relu', 'tanh'] if activations is None else list(activations)
    if not (learning_rates and momenta and activations):
        raise ValueError("Hyperparameter grid must contain at least one value per axis")

    # Start below any reachable accuracy so the first configuration is always
    # recorded. Starting at 0 left best_params empty (and callers hitting a
    # KeyError) whenever every configuration scored exactly 0.
    best_accuracy = float('-inf')
    best_params = {}

    for lr in learning_rates:
        for momentum in momenta:
            for activation in activations:
                print(f"Training with lr={lr}, momentum={momentum}, activation={activation}")

                model = SimpleNN(activation_function=activation)
                trained_model = train_model(model, X_train, y_train, lr, momentum)

                # Switch the model that is actually evaluated to eval mode,
                # then score it without tracking gradients.
                trained_model.eval()
                with torch.no_grad():
                    predictions = trained_model(X_train)
                    predicted = (predictions > 0.5).float()
                    accuracy = (predicted == y_train).float().mean().item()

                print(f"Accuracy: {accuracy:.4f}")

                # Strict '>' keeps the earliest configuration on ties.
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_params = {
                        'learning_rate': lr,
                        'momentum': momentum,
                        'activation_function': activation
                    }

    return best_params, best_accuracy

In [23]:
# Dataset: AND logic gate
# Enumerate every pair of binary inputs in truth-table order: 00, 01, 10, 11.
X = np.array([[left, right] for left in (0, 1) for right in (0, 1)], dtype=np.float32)
# For 0/1 inputs, AND equals the product of the two columns; keepdims keeps
# the labels as a (4, 1) column with the same float32 dtype as X.
y = X.prod(axis=1, keepdims=True)

In [24]:
# Convert to PyTorch tensors
# torch.tensor copies its input, so the tensors do not share memory with the
# NumPy arrays they were built from.
X_tensor, y_tensor = (torch.tensor(array) for array in (X, y))

In [25]:
# Run hyperparameter optimization
best_params, best_accuracy = optimize_hyperparameters(X_tensor, y_tensor)

# Display best configuration
print("\nBest Hyperparameters Found:")
# (label shown to the reader, key in best_params), printed in this order.
for label, key in (
    ("Learning Rate", 'learning_rate'),
    ("Momentum", 'momentum'),
    ("Activation Function", 'activation_function'),
):
    print(f"{label}: {best_params[key]}")
print(f"Best Training Accuracy: {best_accuracy:.4f}")

Training with lr=0.01, momentum=0.5, activation=relu
Accuracy: 0.2500
Training with lr=0.01, momentum=0.5, activation=tanh
Accuracy: 0.7500
Training with lr=0.01, momentum=0.9, activation=relu
Accuracy: 0.7500
Training with lr=0.01, momentum=0.9, activation=tanh
Accuracy: 0.7500
Training with lr=0.1, momentum=0.5, activation=relu
Accuracy: 1.0000
Training with lr=0.1, momentum=0.5, activation=tanh
Accuracy: 1.0000
Training with lr=0.1, momentum=0.9, activation=relu
Accuracy: 1.0000
Training with lr=0.1, momentum=0.9, activation=tanh
Accuracy: 1.0000

Best Hyperparameters Found:
Learning Rate: 0.1
Momentum: 0.5
Activation Function: relu
Best Training Accuracy: 1.0000
