In [1]:
# Importing the necessary modules
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

# Setting the device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Defining the functions to be approximated
def f(x1, x2, x3):
    return x1 + x2 + x3

def g(x1, x2, x3):
    return x1**2 + x2**3 + 0.5 * x3

def h(x1, x2, x3):
    return x3 * x1**(x2)


In [2]:

# Generating the data by sampling from the given ranges
x1 = np.arange(0, 10, 0.5)
x2 = np.arange(0, 10, 0.5)
x3 = np.arange(0, 10, 0.5)
X = np.array(np.meshgrid(x1, x2, x3)).T.reshape(-1, 3) # shape: (8000, 3)
y_f = f(X[:, 0], X[:, 1], X[:, 2]) # shape: (8000,)
y_g = g(X[:, 0], X[:, 1], X[:, 2]) # shape: (8000,)
y_h = h(X[:, 0], X[:, 1], X[:, 2]) # shape: (8000,)
Y = np.stack([y_f, y_g, y_h], axis=1) # shape: (8000, 3)

# Converting the data to torch tensors
X = torch.from_numpy(X).float().to(device)
Y = torch.from_numpy(Y).float().to(device)

# Splitting the data into train and test sets
train_size = int(0.8 * len(X))
test_size = len(X) - train_size
X_train, X_test = torch.split(X, [train_size, test_size])
Y_train, Y_test = torch.split(Y, [train_size, test_size])


In [3]:

# Defining the hyperparameters to be tested
num_layers_list = [2, 3, 4] # number of hidden layers
num_units_list = [16, 32, 64] # number of units per hidden layer
activation_list = [nn.ReLU(), nn.Sigmoid(), nn.Tanh()] # activation functions for hidden layers
loss_list = [F.mse_loss, F.smooth_l1_loss, F.huber_loss] # loss functions for output layer
optimizer_list = [optim.SGD, optim.Adam] # optimizer algorithms for updating parameters
learning_rate_list = [0.01, 0.001] # learning rates for optimizer
batch_size_list = [32, 64] # batch sizes for data loader
num_epochs_list = [50] # number of epochs for training


In [4]:

# Defining a function to create a neural network model with the given hyperparameters
def create_model(num_layers, num_units, activation):
    layers = []
    layers.append(nn.Linear(3, num_units)) # input layer
    layers.append(activation) # activation for input layer
    for i in range(num_layers - 1): # hidden layers
        layers.append(nn.Linear(num_units, num_units))
        layers.append(activation)
    layers.append(nn.Linear(num_units, 3)) # output layer
    model = nn.Sequential(*layers).to(device) # creating the model from the layers list
    return model


In [5]:

# Defining a function to train a neural network model with the given hyperparameters and data
def train_model(model, loss_fn, optimizer, batch_size, num_epochs):
    train_loader = DataLoader(list(zip(X_train, Y_train)), batch_size=batch_size) # creating a data loader for training data
    train_losses = [] # list to store the training losses per epoch
    for epoch in range(num_epochs): # looping over the epochs
        epoch_loss = 0 # variable to store the epoch loss
        for x_batch, y_batch in train_loader: # looping over the batches
            optimizer.zero_grad() # zeroing the gradients of the parameters
            y_pred = model(x_batch) # forward pass
            loss = loss_fn(y_pred, y_batch) # calculating the loss
            loss.backward() # backward pass
            optimizer.step() # updating the parameters
            epoch_loss += loss.item() # adding the batch loss to the epoch loss
        train_losses.append(epoch_loss / len(train_loader)) # appending the average epoch loss to the list
        print(f"Epoch {epoch + 1}, Loss: {train_losses[-1]:.4f}") # printing the epoch loss
    return train_losses # returning the list of training losses


In [6]:

# Defining a function to test a neural network model with the given data
def test_model(model, loss_fn):
    test_loader = DataLoader(list(zip(X_test, Y_test)), batch_size=len(X_test)) # creating a data loader for testing data
    with torch.no_grad(): # disabling gradient computation
        for x_batch, y_batch in test_loader: # looping over the batches
            y_pred = model(x_batch) # forward pass
            loss = loss_fn(y_pred, y_batch) # calculating the loss
            print(f"Test Loss: {loss:.4f}") # printing the test loss
            return y_pred, loss # returning the predictions and the loss


In [7]:

# Defining a function to plot the results of a neural network model with the given data and hyperparameters
def plot_results(model, y_pred, train_losses, num_layers, num_units, activation, loss_fn, optimizer, learning_rate):
    fig, axs = plt.subplots(2, 2, figsize=(12, 10)) # creating a figure with 2x2 subplots
    fig.suptitle(f"Results for model with {num_layers} layers, {num_units} units, {activation}, {loss_fn}, {optimizer}, {learning_rate}") # setting the figure title

    # Plotting the training losses
    axs[0, 0].plot(train_losses) # plotting the losses per epoch
    axs[0, 0].set_xlabel("Epoch") # setting the x-axis label
    axs[0, 0].set_ylabel("Loss") # setting the y-axis label
    axs[0, 0].set_title("Training Loss") # setting the subplot title

    # Plotting the true vs predicted values for f function
    axs[0, 1].scatter(Y_test[:, 0].cpu(), y_pred[:, 0].cpu(), c="blue", alpha=0.5) # plotting the true and predicted values as scatter points
    axs[0, 1].plot([Y_test[:, 0].min().cpu(), Y_test[:, 0].max().cpu()], [Y_test[:, 0].min().cpu(), Y_test[:, 0].max().cpu()], c="red", linestyle="--") # plotting the identity line as reference
    axs[0, 1].set_xlabel("True") # setting the x-axis label
    axs[0, 1].set_ylabel("Predicted") # setting the y-axis label
    axs[0, 1].set_title("f function") # setting the subplot title

    # Plotting the true vs predicted values for g function
    axs[1, 0].scatter(Y_test[:, 1].cpu(), y_pred[:, 1].cpu(), c="green", alpha=0.5) # plotting the true and predicted values as scatter points
    axs[1, 0].plot([Y_test[:, 1].min().cpu(), Y_test[:, 1].max().cpu()], [Y_test[:, 1].min().cpu(), Y_test[:, 1].max().cpu()], c="red", linestyle="--") # plotting the identity line as reference
    axs[1, 0].set_xlabel("True") # setting the x-axis label
    axs[1, 0].set_ylabel("Predicted") # setting the y-axis label
    axs[1, 0].set_title("g function") # setting the subplot title

    # Plotting the true vs predicted values for h function
    axs[1, 1].scatter(Y_test[:, 2].cpu(), y_pred[:, 2].cpu(), c="orange", alpha=0.5) # plotting the true and predicted values as scatter points
    axs[1, 1].plot([Y_test[:, 2].min().cpu(), Y_test[:, 2].max().cpu()], [Y_test[:, 2].min().cpu(), Y_test[:, 2].max().cpu()], c="red", linestyle="--") # plotting the identity line as reference
    axs[1, 1].set_xlabel("True") # setting the x-axis label
    axs[1, 1].set_ylabel("Predicted") # setting the y-axis label
    axs[1, 1].set_title("h function") # setting the subplot title

    fig.tight_layout() # adjusting the spacing between subplots
    plt.show() # showing the figure


In [8]:

# Defining a list to store the best results for each function
best_results = [{"loss": float("inf"), "model": None, "y_pred": None, "train_losses": None, "hyperparams": None} for _ in range(3)]


In [13]:

# Looping over all possible combinations of hyperparameters
for num_layers in num_layers_list:
    for num_units in num_units_list:
        for activation in activation_list:
            for loss_fn in loss_list:
                for optimizer_class in optimizer_list:
                    for learning_rate in learning_rate_list:
                        for batch_size in batch_size_list:
                            for num_epochs in num_epochs_list:

                                # Printing the current hyperparameters
                                print(f"num_layers: {num_layers}, num_units: {num_units}, activation: {activation}, loss_fn: {loss_fn}, optimizer: {optimizer_class}, learning_rate: {learning_rate}, batch_size: {batch_size}, num_epochs: {num_epochs}")

                                # Creating a model with the current hyperparameters
                                model = create_model(num_layers, num_units, activation)

                                # Creating an optimizer with the current hyperparameters
                                optimizer = optimizer_class(model.parameters(), lr=learning_rate)

                                # Training the model with the current hyperparameters and data
                                train_losses = train_model(model, loss_fn, optimizer, batch_size, num_epochs)

                                # Testing the model with the current hyperparameters and data
                                y_pred, test_loss = test_model(model, loss_fn)

                                # Checking if the current results are better than the previous best results for each function
                                for i in range(3):
                                    if test_loss[i].item() < best_results[i]["loss"]:
                                        # Updating the best results for the current function
                                        best_results[i]["loss"] = test_loss[i]
                                        best_results[i]["model"] = model
                                        best_results[i]["y_pred"] = y_pred
                                        best_results[i]["train_losses"] = train_losses
                                        best_results[i]["hyperparams"] = (num_layers, num_units, activation, loss_fn, optimizer_class, learning_rate)


num_layers: 2, num_units: 16, activation: ReLU(), loss_fn: <function mse_loss at 0x7f4fe412dcf0>, optimizer: <class 'torch.optim.sgd.SGD'>, learning_rate: 0.01, batch_size: 32, num_epochs: 50
Epoch 1, Loss: nan
Epoch 2, Loss: nan
Epoch 3, Loss: nan
Epoch 4, Loss: nan
Epoch 5, Loss: nan
Epoch 6, Loss: nan
Epoch 7, Loss: nan
Epoch 8, Loss: nan
Epoch 9, Loss: nan
Epoch 10, Loss: nan
Epoch 11, Loss: nan
Epoch 12, Loss: nan
Epoch 13, Loss: nan
Epoch 14, Loss: nan
Epoch 15, Loss: nan
Epoch 16, Loss: nan
Epoch 17, Loss: nan
Epoch 18, Loss: nan
Epoch 19, Loss: nan
Epoch 20, Loss: nan
Epoch 21, Loss: nan
Epoch 22, Loss: nan
Epoch 23, Loss: nan
Epoch 24, Loss: nan
Epoch 25, Loss: nan
Epoch 26, Loss: nan
Epoch 27, Loss: nan
Epoch 28, Loss: nan
Epoch 29, Loss: nan
Epoch 30, Loss: nan
Epoch 31, Loss: nan
Epoch 32, Loss: nan
Epoch 33, Loss: nan
Epoch 34, Loss: nan
Epoch 35, Loss: nan
Epoch 36, Loss: nan
Epoch 37, Loss: nan
Epoch 38, Loss: nan
Epoch 39, Loss: nan
Epoch 40, Loss: nan
Epoch 41, Loss: n

IndexError: invalid index of a 0-dim tensor. Use `tensor.item()` in Python or `tensor.item<T>()` in C++ to convert a 0-dim tensor to a number

In [None]:

# Printing and plotting the best results for each function
for i in range(3):
    print(f"Best results for function {i + 1}:")
    print(f"Loss: {best_results[i]['loss']:.4f}")
    print(f"Hyperparameters: {best_results[i]['hyperparams']}")
    plot_results(best_results[i]["model"], best_results[i]["y_pred"][:, i], best_results[i]["train_losses"], *best_results[i]["hyperparams"])


In [None]:

# Saving the models and plots
torch.save(best_results[0]["model"], "f_model.pth")
torch.save(best_results[1]["model"], "g_model.pth")
torch.save(best_results[2]["model"], "h_model.pth")
fig.savefig("results.png")