In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# 1. Pick the compute device: use the CUDA GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# 2. Define a simple feed-forward neural network class called 'SimpleNet' that inherits from nn.Module.
class SimpleNet(nn.Module):
    """Feed-forward network with a single hidden layer: Linear -> Tanh -> Linear."""

    def __init__(self, input_size: int, hidden_size: int, output_size: int) -> None:
        """Create the two linear layers and the activation placed between them.

        Args:
            input_size: Number of input features of the first linear layer.
            hidden_size: Number of features produced by the first linear layer
                and consumed by the second one.
            output_size: Number of output features of the second linear layer.
        """
        super().__init__()
        # Sub-modules are created in the same order in which forward() applies them.
        self.linear1 = nn.Linear(input_size, hidden_size)  # input -> hidden
        self.tanh = nn.Tanh()  # non-linearity between the linear layers
        self.linear2 = nn.Linear(hidden_size, output_size)  # hidden -> output

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through linear1, tanh and linear2, and return the result.

        Args:
            x: Input tensor handed to the first linear layer.

        Returns:
            The output of the second linear layer.
        """
        hidden = self.tanh(self.linear1(x))
        return self.linear2(hidden)

# 3. Network dimensions: 1 input feature, 10 hidden units, 1 output feature.
input_size, hidden_size, output_size = 1, 10, 1

# 4. Build the 'SimpleNet' model and place it on the selected device (CPU or GPU).
#    nn.Module.to() returns the module itself, so the call can be chained onto construction.
model = SimpleNet(input_size, hidden_size, output_size).to(device)

# 5. Create a random dummy input of shape (1, 1) and transfer it to the model's device.
dummy_input = torch.randn(1, 1).to(device=device)

# 6. Run one forward pass on the dummy input and report the shape of the result.
dummy_output = model(dummy_input)
print("Shape of the dummy output:", dummy_output.shape)

# 7. Briefly explain in the comments the role of the activation function (self.tanh) in this network.
# Your explanation here:
# The activation function (tanh in this case) introduces non-linearity into the neural network.
# Without a non-linear activation, stacking linear layers gains nothing: the composition of
# linear (affine) layers is itself a single linear (affine) layer, so the network could only
# learn linear relationships in the data.
# Applying tanh to the outputs of the first layer before they are passed to the second layer
# lets the network model more intricate, non-linear patterns.


Using device: cuda
Shape of the dummy output: torch.Size([1, 1])
