### Part b (L2 Regularization)

#### Introduction
This part makes few changes relative to the L1-regularization version: the only new piece is an `l2_regularization` function that adds an L2 penalty to the training loss.

#### Results
I achieved an L2 relative error of 0.049 (below the 0.05 target) using 100,000 epochs, a learning rate of 0.001, and an `l2_lambda` of 0.000001.

In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

# Piecewise oscillatory target function
def f(x):
    """Evaluate the piecewise target elementwise on a tensor.

    Entries with x < 0 map to 5 + sin(x) + sin(2x) + sin(3x) + sin(4x);
    entries with x >= 0 map to cos(10x). Entries matching neither mask
    (e.g. NaN) keep the zero they are initialised with.
    """
    neg_mask = x < 0
    nonneg_mask = x >= 0

    # Accumulate the four sine harmonics on the negative entries only.
    x_neg = x[neg_mask]
    harmonics = 0
    for k in range(1, 5):
        harmonics = harmonics + torch.sin(k * x_neg)

    out = torch.zeros_like(x)
    out[neg_mask] = 5 + harmonics
    out[nonneg_mask] = torch.cos(10 * x[nonneg_mask])
    return out

In [2]:
# Fix the global RNG seed so the noisy training targets (and any later draws
# from the global generator) are the same on every Restart & Run All.
torch.manual_seed(0)

# Generate training data: 80 evenly spaced points on [-pi, pi] whose targets
# carry additive Gaussian noise with standard deviation 0.1.
x_train = torch.linspace(-np.pi, np.pi, 80)
y_train = f(x_train) + 0.1 * torch.randn_like(x_train)

# Generate testing data: a denser, noise-free grid over the same interval.
x_test = torch.linspace(-np.pi, np.pi, 1000)
y_test = f(x_test)

In [3]:
# Define the neural network
class ReLUNet(nn.Module):
    """Fully connected network with two ReLU hidden layers, scalar in and scalar out.

    Args:
        hidden_size: Width of both hidden layers. Defaults to 50.
    """

    def __init__(self, hidden_size=50):
        super().__init__()
        self.fc1 = nn.Linear(1, hidden_size)  # Input layer to first hidden layer
        self.fc2 = nn.Linear(hidden_size, hidden_size)  # First hidden layer to second hidden layer
        self.fc3 = nn.Linear(hidden_size, 1)  # Second hidden layer to output layer

    def forward(self, x):
        """Map a 1-D batch of inputs of shape (N,) to predictions of shape (N,)."""
        x = torch.relu(self.fc1(x.unsqueeze(1)))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        # Drop only the trailing feature axis. A bare squeeze() would also
        # collapse the batch axis when N == 1 and return a 0-d tensor.
        return x.squeeze(-1)

In [23]:
# Build the training setup: penalty weight, loss, network and optimiser
l2_lambda = 1e-6  # Regularization strength
# NOTE(review): an earlier note here recorded "best so far: 0.00001", which
# is not the value set above - confirm which setting is intended.
criterion = nn.MSELoss()
model = ReLUNet()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# Function for L2 regularization
def l2_regularization(model, loss, strength=None):
    """Return ``loss`` plus an L2 penalty on every parameter of ``model``.

    The penalty is ``strength`` times the sum of squared entries over all
    tensors yielded by ``model.parameters()``.

    Args:
        model: Module whose parameters are penalised.
        loss: Data-fit loss the penalty is added to. It is not modified.
        strength: Penalty coefficient. When ``None`` (the default) the
            module-level ``l2_lambda`` is used, which keeps the original
            two-argument call working unchanged.

    Returns:
        A new tensor holding the regularised loss.
    """
    if strength is None:
        strength = l2_lambda
    squared_norm = sum(p.pow(2.0).sum() for p in model.parameters())
    # Build a new tensor instead of using ``+=`` so the caller's ``loss``
    # object is left untouched.
    return loss + strength * squared_norm

In [30]:
# Train the model
# Note: this loop starts from the model's current weights, so re-running the
# cell continues training rather than restarting it.
epochs = 100000
log_every = 500  # Print the loss every this many epochs

model.train()  # Nothing in the loop changes the mode, so set it once up front
for epoch in range(epochs):
    optimizer.zero_grad()
    y_pred = model(x_train)
    loss = criterion(y_pred, y_train)
    loss = l2_regularization(model, loss)  # Apply L2 regularization
    loss.backward()
    optimizer.step()

    # Report on the logging interval and always on the final epoch.
    if epoch % log_every == 0 or epoch == epochs - 1:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

Epoch 0, Loss: 0.002871772041544318
Epoch 500, Loss: 0.004121589940041304
Epoch 1000, Loss: 0.002859259955585003
Epoch 1500, Loss: 0.003803364234045148
Epoch 2000, Loss: 0.002893748227506876
Epoch 2500, Loss: 0.002955489093437791
Epoch 3000, Loss: 0.002858104184269905
Epoch 3500, Loss: 0.0030439188703894615
Epoch 4000, Loss: 0.002980070188641548
Epoch 4500, Loss: 0.0028636844363063574
Epoch 5000, Loss: 0.0028091075364500284
Epoch 5500, Loss: 0.0032219192944467068
Epoch 6000, Loss: 0.002946931403130293
Epoch 6500, Loss: 0.0029084894340485334
Epoch 7000, Loss: 0.002954555908218026
Epoch 7500, Loss: 0.002774035558104515
Epoch 8000, Loss: 0.003045202698558569
Epoch 8500, Loss: 0.0027621241752058268
Epoch 9000, Loss: 0.0027557408902794123
Epoch 9500, Loss: 0.0028272983618080616
Epoch 10000, Loss: 0.003109192242845893
Epoch 10500, Loss: 0.0027708448469638824
Epoch 11000, Loss: 0.002856904175132513
Epoch 11500, Loss: 0.0037994810845702887
Epoch 12000, Loss: 0.0029933974146842957
Epoch 12500, 

In [33]:
# Evaluate on the test grid: ||prediction - target||_2 / ||target||_2
model.eval()
with torch.no_grad():
    y_pred_test = model(x_test)
    residual = y_pred_test - y_test
    l2_norm = residual.pow(2).sum().sqrt()  # Euclidean norm of the error
    f_norm = y_test.pow(2).sum().sqrt()  # Euclidean norm of the targets
    l2_relative_error = l2_norm / f_norm
print(f'L2 Relative Error: {l2_relative_error.item()}')


L2 Relative Error: 0.049379269480705264
