In [None]:
# a simple pytorch neural network for adding two numbers

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# define the neural network
class Adder(nn.Module):
    """Single linear layer that maps two input values to one output value."""

    def __init__(self):
        super().__init__()
        # Two input features in, one prediction out.
        self.fc = nn.Linear(in_features=2, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        prediction = self.fc(x)
        return prediction
    
# create the neural network
net = Adder()

# create the optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# create the loss function
criterion = nn.MSELoss()

# train the neural network: one randomly drawn pair of numbers per step
num_steps = 10000
log_every = 1000  # report the loss periodically instead of flooding the output
for i in range(num_steps):
    x = torch.tensor(np.random.rand(2), dtype=torch.float32)
    # keepdim=True gives the target shape (1,), the same shape as net(x).
    # A 0-dim target makes MSELoss broadcast and warn about mismatched sizes.
    y = x.sum(dim=0, keepdim=True)
    optimizer.zero_grad()
    y_pred = net(x)
    loss = criterion(y_pred, y)
    loss.backward()
    optimizer.step()
    if i % log_every == 0 or i == num_steps - 1:
        print('step:', i, 'loss:', loss.item())

# test the neural network
x = torch.tensor([0.1, 0.2], dtype=torch.float32)
y = x.sum()
with torch.no_grad():  # evaluation only: no gradient tracking needed
    y_pred = net(x)
print('x:', x)
print('y:', y)
print('y_pred:', y_pred)
print('error:', y_pred.item() - y.item())

In [None]:
# visualize the neural network using torchviz
from torchviz import make_dot
x = torch.tensor([0.1, 0.2], dtype=torch.float32)
y = x.sum()  # NOTE(review): y is not used by the make_dot call below
# fresh forward pass through net; its output is what make_dot receives
y_pred = net(x)
# last expression of the cell: the make_dot result for y_pred, with the
# network's named parameters passed as params
make_dot(y_pred, params=dict(net.named_parameters()))

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interactive, IntSlider, FloatSlider, Dropdown, fixed

# ---------------------
# Generate synthetic data
# ---------------------
def generate_data(num_samples=200, noise=0.1, seed=None):
    """Generate a noisy two-class, XOR-like 2-D dataset.

    Points are drawn uniformly from the square [-1, 1) x [-1, 1). A point is
    labelled 1.0 when the product of its two coordinates is positive and 0.0
    otherwise. Gaussian jitter is added to the coordinates *after* the labels
    are computed, so a point close to an axis can end up on the other side of
    it while keeping its original label.

    Args:
        num_samples: Number of points to generate.
        noise: Scale (standard deviation) of the Gaussian jitter added to the
            coordinates.
        seed: Optional seed. When given, a private ``np.random.RandomState``
            is used so the result is reproducible and the global NumPy random
            state is left untouched. When ``None`` (the default) the global
            ``np.random`` state is used.

    Returns:
        Tuple ``(x, y)`` where ``x`` has shape ``(num_samples, 2)`` and ``y``
        is a float32 array of shape ``(num_samples,)`` holding 0.0 / 1.0.
    """
    # The np.random module and RandomState both provide rand() and randn().
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Two-class data
    x = rng.rand(num_samples, 2) * 2 - 1
    y = (x[:, 0] * x[:, 1] > 0).astype(np.float32)  # XOR-like pattern
    # Add small random noise (labels above were taken from the clean points)
    x += rng.randn(*x.shape) * noise
    return x, y

In [2]:
# ---------------------
# Define a small neural network
# ---------------------
class SimpleMLP(nn.Module):
    """Small perceptron: Linear -> ReLU -> Linear -> Sigmoid."""

    def __init__(self, input_dim=2, hidden_dim=16, output_dim=1):
        super().__init__()
        # Layers are built in forward order and wrapped in one Sequential
        # stored as ``self.net``.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
            nn.Sigmoid(),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the sigmoid output."""
        out = self.net(x)
        return out

In [3]:
# ---------------------
# Training function
# ---------------------
def train_model(optimizer_name, lr, epochs, batch_size):
    """Train a SimpleMLP on freshly generated data and plot the outcome.

    A new 200-sample dataset is generated and a new model is created on every
    call. The model is trained with binary cross-entropy using shuffled
    mini-batches, then two figures are shown: the per-epoch training loss and
    the model's decision regions over the data.

    Args:
        optimizer_name: "SGD", "Adam" or "RMSProp"; any other value falls
            back to Adam.
        lr: Learning rate passed to the optimizer.
        epochs: Number of passes over the dataset.
        batch_size: Number of samples per mini-batch. The last batch of an
            epoch is smaller when the dataset size is not a multiple of it.

    Returns:
        None. The results are displayed with ``plt.show()``.
    """
    # Data
    x_data, y_data = generate_data(num_samples=200, noise=0.1)
    x_tensor = torch.from_numpy(x_data).float()
    # BCELoss needs targets shaped like the (N, 1) model output
    y_tensor = torch.from_numpy(y_data).float().view(-1, 1)

    model = SimpleMLP()
    criterion = nn.BCELoss()

    # Select optimizer; unknown names use Adam as the default fallback
    optimizer_classes = {
        "SGD": optim.SGD,
        "Adam": optim.Adam,
        "RMSProp": optim.RMSprop,
    }
    optimizer_cls = optimizer_classes.get(optimizer_name, optim.Adam)
    optimizer = optimizer_cls(model.parameters(), lr=lr)

    losses = []
    model.train()
    dataset_size = x_tensor.shape[0]

    for epoch in range(epochs):
        # Mini-batch updates over a fresh shuffle of the sample indices
        perm = torch.randperm(dataset_size)
        epoch_loss = 0.0

        for start in range(0, dataset_size, batch_size):
            idx = perm[start:start + batch_size]
            batch_x = x_tensor[idx]
            batch_y = y_tensor[idx]

            optimizer.zero_grad()
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            # criterion returns the batch mean; weight it by the batch size so
            # a short final batch contributes in proportion to its samples
            epoch_loss += loss.item() * idx.numel()

        # Per-sample mean over the epoch. Dividing by dataset_size // batch_size
        # undercounts batches when the last one is partial and is zero when
        # batch_size exceeds the dataset size.
        losses.append(epoch_loss / dataset_size)

    # Plot the loss curve
    plt.figure(figsize=(6,4))
    plt.plot(losses, label=f'{optimizer_name} (lr={lr})')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training Loss")
    plt.legend()
    plt.show()

    # Decision boundary visualization: evaluate the model on a 100x100 grid
    # covering the data with a 0.2 margin on each side
    model.eval()
    x_min, x_max = x_data[:, 0].min() - 0.2, x_data[:, 0].max() + 0.2
    y_min, y_max = x_data[:, 1].min() - 0.2, x_data[:, 1].max() + 0.2
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    grid = torch.from_numpy(np.c_[xx.ravel(), yy.ravel()]).float()
    with torch.no_grad():  # inference only
        preds = model(grid).numpy().reshape(xx.shape)

    plt.figure(figsize=(6,4))
    # levels split the sigmoid output at 0.5 into two filled regions
    plt.contourf(xx, yy, preds, levels=[0, 0.5, 1], alpha=0.5, cmap="RdBu")
    plt.scatter(x_data[:,0], x_data[:,1], c=y_data, edgecolors='k', cmap="RdBu")
    plt.title("Decision Regions")
    plt.show()

In [None]:
# ---------------------
# Interactive widget
# ---------------------
def interactive_demo(optimizer_name, lr, epochs, batch_size):
    """Widget callback: forward the selected settings to ``train_model``."""
    train_model(
        optimizer_name=optimizer_name,
        lr=lr,
        epochs=epochs,
        batch_size=batch_size,
    )

optimizer_options = ["SGD", "Adam", "RMSProp"]

# Let the description labels take their natural width so they are not truncated.
slider_style = {'description_width': 'initial'}

# continuous_update=False: every value change retrains a model, so only fire
# the callback when the slider is released rather than on each drag event.
# readout_format='.4f': the default readout shows two decimals, which would
# display small learning rates such as 0.0001 as 0.00.
lr_slider = FloatSlider(value=0.01, min=0.0001, max=0.1, step=0.001,
                        description='Learning Rate', readout_format='.4f',
                        continuous_update=False, style=slider_style)
epoch_slider = IntSlider(value=30, min=1, max=200, step=1, description='Epochs',
                         continuous_update=False, style=slider_style)
batch_slider = IntSlider(value=32, min=1, max=128, step=1, description='Batch Size',
                         continuous_update=False, style=slider_style)

demo = interactive(
    interactive_demo,
    optimizer_name=Dropdown(options=optimizer_options, value="SGD", description='Optimizer'),
    lr=lr_slider,
    epochs=epoch_slider,
    batch_size=batch_slider
)

# last expression of the cell: displays the widget UI
demo