In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the neural network
class AddNumbersNet(nn.Module):
    def __init__(self):
        super(AddNumbersNet, self).__init__()
        self.fc = nn.Linear(2, 1)  # Two inputs, one output

    def forward(self, x):
        return self.fc(x)

# Proper weight initialization
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)  # Xavier initialization for stability
        nn.init.zeros_(m.bias)  # Initialize biases to zero

# Generate normalized dataset
def generate_data(num_samples=5000):  # Increased dataset size
    x = torch.rand((num_samples, 2)) * 100  # Random numbers between 0 and 100
    x = (x - x.mean()) / x.std()  # Standardize inputs to zero mean and unit variance
    y = x.sum(dim=1, keepdim=True)  # Sum the two numbers
    y = y / y.std()  # Normalize target to match input scale
    return x, y

# Initialize model, loss, and optimizer
model = AddNumbersNet()
model.apply(init_weights)  # Apply weight initialization
criterion = nn.MSELoss()  # Mean Squared Error loss
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Adam optimizer for better convergence

# Training loop
x_train, y_train = generate_data(5000)
epochs = 5000  # Train for more epochs
for epoch in range(epochs):
    # Forward pass
    predictions = model(x_train)
    loss = criterion(predictions, y_train)

    # Check for NaN loss
    if torch.isnan(loss).any():
        print(f"Loss is NaN at epoch {epoch}!")
        break

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()

    # Gradient clipping to prevent exploding gradients
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()

    # Log every 500 epochs
    if epoch % 500 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")

# Test the model
x_test = torch.tensor([[10.0, 20.0], [15.0, 25.0]])
x_test = (x_test - x_train.mean()) / x_train.std()  # Normalize test data
y_test = model(x_test)
y_test_rounded = y_test.round()  # Round predictions to nearest whole number
print("Input:", x_test)
print("Predicted Sum (Rounded):", y_test_rounded.detach())


Epoch 0, Loss: 3.829310178756714
Epoch 500, Loss: 1.9475975036621094
Epoch 1000, Loss: 0.78922438621521
Epoch 1500, Loss: 0.15770266950130463
Epoch 2000, Loss: 0.016104672104120255
Epoch 2500, Loss: 0.0008411712478846312
Epoch 3000, Loss: 1.7257056242669933e-05
Epoch 3500, Loss: 1.2401521587435127e-07
Epoch 4000, Loss: 3.302814466366044e-09
Epoch 4500, Loss: 2.6808528730271064e-09
Input: tensor([[10., 20.],
        [15., 25.]])
Predicted Sum (Rounded): tensor([[21.],
        [28.]])


In [2]:
data = generate_data(10)
print(data)

(tensor([[-0.0852, -1.4949],
        [ 1.0021,  0.8382],
        [-0.2320,  0.4840],
        [-0.6381, -0.8283],
        [-0.8017,  0.1593],
        [-0.8495,  0.7828],
        [-1.6055, -1.2282],
        [ 1.5005, -0.8097],
        [ 0.1867,  1.5583],
        [ 1.2385,  0.8226]]), tensor([[-0.9613],
        [ 1.1196],
        [ 0.1533],
        [-0.8922],
        [-0.3908],
        [-0.0406],
        [-1.7240],
        [ 0.4203],
        [ 1.0617],
        [ 1.2540]]))


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the neural network with additional layers
class AddNumbersNet(nn.Module):
    def __init__(self):
        super(AddNumbersNet, self).__init__()
        self.fc1 = nn.Linear(2, 64)  # Input to hidden layer
        self.fc2 = nn.Linear(64, 32)  # Hidden layer
        self.fc3 = nn.Linear(32, 1)  # Hidden to output layer
        self.relu = nn.ReLU()  # Activation function

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x)

# Proper weight initialization
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)  # Xavier initialization for stability
        nn.init.zeros_(m.bias)  # Initialize biases to zero

# Generate normalized dataset
def generate_data(num_samples=5000):  # Increased dataset size
    x = torch.rand((num_samples, 2)) * 100  # Random numbers between 0 and 100
    x = (x - x.mean()) / x.std()  # Standardize inputs to zero mean and unit variance
    y = x.sum(dim=1, keepdim=True)  # Sum the two numbers
    y = y / y.std()  # Normalize target to match input scale
    return x, y

# Test data generation and evaluation function
def evaluate_model(model, test_samples=10):
    x_test = torch.rand((test_samples, 2)) * 100  # Generate test data
    y_actual = x_test.sum(dim=1, keepdim=True)  # Calculate actual sums
    x_test_normalized = (x_test - x_train.mean()) / x_train.std()  # Normalize test data

    # Get predictions and round for whole numbers
    y_pred = model(x_test_normalized).detach()
    y_pred_scaled = y_pred * y_train.std()  # Rescale predictions to original range
    y_pred_rounded = y_pred_scaled.round()

    # Print results
    print(f"{'Input':<20}{'Predicted':<20}{'Actual':<20}{'Error':<10}")
    print("-" * 70)
    for i in range(test_samples):
        input_vals = ", ".join(f"{v:.2f}" for v in x_test[i].tolist())  # Format list as a string
        print(f"{input_vals:<20}{y_pred_rounded[i].item():<20.2f}{y_actual[i].item():<20.2f}{abs(y_pred_rounded[i].item() - y_actual[i].item()):<10.2f}")


# Initialize model, loss, and optimizer
model = AddNumbersNet()
model.apply(init_weights)  # Apply weight initialization
criterion = nn.MSELoss()  # Mean Squared Error loss
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Adam optimizer for better convergence

# Training loop
x_train, y_train = generate_data(5000)
epochs = 5000  # Train for more epochs
for epoch in range(epochs):
    # Forward pass
    predictions = model(x_train)
    loss = criterion(predictions, y_train)

    # Check for NaN loss
    if torch.isnan(loss).any():
        print(f"Loss is NaN at epoch {epoch}!")
        break

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()

    # Gradient clipping to prevent exploding gradients
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()

    # Log every 500 epochs
    if epoch % 500 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")

# Evaluate the model
evaluate_model(model, test_samples=10)


Epoch 0, Loss: 0.9498544931411743
Epoch 500, Loss: 2.473349559295457e-05
Epoch 1000, Loss: 1.3263051187095698e-05
Epoch 1500, Loss: 9.237308404408395e-06
Epoch 2000, Loss: 5.803426120110089e-06
Epoch 2500, Loss: 3.690952780743828e-06
Epoch 3000, Loss: 2.6776713184517575e-06
Epoch 3500, Loss: 1.8784181747832918e-06
Epoch 4000, Loss: 1.6382275589421624e-06
Epoch 4500, Loss: 1.351693867945869e-06
Input               Predicted           Actual              Error     
----------------------------------------------------------------------


TypeError: unsupported format string passed to list.__format__