## Claude 3.5 Sonnet Prompt
Write a neural network in PyTorch that can add two numbers. 
Call it AddNet. 
It should add two whole numbers that are between 0 and 100.
Goal 1 is to get it to add two arbitrary whole numbers in a defined range with the lowest possible error.Goal 2 is to keep the neural net and training definitions to be as simple as possible while still accomplishing Goal 1.
At each relevant step of your network definition and training, write about why you made the choice to use that "technique".

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class AddNet(nn.Module):
    def __init__(self):
        super(AddNet, self).__init__()
        # A simple architecture with two hidden layers
        self.network = nn.Sequential(
            nn.Linear(2, 8),
            nn.ReLU(),
            nn.Linear(8, 4),
            nn.ReLU(),
            nn.Linear(4, 1)
        )
    
    def forward(self, x):
        return self.network(x)

# Initialize the model
model = AddNet()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Generate training data
def generate_data(num_samples=1000):
    # Generate random numbers between -10 and 10
    x1 = np.random.uniform(-10, 10, (num_samples, 1))
    x2 = np.random.uniform(-10, 10, (num_samples, 1))
    # True sums
    y = x1 + x2
    # Convert to PyTorch tensors
    X = torch.FloatTensor(np.hstack((x1, x2)))
    y = torch.FloatTensor(y)
    return X, y

# Training loop
def train(epochs=1000):
    for epoch in range(epochs):
        X, y = generate_data()
        
        # Forward pass
        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, y)
        
        # Backward pass and optimize
        loss.backward()
        optimizer.step()
        
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.6f}')

# Train the model
train()

# Test the model
def test_addition(x1, x2):
    model.eval()
    with torch.no_grad():
        input_tensor = torch.FloatTensor([[x1, x2]])
        prediction = model(input_tensor)
        return prediction.item()

# Example usage
x1, x2 = 5.7, 3.2
predicted_sum = test_addition(x1, x2)
actual_sum = x1 + x2
print(f"\nTest Result:")
print(f"Numbers: {x1} + {x2}")
print(f"Predicted sum: {predicted_sum:.4f}")
print(f"Actual sum: {actual_sum:.4f}")
print(f"Error: {abs(predicted_sum - actual_sum):.4f}")

Epoch [100/1000], Loss: 0.159147
Epoch [200/1000], Loss: 0.062354
Epoch [300/1000], Loss: 0.028616
Epoch [400/1000], Loss: 0.012465
Epoch [500/1000], Loss: 0.008091
Epoch [600/1000], Loss: 0.004660
Epoch [700/1000], Loss: 0.003388
Epoch [800/1000], Loss: 0.002730
Epoch [900/1000], Loss: 0.002180
Epoch [1000/1000], Loss: 0.001726

Test Result:
Numbers: 5.7 + 3.2
Predicted sum: 8.9001
Actual sum: 8.9000
Error: 0.0001


In [2]:
def evaluate_model(model, num_samples=10):
    """
    Evaluates the addition model on randomly generated number pairs.
    
    Args:
        model: The trained AddNet model
        num_samples: Number of evaluation pairs to generate
    """
    model.eval()
    
    # Generate evaluation data
    x1 = np.random.uniform(-10, 10, (num_samples, 1))
    x2 = np.random.uniform(-10, 10, (num_samples, 1))
    
    print("\nModel Evaluation Results")
    print("=" * 60)
    print(f"{'Number 1':<12} {'Number 2':<12} {'Predicted':<12} {'Actual':<12} {'Error':<12}")
    print("-" * 60)
    
    total_error = 0
    
    with torch.no_grad():
        for i in range(num_samples):
            # Get the numbers for this sample
            num1, num2 = x1[i][0], x2[i][0]
            actual = num1 + num2
            
            # Make prediction
            input_tensor = torch.FloatTensor([[num1, num2]])
            predicted = model(input_tensor).item()
            
            # Calculate error
            error = abs(predicted - actual)
            total_error += error
            
            # Print results
            print(f"{num1:<12.4f} {num2:<12.4f} {predicted:<12.4f} {actual:<12.4f} {error:<12.4f}")
    
    # Print summary statistics
    avg_error = total_error / num_samples
    print("-" * 60)
    print(f"Average Error: {avg_error:.6f}")
    print("=" * 60)

# Example usage:
# evaluate_model(model, num_samples=5)

In [3]:
evaluate_model(model, 10)


Model Evaluation Results
Number 1     Number 2     Predicted    Actual       Error       
------------------------------------------------------------
5.1053       2.9027       8.0081       8.0081       0.0000      
3.1782       0.7628       3.9400       3.9410       0.0009      
7.3105       -8.9348      -1.5934      -1.6243      0.0309      
1.3971       -4.1529      -2.7763      -2.7558      0.0205      
5.2125       -5.0050      0.2178       0.2074       0.0104      
9.9177       -6.1530      3.7605       3.7647       0.0042      
-6.9283      -8.6794      -15.5803     -15.6076     0.0273      
-3.4678      4.7029       1.2032       1.2351       0.0318      
-8.3155      -5.8125      -14.1283     -14.1280     0.0004      
-2.3396      -9.8137      -12.1241     -12.1534     0.0293      
------------------------------------------------------------
Average Error: 0.015568


In [6]:
# The second attempt
import torch
import torch.nn as nn
import numpy as np

class AddNet(nn.Module):
    def __init__(self):
        super(AddNet, self).__init__()
        self.layer1 = nn.Linear(2, 16)
        self.activation1 = nn.ReLU()
        self.layer2 = nn.Linear(16, 8)
        self.activation2 = nn.ReLU()
        self.output_layer = nn.Linear(8, 1)
    
    def forward(self, x):
        x = self.activation1(self.layer1(x))
        x = self.activation2(self.layer2(x))
        x = self.output_layer(x)
        return x

# Generate training data
def generate_data(num_samples=1000):
    x1 = np.random.uniform(0, 100, (num_samples, 1))
    x2 = np.random.uniform(0, 100, (num_samples, 1))
    X = np.hstack((x1, x2))
    y = x1 + x2
    return torch.FloatTensor(X), torch.FloatTensor(y)

# Training setup
model = AddNet()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training loop
X_train, y_train = generate_data()
num_epochs = 1000

for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Test the model
model.eval()
test_x1, test_x2 = 25.0, 75.0
test_input = torch.FloatTensor([[test_x1, test_x2]])
predicted_sum = model(test_input)
print(f'\nTest: {test_x1} + {test_x2} = {predicted_sum.item():.4f} (Actual: {test_x1 + test_x2})')

Epoch [100/1000], Loss: 6.0963
Epoch [200/1000], Loss: 1.1273
Epoch [300/1000], Loss: 0.6245
Epoch [400/1000], Loss: 0.4382
Epoch [500/1000], Loss: 0.3222
Epoch [600/1000], Loss: 0.2338
Epoch [700/1000], Loss: 0.1663
Epoch [800/1000], Loss: 0.1156
Epoch [900/1000], Loss: 0.0783
Epoch [1000/1000], Loss: 0.0517

Test: 25.0 + 75.0 = 100.0003 (Actual: 100.0)


In [12]:
num_samples = 10
samples = torch.rand((num_samples, 2)) * 100
for i, row in enumerate(samples):
    x1, x2 = row[0], row[1]
    predicted_sum = model(row)
    print(f'\nTest: {x1} + {x2} = {predicted_sum.item():.4f} (Actual: {x1 + x2})')


Test: 48.77769470214844 + 71.0080337524414 = 119.6938 (Actual: 119.78572845458984)

Test: 49.3056526184082 + 1.2821018695831299 = 50.9178 (Actual: 50.58775329589844)

Test: 73.80733489990234 + 51.15372085571289 = 124.9178 (Actual: 124.9610595703125)

Test: 28.075145721435547 + 90.93944549560547 = 118.9499 (Actual: 119.01458740234375)

Test: 32.875816345214844 + 50.911521911621094 = 83.8631 (Actual: 83.78733825683594)

Test: 43.70690155029297 + 77.75537109375 = 121.3439 (Actual: 121.46227264404297)

Test: 15.821111679077148 + 5.7428717613220215 = 21.9631 (Actual: 21.563983917236328)

Test: 66.68196105957031 + 27.91709327697754 = 94.7174 (Actual: 94.59905242919922)

Test: 72.29139709472656 + 34.02498245239258 = 106.3816 (Actual: 106.31637573242188)

Test: 22.633207321166992 + 30.947362899780273 = 53.8070 (Actual: 53.580570220947266)


Write a neural network in PyTorch that can add two numbers. 
Call it AddNet. 
It should add two whole numbers that are between 0 and 100.
Goal 1 is to get it to add two arbitrary whole numbers in a defined range with the lowest possible error.Goal 2 is to keep the neural net and training definitions to be as simple as possible while still accomplishing Goal 1.
Describe why you choose the Network Architecture, Activation Function, Loss Function, Optimizer, Training Data, Amount of Training Data, and Training Process to accomplish this task specifically.

In [13]:
import torch
import torch.nn as nn
import numpy as np
from torchviz import make_dot
import matplotlib.pyplot as plt

class AddNet(nn.Module):
    """Neural network designed to learn addition of two numbers between 0 and 100."""
    def __init__(self):
        super(AddNet, self).__init__()
        self.hidden = nn.Linear(2, 16)
        self.relu = nn.ReLU()
        self.output = nn.Linear(16, 1)
    
    def forward(self, x):
        x = self.relu(self.hidden(x))
        x = self.output(x)
        return x

def visualize_network(model):
    """Creates and saves a visualization of the network architecture."""
    x = torch.randn(1, 2)
    y = model(x)
    dot = make_dot(y, params=dict(model.named_parameters()))
    dot.render("addnet_architecture", format="png", cleanup=True)
    
    # Print model details
    print("Model Structure:")
    print(model)
    
    print("\nDetailed Layer Information:")
    for name, parameter in model.named_parameters():
        print(f"{name}: {parameter.size()}")
    
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"\nTotal trainable parameters: {total_params}")

def generate_training_data(num_samples):
    """Generates training data pairs and their sums."""
    x1 = torch.randint(0, 101, (num_samples, 1), dtype=torch.float32)
    x2 = torch.randint(0, 101, (num_samples, 1), dtype=torch.float32)
    inputs = torch.cat((x1, x2), dim=1)
    targets = x1 + x2
    return inputs, targets

def train_add_net(model, num_epochs=1000, batch_size=64):
    """Trains the neural network using generated data."""
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    
    # Generate training data
    inputs, targets = generate_training_data(10000)
    
    # Lists to store loss history
    loss_history = []
    
    for epoch in range(num_epochs):
        # Random batch selection
        idx = torch.randperm(inputs.shape[0])[:batch_size]
        batch_inputs = inputs[idx]
        batch_targets = targets[idx]
        
        # Forward pass
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Store loss
        loss_history.append(loss.item())
        
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    
    # Plot loss history
    plt.figure(figsize=(10, 5))
    plt.plot(loss_history)
    plt.title('Training Loss Over Time')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.grid(True)
    plt.savefig('training_loss.png')
    plt.close()

def test_add_net(model, num_samples):
    """Tests the trained model and displays results."""
    model.eval()
    with torch.no_grad():
        # Generate test data
        x1 = torch.randint(0, 101, (num_samples, 1), dtype=torch.float32)
        x2 = torch.randint(0, 101, (num_samples, 1), dtype=torch.float32)
        inputs = torch.cat((x1, x2), dim=1)
        actual_sums = x1 + x2
        
        # Get predictions
        predicted_sums = model(inputs)
        
        # Calculate and display results
        print("\nTesting Results:")
        print("X1\tX2\tPredicted\tActual\t\t%Error")
        print("-" * 50)
        
        total_error = 0
        for i in range(num_samples):
            error_pct = abs(predicted_sums[i].item() - actual_sums[i].item()) / actual_sums[i].item() * 100
            total_error += error_pct
            print(f"{x1[i].item():.0f}\t{x2[i].item():.0f}\t{predicted_sums[i].item():.2f}\t\t{actual_sums[i].item():.2f}\t\t{error_pct:.2f}%")
        
        print(f"\nAverage error: {total_error/num_samples:.2f}%")

def main():
    """Main function to run the entire process."""
    # Create and visualize model
    print("Creating and visualizing network architecture...")
    model = AddNet()
    visualize_network(model)
    
    # Train model
    print("\nStarting training...")
    train_add_net(model)
    
    # Test model
    print("\nTesting model...")
    test_add_net(model, 10)

if __name__ == "__main__":
    main()

Matplotlib is building the font cache; this may take a moment.


Creating and visualizing network architecture...


ExecutableNotFound: failed to execute PosixPath('dot'), make sure the Graphviz executables are on your systems' PATH