# Advanced Neural Network Training

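This notebook contains the training implementation with momentum and learning rate decay.
It has no import cell of its own: the code below expects `np` (NumPy), `plt` (matplotlib.pyplot), `time`, and the `AdvancedNeuralNetwork` class to already be defined in the kernel, so run the implementation notebook first in the same kernel.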

In [None]:
# Generate complex dataset
def generate_complex_dataset(n_samples=1000, seed=42):
    """
    Generate a complex 4D dataset that requires a deeper network to learn.

    Inputs are sampled uniformly from [-2, 2] in each of 4 dimensions. The
    target is a fixed non-linear combination of the features (sine, cosine,
    quadratic and Gaussian terms), min-max scaled to [0, 1] using the minimum
    and maximum of the generated batch itself.

    Parameters
    ----------
    n_samples : int, default 1000
        Number of rows to generate.
    seed : int or None, default 42
        Passed to ``np.random.seed`` before sampling, which reseeds NumPy's
        global generator. With ``None`` the global generator is left as is
        and sampling continues from its current state.

    Returns
    -------
    X : ndarray of shape (n_samples, 4)
        Input features.
    y : ndarray of shape (n_samples, 1)
        Scaled targets. All zeros when every raw target is identical
        (e.g. ``n_samples == 1``); empty when ``n_samples == 0``.

    Notes
    -----
    Two calls with the same seed draw the same random stream, so the inputs
    of the smaller result are a prefix of the larger one. Use different seeds
    (or split the output of a single call) when disjoint train/test sets are
    needed. Because scaling is per batch, targets from separate calls are not
    on a common scale.
    """
    if seed is not None:
        np.random.seed(seed)

    # Generate 4D input features
    X = np.random.uniform(-2, 2, (n_samples, 4))

    # Complex non-linear function
    y = (
        0.3 * np.sin(X[:, 0] * X[:, 1]) +
        0.4 * np.cos(X[:, 2]) * X[:, 3] +
        0.2 * (X[:, 0] ** 2 + X[:, 1] ** 2) +
        0.1 * np.exp(-0.5 * (X[:, 2] ** 2 + X[:, 3] ** 2))
    )

    if y.size == 0:
        # Nothing to scale; min()/max() would raise on an empty array.
        return X, y.reshape(-1, 1)

    # Normalize output to [0, 1] range
    y_min = y.min()
    y_range = y.max() - y_min
    if y_range == 0:
        # All targets are equal (e.g. a single sample): avoid 0/0 -> NaN.
        y = np.zeros_like(y)
    else:
        y = (y - y_min) / y_range

    return X, y.reshape(-1, 1)

# Generate datasets
# generate_complex_dataset reseeds NumPy with the same fixed seed on every
# call, so calling it once for 800 rows and again for 200 rows would make the
# test inputs identical to the first 200 training inputs (train/test leakage)
# and would min-max scale the two target sets independently. Generate a single
# batch and split it instead: the sets are disjoint and share one target scale.
print("📊 Generating complex dataset...")
N_TRAIN, N_TEST = 800, 200
X_all, y_all = generate_complex_dataset(N_TRAIN + N_TEST)
X_train, y_train = X_all[:N_TRAIN], y_all[:N_TRAIN]
X_test, y_test = X_all[N_TRAIN:], y_all[N_TRAIN:]

# Sanity check on the split sizes.
assert X_train.shape[0] == N_TRAIN and X_test.shape[0] == N_TEST

print(f"Training set: {X_train.shape[0]} samples, {X_train.shape[1]} features")
print(f"Test set: {X_test.shape[0]} samples")
print(f"Output range: [{y_train.min():.3f}, {y_train.max():.3f}]")

## 🎓 Advanced Backpropagation with Momentum

In [None]:
def advanced_backpropagation_with_momentum(network, forward_result, targets,
                                         learning_rate, weight_momentum,
                                         bias_momentum, momentum_factor):
    """
    Run one backward pass and apply a momentum update to the network in place.

    Args:
        network: object exposing ``num_layers``, ``activations`` (one
            activation name per weight layer), ``weights``, ``biases`` and
            ``get_activation_function(name)`` returning a pair whose second
            item is the activation derivative.
        forward_result: dict with ``'activations'`` and ``'z_values'`` lists.
            As used here, ``activations[i]`` is the input fed to weight layer
            ``i``, ``activations[-1]`` is the network output and
            ``z_values[i]`` is layer ``i``'s pre-activation.
        targets: array of targets; a 1-D array is treated as one column.
        learning_rate: step size applied to the current gradients.
        weight_momentum: list of per-layer velocity arrays; entries are
            replaced with the new velocities.
        bias_momentum: same as ``weight_momentum`` but for the biases.
        momentum_factor: fraction of the previous velocity that is kept.

    Returns:
        Mean squared difference between the forward-pass output and the
        targets, i.e. the loss before this update was applied.
    """
    if targets.ndim == 1:
        targets = targets.reshape(-1, 1)

    layer_inputs = forward_result['activations']
    pre_activations = forward_result['z_values']
    n_weight_layers = network.num_layers - 1

    residual = layer_inputs[-1] - targets

    # Gradient slots are filled by layer index while walking backwards.
    grads_w = [None] * n_weight_layers
    grads_b = [None] * n_weight_layers

    upstream = residual
    for layer in range(n_weight_layers - 1, -1, -1):
        _, derivative = network.get_activation_function(network.activations[layer])
        delta = upstream * derivative(pre_activations[layer])

        fed_in = layer_inputs[layer]
        grads_w[layer] = np.dot(fed_in.T, delta) / fed_in.shape[0]
        grads_b[layer] = np.mean(delta, axis=0, keepdims=True)

        # The first layer has nothing below it to receive an error signal.
        if layer > 0:
            upstream = np.dot(delta, network.weights[layer].T)

    # All gradients are known before any parameter moves, so the backward
    # pass above only ever saw the pre-update weights.
    for layer in range(n_weight_layers):
        weight_momentum[layer] = (
            momentum_factor * weight_momentum[layer] + learning_rate * grads_w[layer]
        )
        bias_momentum[layer] = (
            momentum_factor * bias_momentum[layer] + learning_rate * grads_b[layer]
        )
        network.weights[layer] -= weight_momentum[layer]
        network.biases[layer] -= bias_momentum[layer]

    return np.mean(residual ** 2)

# Confirm that the cell ran and the backpropagation function is defined.
print("🎓 Advanced backpropagation implemented!")

## 🚀 Training Function

In [None]:
def train_advanced_network(network, X_train, y_train, X_test, y_test, 
                          epochs=1000, initial_lr=0.01, momentum=0.9, 
                          lr_decay=0.95, decay_every=200):
    """
    Advanced training with momentum and learning rate decay.

    Each epoch runs one forward pass over the whole training set, one call to
    ``advanced_backpropagation_with_momentum`` (which updates the network in
    place), and then one forward pass over the test set to record its loss.

    Parameters
    ----------
    network : object
        Must provide ``weights``, ``biases``, ``forward_pass(X)`` returning a
        dict with a ``'final_output'`` entry, and ``calculate_loss(pred, y)``.
    X_train, y_train : array-like
        Training inputs and targets.
    X_test, y_test : array-like
        Held-out inputs and targets, used only for evaluation.
    epochs : int, default 1000
        Number of full-batch updates.
    initial_lr : float, default 0.01
        Learning rate used until the first decay step.
    momentum : float, default 0.9
        Momentum factor forwarded to the backpropagation routine.
    lr_decay : float, default 0.95
        Multiplier applied to the learning rate at each decay step.
    decay_every : int, default 200
        Apply ``lr_decay`` after every ``decay_every`` epochs. ``0`` or
        ``None`` disables decay (previously ``0`` raised ZeroDivisionError).

    Returns
    -------
    dict
        ``{'train_losses': [...], 'test_losses': [...]}`` with one entry per
        epoch. The training loss is the value returned by the backpropagation
        call; the test loss is computed after that epoch's update.
    """
    print(f"🚀 Starting training for {epochs} epochs")
    print(f"📊 Learning rate: {initial_lr}, Momentum: {momentum}")
    
    # Initialize momentum terms (one zero velocity per parameter array)
    weight_momentum = [np.zeros_like(w) for w in network.weights]
    bias_momentum = [np.zeros_like(b) for b in network.biases]
    
    current_lr = initial_lr
    train_losses = []
    test_losses = []
    
    start_time = time.time()
    
    for epoch in range(epochs):
        # Forward pass on training data
        train_result = network.forward_pass(X_train)
        
        # Backpropagation with momentum (mutates network and momentum lists)
        train_loss = advanced_backpropagation_with_momentum(
            network, train_result, y_train, current_lr, 
            weight_momentum, bias_momentum, momentum
        )
        
        # Evaluate on test set with the freshly updated parameters
        test_result = network.forward_pass(X_test)
        test_loss = network.calculate_loss(test_result['final_output'], y_test)
        
        # Store history
        train_losses.append(train_loss)
        test_losses.append(test_loss)
        
        # Learning rate decay; a falsy decay_every means "never decay" and
        # also avoids the modulo-by-zero.
        if decay_every and (epoch + 1) % decay_every == 0:
            current_lr *= lr_decay
        
        # Print progress every 100 epochs and on the final epoch
        if epoch % 100 == 0 or epoch == epochs - 1:
            elapsed = time.time() - start_time
            print(f"Epoch {epoch:4d}: Train={train_loss:.6f}, Test={test_loss:.6f}, Time={elapsed:.1f}s")
    
    print("\n✅ Training completed!")
    return {'train_losses': train_losses, 'test_losses': test_losses}

# Confirm that the cell ran and the training function is defined.
print("🚀 Training function ready!")

## 🎯 Train the Network

Now let's train our advanced network!

In [None]:
# Train the network (make sure you've run the implementation notebook first,
# in this kernel, so that AdvancedNeuralNetwork is defined).
#
# The network is created here rather than assumed to exist: nothing else in
# this notebook defines `network`, and building a fresh one each time the cell
# runs keeps re-runs from silently continuing to train an already-trained
# model. The arguments are presumably the layer sizes and the per-layer
# activations (4 inputs to match the 4 dataset features, one sigmoid output
# for targets scaled to [0, 1]) — confirm against the implementation notebook.
network = AdvancedNeuralNetwork([4, 6, 4, 1], ['relu', 'relu', 'sigmoid'])

history = train_advanced_network(
    network=network,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    epochs=1000,
    initial_lr=0.01,
    momentum=0.9
)

## 📈 Visualize Results

In [None]:
# Plot training results: loss curves plus predicted-vs-actual scatter plots
# for the training and test sets.

# Predictions of the trained network; kept as module-level names because the
# metrics code further down reuses them.
train_pred = network.forward_pass(X_train)['final_output']
test_pred = network.forward_pass(X_test)['final_output']


def _plot_predicted_vs_actual(ax, actual, predicted, title, color=None):
    """Scatter `predicted` against `actual` on `ax` with a y = x reference line.

    `color=None` keeps matplotlib's default marker colour.
    """
    ax.scatter(actual, predicted, alpha=0.5, s=10, color=color)
    lo, hi = actual.min(), actual.max()
    # Black dashed diagonal stays visible on top of both blue and red markers.
    ax.plot([lo, hi], [lo, hi], 'k--', linewidth=2)
    ax.set_title(title)
    ax.set_xlabel('Actual')
    ax.set_ylabel('Predicted')
    ax.grid(True, alpha=0.3)


fig, (loss_ax, train_ax, test_ax) = plt.subplots(1, 3, figsize=(15, 5))

# Loss curves (log scale so late-epoch improvements remain readable)
loss_ax.plot(history['train_losses'], 'b-', label='Training Loss', linewidth=2)
loss_ax.plot(history['test_losses'], 'r-', label='Test Loss', linewidth=2)
loss_ax.set_title('Training Progress')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss (MSE)')
loss_ax.legend()
loss_ax.grid(True, alpha=0.3)
loss_ax.set_yscale('log')

# Predictions vs Actual (Training / Test)
_plot_predicted_vs_actual(train_ax, y_train, train_pred, 'Training: Predicted vs Actual')
_plot_predicted_vs_actual(test_ax, y_test, test_pred, 'Test: Predicted vs Actual', color='red')

fig.tight_layout()
plt.show()

# Performance metrics
def _mse_and_r2(actual, predicted):
    """Return ``(mse, r2)`` for `predicted` against `actual`.

    Both arrays are flattened before subtracting so that a column vector and
    a 1-D vector of the same length cannot silently broadcast into an
    (n, n) matrix; arrays of different length raise instead.
    R^2 is computed as ``1 - mse / var(actual)``.
    """
    residual = np.ravel(actual) - np.ravel(predicted)
    mse = np.mean(residual ** 2)
    return mse, 1 - mse / np.var(actual)


train_mse, train_r2 = _mse_and_r2(y_train, train_pred)
test_mse, test_r2 = _mse_and_r2(y_test, test_pred)

print("\n📊 PERFORMANCE METRICS")
print(f"Training R²: {train_r2:.4f}")
print(f"Test R²: {test_r2:.4f}")
print(f"Final train loss: {history['train_losses'][-1]:.6f}")
print(f"Final test loss: {history['test_losses'][-1]:.6f}")

# Coarse verdict based on the held-out R^2.
if test_r2 > 0.8:
    print("\n🎉 Excellent performance!")
elif test_r2 > 0.6:
    print("\n👍 Good performance!")
else:
    print("\n⚠️ Could use more training.")