# Adam Gradient Descent

In [1]:
import numpy as np

# Function to perform Adam (Adaptive Moment Estimation) Gradient Descent
# Adam combines momentum and RMSprop to achieve faster and more stable convergence

def adam_gradient_descent(X, y, learning_rate=0.01, epochs=1000, beta1=0.9,
                          beta2=0.999, epsilon=1e-8, verbose=True):
    """Fit linear-regression weights with the Adam optimizer.

    Minimises the half mean squared error ``(1 / (2m)) * sum((X @ theta - y)**2)``
    starting from all-zero weights.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix with one row per sample. Include a column of ones if an
        intercept is wanted; none is added here.
    y : array-like with m elements
        Targets. A flat vector, a column/row vector such as (m, 1), or a
        single value broadcast to every sample are all accepted.
    learning_rate : float
        Step size applied to each bias-corrected update.
    epochs : int
        Number of full-batch update steps. Values <= 0 perform no updates.
    beta1, beta2 : float
        Exponential decay rates of the first and second moment estimates.
    epsilon : float
        Small constant added to the denominator to avoid division by zero.
    verbose : bool
        When true (the default), print the cost every 100 epochs.

    Returns
    -------
    theta : numpy.ndarray, shape (n,)
        Weights after the final epoch.
    history : list
        Cost measured after each epoch's parameter update.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, has no rows, or ``y`` does not supply one target
        per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)

    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (samples, features); got {X.ndim}-D input")
    m, n = X.shape  # m: number of samples, n: number of features
    if m == 0:
        raise ValueError("X must contain at least one sample")
    if y.size not in (1, m):
        raise ValueError(
            f"y has {y.size} elements but X has {m} samples"
        )
    # Force targets to shape (m,). Without this an (m, 1) column vector would
    # broadcast the residual below to an (m, m) matrix.
    y = np.broadcast_to(y.reshape(-1), (m,))

    theta = np.zeros(n)  # Weights start at zero
    moment = np.zeros(n)  # First moment estimate (running mean of gradients)
    velocity = np.zeros(n)  # Second moment estimate (running mean of squared gradients)
    history = []  # Cost after every epoch

    # Epochs are counted from 1 so the bias-correction denominators
    # (1 - beta ** epoch) are non-zero on the first step.
    for epoch in range(1, epochs + 1):
        gradients = (1 / m) * X.T.dot(X.dot(theta) - y)
        moment = beta1 * moment + (1 - beta1) * gradients
        velocity = beta2 * velocity + (1 - beta2) * (gradients ** 2)

        # Both estimates start at zero and are therefore biased towards zero
        # early on; dividing by (1 - beta ** epoch) compensates for that.
        moment_corrected = moment / (1 - beta1 ** epoch)
        velocity_corrected = velocity / (1 - beta2 ** epoch)

        # Parameter update
        theta -= learning_rate * moment_corrected / (np.sqrt(velocity_corrected) + epsilon)

        # Cost is evaluated with the freshly updated weights.
        cost = (1 / (2 * m)) * np.sum((X.dot(theta) - y) ** 2)
        history.append(cost)

        if verbose and epoch % 100 == 0:  # Report progress every 100 epochs
            print(f"Epoch {epoch}, Cost: {cost:.6f}")

    return theta, history

# Demo: linear regression on four points that satisfy y = 1 + x.
# Each row of X is [1, x]; the leading 1 lets the first weight act as the intercept.
X = np.array([[1, x_value] for x_value in (1, 2, 3, 4)])
y = np.array([2, 3, 4, 5])  # Targets, one per row of X

# Fit with the default hyperparameters, then show the learned weights.
theta, history = adam_gradient_descent(X=X, y=y)
print("Optimized Weights:", theta)


Epoch 100, Cost: 0.340038
Epoch 200, Cost: 0.001637
Epoch 300, Cost: 0.000000
Epoch 400, Cost: 0.000000
Epoch 500, Cost: 0.000000
Epoch 600, Cost: 0.000000
Epoch 700, Cost: 0.000000
Epoch 800, Cost: 0.000000
Epoch 900, Cost: 0.000000
Epoch 1000, Cost: 0.000000
Optimized Weights: [1. 1.]
