<a href="https://colab.research.google.com/github/itsmepriyabrata/priyabrata_ai_python/blob/main/Optimization_Algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Gradient Descent

In [1]:
import numpy as np

def mean_squared_error(y_true, y_pred):
  """Calculates the mean squared error between true and predicted values."""
  return np.sum((y_true - y_pred) ** 2) / len(y_true)

def gradient_descent(X, y, learning_rate, num_iters):
  """
  Implements gradient descent to find weights and bias for linear regression.

  Args:
      X: A numpy array of shape (num_samples, num_features) containing training data.
      y: A numpy array of shape (num_samples,) containing target values.
      learning_rate: The learning rate for gradient descent updates.
      num_iters: The number of iterations to run gradient descent.

  Returns:
      A tuple containing the learned weights and bias.
  """

  weights = np.random.rand(X.shape[1])
  bias = 0

  for _ in range(num_iters):
    y_predicted = np.dot(X, weights) + bias

    gradient_weights = -(2/X.shape[0]) * np.dot(X.T, (y_predicted - y))
    gradient_bias = -(2/X.shape[0]) * np.sum(y_predicted - y)

    weights -= learning_rate * gradient_weights
    bias -= learning_rate * gradient_bias

  return weights, bias

X = np.array([[1, 1], [2, 2], [3, 3], [4, 4]])
y = np.array([2, 4, 5, 6])

learning_rate = 0.01
num_iters = 1000

weights, bias = gradient_descent(X, y, learning_rate, num_iters)

print("Learned weights:", weights)
print("Learned bias:", bias)

y_predicted = np.dot(X, weights) + bias
print("Predicted values:", y_predicted)


Learned weights: [-1.45239356e+119 -1.45239356e+119]
Learned bias: -4.892786087589406e+118
Predicted values: [-3.39406574e+119 -6.29885287e+119 -9.20363999e+119 -1.21084271e+120]


Stochastic Gradient

In [2]:
import numpy as np

def mean_squared_error(y_true, y_pred):
  """
  Calculates the mean squared error between true and predicted values.

  Args:
    y_true: The ground truth labels (numpy array).
    y_pred: The predicted labels (numpy array).

  Returns:
    The mean squared error (float).
  """
  return np.mean((y_true - y_pred) ** 2)

def gradient(X, y, w, b):
  """
  Calculates the gradient of the mean squared error for linear regression.

  Args:
    X: The training features (numpy array).
    y: The training labels (numpy array).
    w: The weights (numpy array).
    b: The bias (float).

  Returns:
    A tuple containing the gradients of the weights and bias (numpy arrays).
  """
  n = len(y)
  predictions = X.dot(w) + b
  errors = predictions - y
  weight_gradient = (1/n) * X.T.dot(errors)
  bias_gradient = (1/n) * np.sum(errors)
  return weight_gradient, bias_gradient

def stochastic_gradient_descent(X, y, learning_rate, n_epochs, batch_size=1):
  """
  Performs stochastic gradient descent for linear regression.

  Args:
    X: The training features (numpy array).
    y: The training labels (numpy array).
    learning_rate: The learning rate (float).
    n_epochs: The number of epochs (int).
    batch_size: The size of mini-batches (int, default 1 for full batch SGD).

  Returns:
    A tuple containing the trained weights and bias (numpy arrays).
  """
  n, d = X.shape
  w = np.zeros(d)
  b = 0

  for epoch in range(n_epochs):
    # Shuffle data for each epoch (optional for better convergence)
    X_shuffled, y_shuffled = X.copy(), y.copy()
    np.random.seed(epoch)
    np.random.shuffle(X_shuffled)
    np.random.shuffle(y_shuffled)

    for i in range(0, n, batch_size):
      X_batch = X_shuffled[i:i+batch_size]
      y_batch = y_shuffled[i:i+batch_size]

      weight_gradient, bias_gradient = gradient(X_batch, y_batch, w, b)
      w -= learning_rate * weight_gradient
      b -= learning_rate * bias_gradient

  return w, b

X = np.array([[1], [2], [3], [4]])
y = np.array([2, 4, 5, 6])
learning_rate = 0.01
n_epochs = 1000

w, b = stochastic_gradient_descent(X, y, learning_rate, n_epochs)

y_pred = X.dot(w) + b
print(f"Predicted labels: {y_pred}")

mse = mean_squared_error(y, y_pred)
print(f"Mean squared error: {mse}")


Predicted labels: [4.14820039 4.28506891 4.42193744 4.55880596]
Mean squared error: 1.7768064472147336


Adam

In [3]:
import numpy as np

class Adam:
  """
  Stochastic Adam optimizer for machine learning.
  """
  def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """
    Initializes the Adam optimizer with hyperparameters.

    Args:
      learning_rate (float): The base learning rate. Defaults to 0.001.
      beta1 (float): The exponential decay rate for the first moment estimate. Defaults to 0.9.
      beta2 (float): The exponential decay rate for the second moment estimate. Defaults to 0.999.
      epsilon (float): A small constant for numerical stability. Defaults to 1e-8.
    """
    self.learning_rate = learning_rate
    self.beta1 = beta1
    self.beta2 = beta2
    self.epsilon = epsilon
    self.m = None
    self.v = None
    self.t = 0

  def update(self, params, grads):
    """
    Updates the parameters using the Adam optimization algorithm.

    Args:
      params (list): A list of NumPy arrays representing the model parameters.
      grads (list): A list of NumPy arrays representing the gradients of the loss function with respect to the parameters.

    Returns:
      list: A list of updated NumPy arrays representing the model parameters.
    """
    self.t += 1
    if self.m is None:
      self.m = [np.zeros_like(p) for p in params]
      self.v = [np.zeros_like(p) for p in params]

    for i, (param, grad) in enumerate(zip(params, grads)):
      self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * grad
      self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * grad**2
      m_hat = self.m[i] / (1 - self.beta1**self.t)
      v_hat = self.v[i] / (1 - self.beta2**self.t)
      param -= self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)

    return params

learning_rate = 0.01
beta1 = 0.9
beta2 = 0.999

params = [np.random.rand(10), np.random.rand(5)]
grads = [np.random.rand(10), np.random.rand(5)]

optimizer = Adam(learning_rate, beta1, beta2)
updated_params = optimizer.update(params, grads)

print("Updated parameters:", updated_params)


Updated parameters: [array([0.62968144, 0.08092526, 0.32222568, 0.41738095, 0.54438581,
       0.61812652, 0.68739294, 0.77994969, 0.12189035, 0.33277045]), array([0.19155961, 0.69732423, 0.02339926, 0.89925004, 0.39516067])]


RMSprop

In [6]:
def rmsprop(params, grads, learning_rate, rho, eps):
  """
  Stochastic RMSprop optimization algorithm.

  Args:
      params: List of numpy arrays containing the model parameters.
      grads: List of numpy arrays containing the gradients of the loss function.
      learning_rate: Learning rate for the optimization.
      rho: Decay rate for the moving average of squared gradients.
      eps: Epsilon for numerical stability.

  Returns:
      Updated list of model parameters.
  """
  updated_params = []
  for param, grad in zip(params, grads):
    if not hasattr(param, 's'):
      param.s = np.zeros_like(param)
    param.s = rho * param.s + (1 - rho) * np.square(grad)
    updated_param = param - learning_rate * grad / (np.sqrt(param.s) + eps)
    updated_params.append(updated_param)
  return updated_params
