In [121]:
from __future__ import annotations
import numpy as np

# Defaults for the decaying step-size schedule used by the descent classes:
#     eta(k) = lambda_ * (s0 / (s0 + k)) ** p
s0_default: float = 1
p_default: float = 0.5

# Not referenced by the code visible here; presumably a mini-batch size for a
# stochastic variant -- TODO confirm against the rest of the project.
batch_size_default: int = 1

alpha_default: float = 0.1  # default momentum coefficient (MomentumDescent)
eps_default: float = 1e-8  # default smoothing term (Adagrad)

# Base step-size multiplier used by BaseDescent.update_weights.
mu_default: float = 1e-2

# Not referenced by the code visible here; presumably stopping criteria for an
# outer training loop -- TODO confirm.
tolerance_default: float = 1e-3
max_iter_default: int = 1000

class BaseDescent:
    """
    Base class for the descent methods; its methods also serve as examples.

    Subclasses set ``self.w`` to a 1-D float array and override
    ``update_weights``; ``step`` and ``calc_gradient`` are shared.
    """

    def __init__(self):
        # Weight vector. Left unset here; subclasses assign a copy of w0.
        self.w = None

    def step(self, X: np.ndarray, y: np.ndarray, iteration: int) -> np.ndarray:
        """
        Perform one descent step: estimate the gradient and update the weights.

        :param X: objects' features
        :param y: objects' targets
        :param iteration: iteration number
        :return: difference between weights (new minus old)
        """
        return self.update_weights(self.calc_gradient(X, y), iteration)

    def update_weights(self, gradient: np.ndarray, iteration: int) -> np.ndarray:
        """
        Example update rule: plain gradient step with a decaying step size.

        The step size is ``mu_default * (s0_default / (s0_default + iteration)) ** p_default``.
        ``self.w`` is modified in place.

        :param gradient: gradient
        :param iteration: iteration number
        :return: weight difference (new weights minus old weights): np.ndarray
        """
        shift = gradient * mu_default * (s0_default / (s0_default + iteration)) ** p_default
        self.w -= shift
        # The weights moved by -shift; report that so callers can monitor progress.
        return -shift

    def calc_gradient(self, X: np.ndarray, y: np.ndarray) -> np.ndarray:
        """
        Example gradient: forward finite differences of the loss ``||X @ w - y||``.

        Note that the loss is the Euclidean norm of the residual, not its square.

        :param X: objects' features
        :param y: objects' targets
        :return: gradient: np.ndarray with one entry per weight
        """
        dh = 1e-4
        dim = self.w.shape[0]
        # Column j holds the weights with only coordinate j shifted by dh.
        perturbed = self.w[:, np.newaxis] + np.eye(dim) * dh
        base_loss = np.linalg.norm(X @ self.w - y)
        # Row j of the transposed product is the prediction for perturbation j.
        perturbed_losses = np.linalg.norm((X @ perturbed).transpose() - y, axis=1)
        return (perturbed_losses - base_loss) / dh
        


class GradientDescent(BaseDescent):
    """
    Full gradient descent with a decaying step size.
    """

    def __init__(self, w0: np.ndarray, lambda_: float, s0: float = s0_default, p: float = p_default):
        """
        :param w0: weight initialization (copied, the caller's array is not modified)
        :param lambda_: learning rate parameter (float)
        :param s0: learning rate parameter (float)
        :param p: learning rate parameter (float)
        """
        super().__init__()
        # Step-size schedule: eta(k) = lambda_ * (s0 / (s0 + k)) ** p
        self.eta = lambda k: lambda_ * (s0 / (s0 + k)) ** p
        self.w = np.copy(w0)

    def update_weights(self, gradient: np.ndarray, iteration: int) -> np.ndarray:
        """
        Move the weights against the gradient, scaled by ``eta(iteration)``.

        ``self.w`` is modified in place.

        :param gradient: gradient
        :param iteration: iteration number
        :return: weight difference (new weights minus old weights): np.ndarray
        """
        shift = gradient * self.eta(iteration)
        self.w -= shift
        # The weights moved by -shift; return it as documented.
        return -shift
    
class MomentumDescent(BaseDescent):
    """
    Momentum gradient descent class
    """

    def __init__(self, w0: np.ndarray, lambda_: float, alpha: float = alpha_default, s0: float = s0_default,
                 p: float = p_default):
        """
        :param w0: weight initialization (copied, the caller's array is not modified)
        :param lambda_: learning rate parameter (float)
        :param alpha: momentum coefficient
        :param s0: learning rate parameter (float)
        :param p: learning rate parameter (float)
        """
        super().__init__()
        # Step-size schedule: eta(k) = lambda_ * (s0 / (s0 + k)) ** p
        self.eta = lambda k: lambda_ * (s0 / (s0 + k)) ** p
        self.alpha = alpha
        self.w = np.copy(w0)
        # Accumulated step; starts as scalar 0 and becomes an array after the first update.
        self.h = 0

    def update_weights(self, gradient: np.ndarray, iteration: int) -> np.ndarray:
        """
        Update the accumulated step ``h = alpha * h + eta(iteration) * gradient``
        and subtract it from the weights in place.

        :param gradient: gradient estimate
        :param iteration: iteration number
        :return: weight difference (new weights minus old weights): np.ndarray
        """
        self.h = self.alpha * self.h + self.eta(iteration) * gradient
        self.w -= self.h
        # The weights moved by -h; return a fresh array so callers cannot alias self.h.
        return -self.h

class Adagrad(BaseDescent):
    """
    Adaptive gradient algorithm class
    """

    def __init__(self, w0: np.ndarray, lambda_: float, eps: float = eps_default, s0: float = s0_default,
                 p: float = p_default):
        """
        :param w0: weight initialization (copied, the caller's array is not modified)
        :param lambda_: learning rate parameter (float)
        :param eps: smoothing term (float)
        :param s0: learning rate parameter (float)
        :param p: learning rate parameter (float)
        """
        super().__init__()
        # Step-size schedule: eta(k) = lambda_ * (s0 / (s0 + k)) ** p
        self.eta = lambda k: lambda_ * (s0 / (s0 + k)) ** p
        self.eps = eps
        self.w = np.copy(w0)
        # Running sum of squared gradients; scalar 0 until the first update.
        self.g = 0

    def update_weights(self, gradient: np.ndarray, iteration: int) -> np.ndarray:
        """
        Accumulate the squared gradient and take a per-coordinate scaled step:
        ``w -= eta(iteration) * gradient / sqrt(eps + g)``.

        ``self.w`` is modified in place.

        :param gradient: gradient estimate
        :param iteration: iteration number
        :return: weight difference (new weights minus old weights): np.ndarray
        """
        self.g = self.g + gradient ** 2
        shift = self.eta(iteration) * gradient / (self.eps + self.g) ** 0.5
        self.w -= shift
        # The weights moved by -shift; return it as documented.
        return -shift
        


In [128]:
# Noiseless synthetic regression problem whose exact solution is w = (1, 2).
num_objects, dimension = 5, 2
np.random.seed(42)
X = np.random.rand(num_objects, dimension)
y = X[:, 0] * 1 + X[:, 1] * 2

# Shared zero starting point; each optimiser copies it on construction.
w0 = np.zeros(dimension)
n_steps = 1000

# Plain gradient descent with a small base learning rate.
gg = GradientDescent(w0, lambda_=0.01)
for iteration in range(n_steps):
    gg.step(X, y, iteration)
gg.w

array([0.55564467, 0.97336739])

In [129]:
# Momentum descent on the same problem: base learning rate 1, momentum 0.01.
gg = MomentumDescent(w0, lambda_=1, alpha=0.01)
for iteration in range(n_steps):
    gg.step(X, y, iteration)
gg.w

array([1.02139936, 2.03684673])

In [135]:
# Adagrad on the same problem with a base learning rate of 10.
gg = Adagrad(w0, lambda_=10)
for iteration in range(n_steps):
    gg.step(X, y, iteration)
gg.w

array([0.99751261, 1.99752307])