### Написать на PyTorch forward и backward полносвязного слоя без использования autograd

In [1]:
import torch
import torch.nn as nn
import numpy as np

In [2]:
class MyLayer(nn.Module):
    def __init__(self, tensor):
        super().__init__()
        self.params = nn.Parameter(tensor, requires_grad=False)
        
    def forward(self, x):
        return x * self.params
    
    def backward(self, grad):
        return grad    

### Написать 1-2 адаптивных оптимизатора


#### SGD Momentum

In [3]:
# SGD Momentum
# velocity = momentum (0.9-0.99)* velocity - lr*gradient
# w = w + velocity (скорость-вектор)

class SGDMomentum:
    def __init__(self, weights, momentum: float = 0.99, lr: float = 0.001):
        self.momentum = momentum
        self.lr = lr
        self.velocity = torch.zeros_like(weights)
        self.weights = weights
        
    def step(self, grad):
        self.velocity = self.momentum * self.velocity - self.lr * grad
        self.weights = self.weights + self.velocity

#### Adagrad

In [4]:
# accumulated += gradient ** 2
# adapt_lr = lr / sqrt (accumulated) 
# w = w - adapt_lr * gradient

class Adagard:
    def __init__(self, weights, lr: float = 0.001):
        self.weights = weights
        self.lr = lr
        self.accumulated = torch.zeros_like(weights)
        
    def step(self, grad):
        self.accumulated = self.accumulated + grad ** 2
        self.adapt_lr = self.lr / torch.sqrt(self.accumulated)
        self.weights = self.weights - self.adapt_lr * grad

#### RMSprop

In [15]:
# accumulated += rho (0.9 - 0.99) * accumulated + (1- rho) * gradient ** 2
# adapt_lr = lr / sqrt (accumulated) 
# w = w - adapt_lr * gradient

class RMSprop:
    def __init__(self, weights, rho: float = 0.9, lr: float = 0.01):
        self.weights = weights
        self.rho = rho
        self.lr = lr
        self.accumulated = torch.zeros_like(weights)
        
    def step(self, grad):
        self.accumulated = self.accumulated + self.rho * self.accumulated + (1 - self.rho) * grad ** 2
        self.adapt_lr = self.lr / torch.sqrt(self.accumulated)
        self.weights = self.weights - self.adapt_lr * grad

#### Adam

In [6]:
# velocity = beta1 * velocity + (1-beta1) * gradient
# accumulated = beta2 * accumulated + (1-beta2) * gradient ** 2
# adapt_lr = lr / sqrt (accumulated)
# w = w - adapt_lr * velocity

class Adam:
    def __init__(self, weights, lr: float = 0.001, beta1: float = 0.9, beta2: float = 0.999):
        self.weights = weights
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.velocity = torch.zeros_like(weights)
        self.accumulated = torch.zeros_like(weights)
        
    def step(self, grad):
        self.velocity = self.beta1 * self.velocity + (1 - self.beta1) * grad
        self.accumulated = self.beta2 * self.accumulated + (1 - self.beta2) * grad ** 2
        self.adapt_lr = self.lr / torch.sqrt(self.accumulated)
        self.weights = self.weights - self.adapt_lr * self.velocity

### Решить задачу нахождения корней квадратного уравнения методом градиентного спуска

In [7]:
# SGDMomentum(x, 0.9, 0.001), Adagard(x, 0.001), RMSprop(x, 0.9, 0.001), Adam(x, 0.001, 0.9, 0.99)

In [8]:
a, b, c = 1, -7, 6

func_m = lambda x: (a * x ** 2 + b * x + c) ** 2
grad_f = lambda x: 2 * (a * x ** 2 + b * x + c) * (2 * a * x + b) 


In [9]:
def solver(init_x):
    x = torch.tensor(init_x).float()
    g = grad_f(x)
    optim = SGDMomentum(x, 0.9, 0.01)
    prev = optim.weights
    optim.step(g)
    g = grad_f(optim.weights)
    while abs(func_m(prev) - func_m(optim.weights)) > 0.001:
        prev = optim.weights
        optim.step(g)
        g = grad_f(optim.weights)
    print('Root: ', optim.weights.numpy())

solver(-2)
solver(5)

Root:  0.9935858
Root:  5.995339


In [10]:
def solver(init_x):
    x = torch.tensor(init_x).float()
    g = grad_f(x)
    optim = Adagard(x, 0.001)
    prev = optim.weights
    optim.step(g)
    g = grad_f(optim.weights)
    while abs(func_m(prev) - func_m(optim.weights)) > 0.001:
        prev = optim.weights
        optim.step(g)
        g = grad_f(optim.weights)
    print('Root: ', optim.weights.numpy())

solver(-2)
solver(5)

Root:  -1.4589084
Root:  5.0463514


In [11]:
def solver(init_x):
    x = torch.tensor(init_x).float()
    g = grad_f(x)
    optim = Adam(x, 0.001)
    prev = optim.weights
    optim.step(g)
    g = grad_f(optim.weights)
    while abs(func_m(prev) - func_m(optim.weights)) > 0.001:
        prev = optim.weights
        optim.step(g)
        g = grad_f(optim.weights)
    print('Root: ', optim.weights.numpy())

solver(-2)
solver(5)

Root:  0.8734324
Root:  5.9438243


In [12]:
def solver(init_x):
    x = torch.tensor(init_x).float()
    g = grad_f(x)
    optim = Adam(x, 0.001)
    prev = optim.weights
    optim.step(g)
    g = grad_f(optim.weights)
    while abs(func_m(prev) - func_m(optim.weights)) > 0.001:
        prev = optim.weights
        optim.step(g)
        g = grad_f(optim.weights)
    print('Root: ', optim.weights.numpy())

solver(-2)
solver(5)

Root:  0.8734324
Root:  5.9438243


In [22]:
def solver(init_x):
    x = torch.tensor(init_x).float()
    g = grad_f(x)
    optim = RMSprop(x, 0.9, 0.001)
    prev = optim.weights
    optim.step(g)
    g = grad_f(optim.weights)
    while abs(func_m(prev) - func_m(optim.weights)) > 0.001:
        prev = optim.weights
        optim.step(g)
        g = grad_f(optim.weights)
    print('Root: ', optim.weights.numpy())

solver(0)
solver(5)

Root:  0.0092803
Root:  5.009249
