In [None]:
import numpy as np
import copy
from numpy.random import permutation

In [None]:
# Linear Model with two weights w0 and w1.

In [None]:
class Line:
    """Straight line ``y = w0 + w1*x`` with squared-error partial derivatives.

    Attributes are documented here rather than inline:
    ``weights`` is the list ``[w0, w1]`` and ``derivative_funcs`` lists the
    partial-derivative methods in the same order as ``weights``.
    """

    def __init__(self):
        # Two independent draws from np.random.uniform(0, 1, 1); each weight is
        # therefore a one-element NumPy array rather than a plain float.
        intercept = np.random.uniform(0, 1, 1)
        slope = np.random.uniform(0, 1, 1)
        self.weights = [intercept, slope]
        self.derivative_funcs = [self.dx_w0, self.dx_w1]

    def evaluate(self, x):
        """Return the model's prediction ``w0 + w1*x`` for input ``x``."""
        w = self.weights
        return w[0] + w[1]*x

    def derivate(self, x, y):
        """Return ``[dx_w0, dx_w1]`` evaluated at the sample ``(x, y)``.

        The prediction is computed once and shared by both partial
        derivatives.
        """
        prediction = self.evaluate(x)
        return [
            self.dx_w0(x, y, prediction),
            self.dx_w1(x, y, prediction),
        ]

    def dx_w0(self, x, y, yhat):
        """Partial derivative of ``(yhat - y)**2`` w.r.t. ``w0`` (``yhat = w0 + w1*x``)."""
        residual = yhat - y
        return 2*residual

    def dx_w1(self, x, y, yhat):
        """Partial derivative of ``(yhat - y)**2`` w.r.t. ``w1`` (``yhat = w0 + w1*x``)."""
        residual = yhat - y
        return 2*x*residual

    def __str__(self):
        """Render the line as ``y = <w0> + <w1>*x``."""
        return f"y = {self.weights[0]} + {self.weights[1]}*x"

In [None]:
# Helper Functions.

In [None]:
def stochastic_sample(xs, ys):
    """Pick one random ``(x, y)`` pair that shares the same index in ``xs`` and ``ys``.

    The index is the first entry of a random permutation of
    ``range(len(xs))``, so only positions valid for ``xs`` are ever chosen.
    """
    chosen = permutation(len(xs))[0]
    return xs[chosen], ys[chosen]
    
    
def gradient(dx, evaluate, xs, ys):
    """Average the partial derivative ``dx`` over the paired samples.

    Parameters
    ----------
    dx : callable ``dx(x, y, yhat)`` returning one partial derivative.
    evaluate : callable ``evaluate(x)`` returning the prediction for ``x``.
    xs, ys : inputs and targets, walked in lockstep with ``zip``.

    Returns
    -------
    The accumulated derivative divided by ``len(ys)``.
    """
    sample_count = len(ys)

    accumulated = 0
    for x_i, y_i in zip(xs, ys):
        # Plain rebinding (not +=) so the accumulator may change type freely.
        accumulated = accumulated + dx(x_i, y_i, evaluate(x_i))

    return accumulated / sample_count

In [None]:
# Implementing the Adam algorithm on the model

In [None]:
def adam(model, xs, ys, learning_rate = 0.1, b1 = 0.9, b2 = 0.999, epsilon = 0.00000001, max_iteration = 1000, log_every = 100):
    """Fit ``model`` to the samples ``(xs, ys)`` using the Adam update rule.

    Every iteration draws a single sample with ``stochastic_sample``, asks the
    model for its partial derivatives at that sample, updates the running
    first/second moment estimates, bias-corrects them and moves each weight.
    ``model.weights`` is rebound to the new list of weights on every step;
    the function returns nothing.

    Parameters
    ----------
    model : object with a ``weights`` sequence and a ``derivate(x, y)`` method
        returning one partial derivative per weight, in the same order.
    xs, ys : inputs and targets handed to ``stochastic_sample``.
    learning_rate : step size.
    b1, b2 : decay rates of the first and second moment estimates.
    epsilon : small constant added to ``sqrt(v)`` in the update's denominator.
    max_iteration : number of update steps to perform.
    log_every : print the iteration number and the model every this many
        iterations; a falsy value (``0`` or ``None``) disables the printing.
    """

    # One running first-moment (m) and second-moment (v) estimate per parameter.
    num_param = len(model.weights)
    m = [0] * num_param
    v = [0] * num_param

    # t starts at 1 because the bias correction divides by (1 - b**t), which is
    # zero at t == 0. The upper bound is inclusive so that exactly
    # max_iteration updates are performed (range() excludes its stop value).
    for t in range(1, max_iteration + 1):

        # Draw one random sample and get the partial derivatives there
        x, y = stochastic_sample(xs, ys)
        g = model.derivate(x, y)

        # Update the exponentially decayed moment estimates
        m = [b1*m_i + (1 - b1)*g_i for m_i, g_i in zip(m, g)]
        v = [b2*v_i + (1 - b2)*(g_i**2) for v_i, g_i in zip(v, g)]

        # Bias correction for m and v
        m_cor = [m_i / (1 - (b1**t)) for m_i in m]
        v_cor = [v_i / (1 - (b2**t)) for v_i in v]

        # Update the parameters
        model.weights = [
            weight - (learning_rate / (np.sqrt(v_cor_i) + epsilon))*m_cor_i
            for weight, v_cor_i, m_cor_i in zip(model.weights, v_cor, m_cor)
        ]

        if log_every and t % log_every == 0:
            print(f"Iteration {t}")
            print(model)

In [None]:
# Training data lying exactly on the line y = x.
xs = [1,2,3,4,5,6,7]
ys = [1,2,3,4,5,6,7]

print("Target: intercept = 0 and slope = 1")

# Seed NumPy's global RNG so the random initial weights and the order in which
# samples are drawn are the same every time this cell is run.
np.random.seed(0)

model = Line()
print("Adam")
adam(model, xs, ys)
print(model)

Target: intercept = 0 and slope = 1
Adam
Iteration 100
y = [0.0041096] + [1.00208001]*x
Iteration 200
y = [8.11790039e-06] + [1.00261527]*x
Iteration 300
y = [-0.0573506] + [1.03071414]*x
Iteration 400
y = [-0.00676702] + [1.02005114]*x
Iteration 500
y = [0.00051774] + [0.9998113]*x
Iteration 600
y = [6.83475107e-05] + [1.00001067]*x
Iteration 700
y = [-1.73056923e-07] + [0.9999989]*x
Iteration 800
y = [-3.24006123e-07] + [0.99999988]*x
Iteration 900
y = [-4.21820929e-07] + [0.99999985]*x
y = [-2.40667795e-09] + [0.99999999]*x


In [None]:
# Training data lying exactly on the line y = 2*x.
xs = [1,2,3,4,5,6,7]
ys = [2,4,6,8,10,12,14]

print("Target: intercept = 0 and slope = 2")

# Seed NumPy's global RNG so the random initial weights and the order in which
# samples are drawn are the same every time this cell is run.
np.random.seed(0)

model = Line()
print("Adam")
adam(model, xs, ys)
print(model)

Target: intercept = 0 and slope = 2
Adam
Iteration 100
y = [0.90654849] + [1.81216752]*x
Iteration 200
y = [0.15138363] + [1.97278532]*x
Iteration 300
y = [0.01154927] + [1.99821733]*x
Iteration 400
y = [0.00080031] + [1.99984661]*x
Iteration 500
y = [2.33908682e-05] + [1.99999897]*x
Iteration 600
y = [-1.95724703e-07] + [2.00000003]*x
Iteration 700
y = [-1.37109921e-09] + [2.]*x
Iteration 800
y = [-1.83680577e-11] + [2.]*x
Iteration 900
y = [1.17270192e-13] + [2.]*x
y = [-6.17103812e-15] + [2.]*x


In [None]:
# Training data lying exactly on the line y = 1 + 2*x.
xs = [1,2,3,4,5,6,7]
ys = [3,5,7,9,11,13,15]

print("Target: intercept = 1 and slope = 2")

# Seed NumPy's global RNG so the random initial weights and the order in which
# samples are drawn are the same every time this cell is run.
np.random.seed(0)

model = Line()
print("Adam")
adam(model, xs, ys)
print(model)

Target: intercept = 1 and slope = 2
Adam
Iteration 100
y = [1.1671971] + [1.96891826]*x
Iteration 200
y = [1.01853834] + [1.99469573]*x
Iteration 300
y = [1.00200444] + [1.99948629]*x
Iteration 400
y = [1.00008894] + [1.99997287]*x
Iteration 500
y = [0.99999993] + [2.]*x
Iteration 600
y = [1.] + [2.]*x
Iteration 700
y = [1.] + [2.]*x
Iteration 800
y = [1.] + [2.]*x
Iteration 900
y = [1.] + [2.]*x
y = [1.] + [2.]*x
