In [None]:
def sigmoid(y_in):
    y_hat = 1 / (1 + np.exp(-y_in))
    return y_hat

In [None]:
def perceptron(x, w, b):
    y_in = x * w + b
    y_hat = sigmoid(y_in)
    return y_hat

In [None]:
def grad_w(x, y, w, b):
    y_hat = perceptron(x, w, b)
    db =  (y - y_hat) * y_hat * (1 - y_hat)
    return db

def grad_b(x, y, w, b):
    y_hat = perceptron(x, w, b)
    dw =  (y - y_hat) * y_hat * (1 - y_hat) * x
    return dw

In [None]:
def minibatch(w, b, x, y,a):
    n = 0.1
    epoch = 10
    batch_size = int(input("Enter the batch size: "))
    for i in range(epoch):
        dw, db, sample_no = 0, 0, 0
        for xi, yi in zip(x, y):
            dw += grad_w(w, b, xi, yi)
            db += grad_b(w, b, xi, yi)
            sample_no += 1
            if sample_no % batch_size == 0:
                w = w - dw*a
                b = b - db *a

    return w, b
x=np.array([0.5,2.5])
y=np.array([1.2,0.9])
w=0.0
b=0
a=0.1
new_w, new_b = minibatch(w, 0, x, y,a)

print("Updated w:", new_w)
print("Updated b:", new_b)

Enter the batch size: 2
Updated w: 0.2663132373855611
Updated b: 0.03190748395173052


In [None]:
def momentum_descent(w, b, x, y, alpha,beta, num_epochs):
    v_w, v_b = 0.0, 0.0
    for epoch in range(num_epochs):
        dw,db=0,0
        for xi, yi in zip(x, y):
            dw = grad_w(w, b, xi, yi)
            db = grad_b(w, b, xi, yi)

        v_w = beta * v_w +(1-beta)* dw
        v_b = beta * v_b + (1-beta)* db

        w -= v_w*alpha
        b -= v_b*alpha
    return w, b

x=np.array([0.5,2.5])
y=np.array([1.2,0.9])
w=0.0
b=0
a=0.1
num_epochs = 10
beta=0.9
new_w, new_b = momentum_descent(w, 0.0, x, y, a, beta, num_epochs)

print("Updated w:", new_w)
print("Updated b:", new_b)

Updated w: 0.06015928469643391
Updated b: 0.00076167207008995


In [None]:
import numpy as np

def adagrad(w, b, x, y, alpha, epsilon, num_epochs):

    for epoch in range(num_epochs):
        for xi, yi in zip(x, y):
            dw = grad_w(w, b, xi, yi)
            db = grad_b(w, b, xi, yi)

            w -= (alpha / (np.sqrt(dw ** 2) + epsilon)) * dw
            b -= (alpha / (np.sqrt(db ** 2) + epsilon)) * db

    return w, b

x=np.array([0.5,2.5])
y=np.array([1.2,0.9])
w=0.0
b=0
a=0.1
num_epochs = 10
eps=0.00001

new_w, new_b = adagrad(w, 0, x, y, a, eps, num_epochs)

print("Updated w:", new_w)
print("Updated b:", new_b)

Updated w: 0.9995639624032058
Updated b: 0.8992503170804655


In [None]:
def NAG(w, b, x, y, alpha, beta, num_epochs):
    v_w, v_b = 0.0, 0.0
    for epoch in range(num_epochs):
        for xi, yi in zip(x, y):
            lookahead_dw = grad_w(w - beta * v_w, b - beta * v_b, xi, yi)
            lookahead_db = grad_b(w - beta * v_w, b - beta * v_b, xi, yi)

            v_w = beta * v_w - alpha * lookahead_dw
            v_b = beta * v_b - alpha * lookahead_db

            w += v_w
            b += v_b

    return w, b

x=np.array([0.5,2.5])
y=np.array([1.2,0.9])
w=0.0
b=0
a=0.1
num_epochs = 10
beta=0.9
new_w, new_b = NAG(w, 0, x, y, a, beta, num_epochs)

print("Updated w:", new_w)
print("Updated b:", new_b)

Updated w: 1.3658099442347835
Updated b: 0.35595057946420394


In [None]:
import numpy as np
def adam(w, b, x, y, alpha, beta1, beta2, epsilon, num_epochs):
    w,b=0.0,0.0
    m_w = np.zeros_like(w)
    m_b, v_b = 0.0, 0.0
    v_w = np.zeros_like(w)
    i=0
    for epoch in range(num_epochs):
        for xi, yi in zip(x, y):
            i += 1
            dw = grad_w(w, b, xi, yi)
            db = grad_b(w, b, xi, yi)
            m_w = beta1 * m_w + (1 - beta1) * dw
            m_b = beta1 * m_b + (1 - beta1) * db
            v_w = beta2 * v_w + (1 - beta2) * (dw ** 2)
            v_b = beta2 * v_b + (1 - beta2) * (db ** 2)
            m_w_hat = m_w / (1 - beta1 ** i)
            m_b_hat = m_b / (1 - beta1 ** i)
            v_w_hat = v_w / (1 - beta2 ** i)
            v_b_hat = v_b / (1 - beta2 ** i)
            w -= (alpha / (np.sqrt(v_w_hat) + epsilon)) * m_w_hat
            b -= (alpha / (np.sqrt(v_b_hat) + epsilon)) * m_b_hat
    return w, b
x=np.array([0.5,2.5])
y=np.array([1.2,0.9])
w,b,a=0.0,0,0.1
num_epochs = 10
esp=0.0001
beta1,beta2 = 0.9,0.999
new_w, new_b = adam(w, 0, x, y, a, beta1, beta2, eps, num_epochs)
print("Updated w:", new_w)
print("Updated b:", new_b)

Updated w: 1.2533562939177947
Updated b: 1.0582274871855926


In [11]:
import numpy as np

def adadelta(w, b, x, y, rho, epsilon, num_epochs):
    w, b = 0.0, 0.0
    E_dw, E_db = 0.0,0.0
    delta_w, delta_b =  0.0,0.0

    for epoch in range(num_epochs):
        for xi, yi in zip(x, y):
            dw = grad_w(w, b, xi, yi)
            db = grad_b(w, b, xi, yi)

            E_dw = rho * E_dw + (1 - rho) * (dw ** 2)
            E_db = rho * E_db + (1 - rho) * (db ** 2)

            delta_w = -np.sqrt(delta_w + epsilon) / (np.sqrt(E_dw + epsilon)) * dw
            delta_b = -np.sqrt(delta_b + epsilon) / (np.sqrt(E_db + epsilon)) * db


            w += delta_w
            b += delta_b

    return w, b

x = np.array([0.5, 2.5])
y = np.array([1.2, 0.9])
w, b, alpha, rho, eps, num_epochs = 0.0, 0.0, 0.1, 0.95, 0.001, 10

new_w, new_b = adadelta(w, b, x, y, rho, eps, num_epochs)
print("Updated w:", new_w)
print("Updated b:", new_b)

Updated w: 2.7740631226689167
Updated b: 0.8735200645845033
