In [None]:
import numpy as np
import pandas as pd
import os
import random
import matplotlib.pyplot as plt
from PIL import Image

%matplotlib inline  

In [None]:
# Directory that holds the raw image files listed and labelled by the cells below.
# This is an absolute, machine-specific Windows path; adjust it before running elsewhere.
dataset_path = "C:\\models\\numpy\\logistic_regression\\data"

In [None]:
# os.listdir returns entries in arbitrary order. Sorting gives a stable order so
# that the seeded shuffle performed later produces the same dataset on every run.
image_names = sorted(os.listdir(dataset_path))
print(image_names)

In [None]:
# Map each image path to its class label: 0 when the first dot-separated part of
# the file name ends with "cat", 1 when it ends with "dog". Other files are skipped.
dataset = {}

for name in image_names:
    image_path = os.path.join(dataset_path, name)
    splitted = name.split(".")
    prefix = splitted[0]
    if prefix.endswith("cat"):
        label = 0
    elif prefix.endswith("dog"):
        label = 1
    else:
        continue
    dataset[image_path] = label

In [None]:
# Preview one sample image. The path is derived from dataset_path so the data
# location is configured in a single place, and the file handle is closed as
# soon as the pixels have been copied into a numpy array.
sample_image_path = os.path.join(dataset_path, "cat.0.jpg")
with Image.open(sample_image_path) as sample_image:
    img = np.array(sample_image)
plt.imshow(img)

In [None]:
# Flatten the sample image into a single column vector with
# height * width * channels rows.
n_h, n_w, n_c = img.shape[0], img.shape[1], img.shape[2]
x = np.reshape(img, (n_h * n_w * n_c, 1))
print("flattened shape :", x.shape)

In [None]:
# Load every labelled image, resize it to 100x100 and scale pixels to [0, 1].
x = []
y = []

for image_path, label in dataset.items():
    # Convert to RGB so grayscale / RGBA files do not produce arrays of a
    # different shape, and close the file once the pixels are copied out.
    with Image.open(image_path) as image:
        image_arr = np.array(image.convert("RGB").resize((100, 100))) / 255.0
    x.append(image_arr)
    y.append(label)

# Shuffle images and labels with ONE shared permutation. Shuffling the two
# lists separately draws two different permutations and breaks the
# image <-> label pairing.
random.seed(43)
order = list(range(len(x)))
random.shuffle(order)

x = np.array([x[idx] for idx in order])
y = np.array([y[idx] for idx in order])

print("shape of x is : ", x.shape)
print("shape of y is : ", y.shape)

In [None]:
# Persist the preprocessed arrays. The directory is created if missing because
# np.save does not create parent directories.
save_path = "C:\\models\\numpy\\logistic_regression\\preprocessed_data"
os.makedirs(save_path, exist_ok=True)
np.save(os.path.join(save_path, "x.npy"), x)
np.save(os.path.join(save_path, "y.npy"), y)

In [None]:
# Number of samples, taken from the data instead of being hard-coded.
m = x.shape[0]
# Flatten each image into a row, then transpose so every COLUMN is one sample.
# Reshaping directly to (features, m) would keep the shape but interleave
# pixels of different images.
x_flattened = x.reshape(m, -1).T
y_true = y.reshape(1, m)

print("input shape : ", x_flattened.shape)
print("output shape : ", y_true.shape)

In [None]:
def wts_init(ld):
    """Create randomly initialised weights and biases for a dense network.

    Parameters
    ----------
    ld : sequence of int
        Layer sizes, input layer first (e.g. ``[n_in, n_hidden, n_out]``).

    Returns
    -------
    dict
        ``"w<i>"`` with shape ``(ld[i], ld[i-1])`` and ``"b<i>"`` with shape
        ``(ld[i], 1)`` for ``i = 1 .. len(ld) - 1``, all drawn from a standard
        normal distribution.
    """
    params = {}
    for layer in range(1, len(ld)):
        fan_out, fan_in = ld[layer], ld[layer - 1]
        params[f"w{layer}"] = np.random.randn(fan_out, fan_in)
        params[f"b{layer}"] = np.random.randn(fan_out, 1)
    return params

In [None]:
# debug: build a three-layer network and print the shape of each weight matrix
ld = [10000, 6, 5, 1]
params = wts_init(ld)
for weight_key in ("w1", "w2", "w3"):
    print(params[weight_key].shape)

In [None]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, element-wise for arrays."""
    return 1 / (1 + np.exp(-x))

In [None]:
def fc(x, params):
    """Forward pass: tanh on every hidden layer, sigmoid on the last layer.

    Parameters
    ----------
    x : ndarray
        Input activations; multiplied on the left by ``params["w1"]``.
    params : dict
        ``"w<i>"`` / ``"b<i>"`` entries; the layer count is ``len(params) // 2``.

    Returns
    -------
    y_hat : ndarray
        Sigmoid output of the last layer.
    forward_cache : list of tuple
        One ``(w, b, z, a_prev, a_out)`` tuple per layer, in layer order.
    """
    n_layers = len(params) // 2
    forward_cache = []
    activation = x

    # Hidden layers 1 .. n_layers - 1 use tanh.
    for idx in range(1, n_layers):
        w, b = params["w" + str(idx)], params["b" + str(idx)]
        z = np.dot(w, activation) + b
        hidden = np.tanh(z)
        forward_cache.append((w, b, z, activation, hidden))
        activation = hidden

    # Output layer uses sigmoid.
    w_out, b_out = params["w" + str(n_layers)], params["b" + str(n_layers)]
    z_final = np.dot(w_out, activation) + b_out
    y_hat = sigmoid(z_final)
    forward_cache.append((w_out, b_out, z_final, activation, y_hat))
    return y_hat, forward_cache

In [None]:
def fc_with_dropout(x, params, rate):
    """Forward pass with inverted dropout applied to every hidden layer.

    Parameters
    ----------
    x : ndarray
        Input activations; multiplied on the left by ``params["w1"]``.
    params : dict
        ``"w<i>"`` / ``"b<i>"`` entries; the layer count is ``len(params) // 2``.
    rate : float
        Probability of KEEPING a hidden unit (not the drop rate). Must lie in
        ``(0, 1]``.

    Returns
    -------
    y_hat : ndarray
        Sigmoid output of the last layer.
    forward_cache : list of tuple
        One ``(w, b, z, a_prev, a_out)`` tuple per layer; for hidden layers
        ``a_out`` is the activation after masking and rescaling.

    Raises
    ------
    ValueError
        If ``rate`` is not in ``(0, 1]``.
    """
    keep_prob = rate
    if not 0 < keep_prob <= 1:
        raise ValueError("rate must be a keep probability in (0, 1]")

    a_next = x
    layers = len(params)//2
    forward_cache = []
    for i in range(1, layers):
        a_prev = a_next
        z = np.dot(params["w"+str(i)], a_prev) + params["b"+str(i)]
        a_next = np.tanh(z)
        # The mask must be drawn from a UNIFORM [0, 1) distribution so that each
        # unit is kept with probability keep_prob; a normal draw (randn) keeps a
        # different fraction and makes the 1/keep_prob rescaling incorrect.
        do_mask = np.random.rand(a_next.shape[0], a_next.shape[1]) < keep_prob
        a_next = np.multiply(a_next, do_mask)
        # Inverted dropout: rescale so the expected activation is unchanged.
        a_next = a_next/keep_prob
        forward_cache.append((params["w"+str(i)], params["b"+str(i)], z, a_prev, a_next))
    a_prev = a_next
    z_final = np.dot(params["w"+str(layers)], a_prev) + params["b"+str(layers)]
    y_hat = sigmoid(z_final)
    forward_cache.append((params["w"+str(layers)], params["b"+str(layers)], z_final, a_prev, y_hat))
    return y_hat, forward_cache

In [None]:
# debug: run the dropout forward pass on random inputs and check the output shape.
# A dedicated variable is used so the image array `x` built earlier is not overwritten.
x_debug = np.random.rand(10000, 20)
preds, cache = fc_with_dropout(x_debug, params, 0.8)
print(preds.shape)

In [None]:
def loss(y_true, y_hat):
    """Mean binary cross-entropy between labels and predicted probabilities.

    Parameters
    ----------
    y_true : ndarray, shape (1, m)
        Ground-truth labels.
    y_hat : ndarray, shape (1, m)
        Predicted probabilities.

    Returns
    -------
    ndarray
        Zero-dimensional array holding the averaged cost.
    """
    m = y_true.shape[1]
    # Keep predictions strictly inside (0, 1): a saturated sigmoid returns
    # exactly 0.0 or 1.0, and log(0) would turn the cost into inf / nan.
    eps = 1e-15
    y_hat = np.clip(y_hat, eps, 1 - eps)
    cost = (-1/m) * (np.dot(y_true, np.log(y_hat).T) + np.dot((1-y_true), np.log(1-y_hat).T))
    return np.squeeze(cost)

In [None]:
def frob_norm(parameters, m, lamb):
    """L2 regularisation term: ``lamb / (2 * m)`` times the sum of squared weights.

    Parameters
    ----------
    parameters : dict
        ``"w<i>"`` / ``"b<i>"`` entries; only the ``"w<i>"`` matrices contribute.
    m : int
        Number of samples used to scale the penalty.
    lamb : float
        Regularisation strength.

    Returns
    -------
    float
        The penalty summed over ALL layers.
    """
    num_layers = len(parameters)//2
    total = 0.0
    for i in range(1, num_layers+1):
        # Accumulate across layers; assigning instead of adding would discard
        # every layer except the last one.
        total += np.sum(np.square(parameters["w"+str(i)]))
    return (lamb/(2*m)) * total

In [None]:
def cost_with_reg(parameters, y_true, y_hat, lamb=0.1):
    """Binary cross-entropy cost plus the L2 weight penalty.

    Parameters
    ----------
    parameters : dict
        Network parameters passed on to ``frob_norm``.
    y_true : ndarray, shape (1, m)
        Ground-truth labels.
    y_hat : ndarray, shape (1, m)
        Predicted probabilities.
    lamb : float, optional
        Regularisation strength; defaults to 0.1.

    Returns
    -------
    Sum of the cross-entropy cost and the regularisation term.
    """
    m = y_true.shape[1]
    # Reuse the shared cross-entropy helper instead of duplicating its formula.
    cost = loss(y_true, y_hat)
    norm = frob_norm(parameters, m, lamb)
    return norm+cost

In [None]:
# debug: evaluate the loss on random predictions.
y_hat_tst = np.random.rand(1,20)
# Labels must be binary (0 or 1) for cross-entropy to be meaningful; draws from a
# normal distribution are neither binary nor non-negative.
y_true_tst = np.random.randint(0, 2, size=(1,20))
cost = loss(y_true_tst, y_hat_tst)
print(cost)

In [None]:
def bc(ld, y_true, y_pred, cache):
    """Backward pass for a network with tanh hidden layers and a sigmoid output.

    Parameters
    ----------
    ld : sequence of int
        Layer sizes. Not used by the computation; kept so existing callers work.
    y_true : ndarray, shape (1, m)
        Ground-truth labels.
    y_pred : ndarray, shape (1, m)
        Predicted probabilities used in the cross-entropy derivative.
    cache : list of tuple
        Per-layer ``(w, b, z, a_prev, a_out)`` tuples from the forward pass;
        the last entry belongs to the sigmoid output layer.

    Returns
    -------
    dict
        ``"dw<i>"`` and ``"db<i>"`` for every layer ``i = 1 .. len(cache)``,
        both averaged over the ``m`` samples.

    Notes
    -----
    NOTE(review): the dropout mask of ``fc_with_dropout`` is not stored in the
    cache, so it is not applied here; the L2 penalty added by ``cost_with_reg``
    is likewise not reflected in ``dw``. Confirm whether that is intended.
    """
    m = y_true.shape[1]
    gradients = {}
    num_layers = len(cache)

    w_final, b_final, z_final, a_prev, y_hat = cache[-1]
    da_final = -(np.divide(y_true, y_pred) - np.divide((1-y_true), (1-y_pred)))
    # The forward pass caches y_hat = sigmoid(z_final), so the sigmoid
    # derivative is y_hat * (1 - y_hat) without recomputing the activation.
    dz_final = da_final * y_hat * (1-y_hat)
    dw_final = (1/m) * np.dot(dz_final, a_prev.T)
    # Averaged over samples, consistent with dw and with the 1/m in the cost.
    db_final = (1/m) * np.sum(dz_final, axis=1, keepdims=True)
    da_prev = np.dot(w_final.T, dz_final)
    # Key the output-layer gradients by the real depth instead of a fixed "3".
    gradients["dw"+str(num_layers)] = dw_final
    gradients["db"+str(num_layers)] = db_final

    for i in reversed(range(num_layers-1)):
        w, b, z, a_prev, _ = cache[i]
        da = da_prev
        dz = da * (1-np.square(np.tanh(z)))  # tanh'(z) = 1 - tanh(z)^2
        dw = (1/m) * np.dot(dz, a_prev.T)
        db = (1/m) * np.sum(dz, axis=1, keepdims=True)
        da_prev = np.dot(w.T, dz)
        gradients["dw"+str(i+1)] = dw
        gradients["db"+str(i+1)] = db
    return gradients

In [None]:
# debug: run the backward pass on the random test labels/predictions using the
# cache produced by the dropout forward-pass debug cell.
grads = bc(ld, y_true_tst, y_hat_tst, cache)

In [None]:
def init_velocity(params):
    """Return zero-filled momentum buffers matching every weight and bias.

    Keys are ``"vdw<i>"`` and ``"vdb<i>"`` for ``i = 1 .. len(params) // 2``,
    each with the same two-dimensional shape as the matching parameter.
    """
    velocity = {}
    for layer in range(1, len(params) // 2 + 1):
        for kind in ("w", "b"):
            ref = params[kind + str(layer)]
            velocity["vd" + kind + str(layer)] = np.zeros((ref.shape[0], ref.shape[1]))
    return velocity

In [None]:
# debug: build the momentum buffers and check that the layer-2 weight velocity
# has a shape (printed below) to compare against params["w2"].
v = init_velocity(params)
print(v["vdw2"].shape)

In [None]:
def update_with_momentum(params, grads, v, beta, lr):
    """Gradient-descent step with an exponentially weighted gradient average.

    Parameters
    ----------
    params : dict
        ``"w<i>"`` / ``"b<i>"`` entries; updated in place and returned.
    grads : dict
        ``"dw<i>"`` / ``"db<i>"`` gradients.
    v : dict
        ``"vdw<i>"`` / ``"vdb<i>"`` velocity buffers; updated in place so the
        running average carries over to the next call.
    beta : float
        Decay factor of the running average.
    lr : float
        Learning rate.

    Returns
    -------
    dict
        The same ``params`` dictionary with updated entries.
    """
    num_layers = len(params)//2
    for i in range(1, num_layers+1):
        vdw = beta*v["vdw"+str(i)] + (1-beta)*grads["dw"+str(i)]
        vdb = beta*v["vdb"+str(i)] + (1-beta)*grads["db"+str(i)]
        # Persist the new velocities; without this the buffers stay at zero and
        # every step degenerates to plain gradient descent scaled by (1 - beta).
        v["vdw"+str(i)] = vdw
        v["vdb"+str(i)] = vdb
        params["w"+str(i)] = params["w"+str(i)] - (lr*vdw)
        params["b"+str(i)] = params["b"+str(i)] - (lr*vdb)
    return params

In [None]:
def init_prop(params):
    """Return zero-filled squared-gradient buffers for every weight and bias.

    Keys are ``"sdw<i>"`` and ``"sdb<i>"`` for ``i = 1 .. len(params) // 2``,
    each with the same two-dimensional shape as the matching parameter.
    """
    s = {}
    for layer in range(1, len(params) // 2 + 1):
        for kind in ("w", "b"):
            ref = params[kind + str(layer)]
            s["sd" + kind + str(layer)] = np.zeros((ref.shape[0], ref.shape[1]))
    return s

In [None]:
# debug: build the zero-filled squared-gradient buffers for the debug parameters.
s = init_prop(params)

In [None]:
def update_with_rmsprop(params, grads, s, beta, lr, epsilon=1e-8):
    """RMSprop step: scale each gradient by the root of its running mean square.

    Parameters
    ----------
    params : dict
        ``"w<i>"`` / ``"b<i>"`` entries; updated in place and returned.
    grads : dict
        ``"dw<i>"`` / ``"db<i>"`` gradients.
    s : dict
        ``"sdw<i>"`` / ``"sdb<i>"`` running averages of squared gradients;
        updated in place so the history carries over to the next call.
    beta : float
        Decay factor of the running average.
    lr : float
        Learning rate.
    epsilon : float, optional
        Small constant added to the denominator to avoid division by zero.

    Returns
    -------
    dict
        The same ``params`` dictionary with updated entries.
    """
    num_layers = len(params)//2
    for i in range(1, num_layers+1):
        sdw = beta*s["sdw"+str(i)] + (1-beta)*np.square(grads["dw"+str(i)])
        sdb = beta*s["sdb"+str(i)] + (1-beta)*np.square(grads["db"+str(i)])
        # Persist the running averages; otherwise every call starts from zero.
        s["sdw"+str(i)] = sdw
        s["sdb"+str(i)] = sdb
        # epsilon keeps a zero gradient from producing 0/0 = nan.
        params["w"+str(i)] = params["w"+str(i)] - (lr*(grads["dw"+str(i)]/(np.sqrt(sdw)+epsilon)))
        params["b"+str(i)] = params["b"+str(i)] - (lr*(grads["db"+str(i)]/(np.sqrt(sdb)+epsilon)))
    return params

In [None]:
def init_adam(params):
    """Return zero-filled Adam state for every weight and bias.

    Returns
    -------
    s : dict
        ``"sdw<i>"`` / ``"sdb<i>"`` squared-gradient buffers.
    velocity : dict
        ``"vdw<i>"`` / ``"vdb<i>"`` gradient-average buffers.

    Each buffer has the same two-dimensional shape as the matching parameter.
    """
    s, velocity = {}, {}
    for layer in range(1, len(params) // 2 + 1):
        w_ref = params["w" + str(layer)]
        b_ref = params["b" + str(layer)]
        w_shape = (w_ref.shape[0], w_ref.shape[1])
        b_shape = (b_ref.shape[0], b_ref.shape[1])
        s["sdw" + str(layer)] = np.zeros(w_shape)
        s["sdb" + str(layer)] = np.zeros(b_shape)
        velocity["vdw" + str(layer)] = np.zeros(w_shape)
        velocity["vdb" + str(layer)] = np.zeros(b_shape)
    return s, velocity

In [None]:
# debug: build the Adam state; this rebinds the global `s` and `v` to the
# squared-gradient and velocity buffers returned by init_adam.
s, v = init_adam(params)

In [None]:
def update_adam(params, grads, s,v, beta1, beta2, lr, t, epsilon):
    """Adam step with bias-corrected first and second moment estimates.

    Parameters
    ----------
    params : dict
        ``"w<i>"`` / ``"b<i>"`` entries; updated in place and returned.
    grads : dict
        ``"dw<i>"`` / ``"db<i>"`` gradients.
    s : dict
        ``"sdw<i>"`` / ``"sdb<i>"`` second-moment buffers; updated in place.
    v : dict
        ``"vdw<i>"`` / ``"vdb<i>"`` first-moment buffers; updated in place.
    beta1, beta2 : float
        Decay factors of the first and second moment averages.
    lr : float
        Learning rate.
    t : int
        Step counter used for bias correction; must be >= 1 so that
        ``1 - beta**t`` is non-zero.
    epsilon : float
        Small constant added to the denominator for numerical stability.

    Returns
    -------
    dict
        The same ``params`` dictionary with updated entries.
    """
    num_layers = len(params)//2
    for i in range(1, num_layers+1):
        vdw = beta1*v["vdw"+str(i)] + (1-beta1)*grads["dw"+str(i)]
        vdb = beta1*v["vdb"+str(i)] + (1-beta1)*grads["db"+str(i)]
        sdw = beta2*s["sdw"+str(i)] + (1-beta2)*np.square(grads["dw"+str(i)])
        sdb = beta2*s["sdb"+str(i)] + (1-beta2)*np.square(grads["db"+str(i)])
        # Persist the UNCORRECTED moments so the averages accumulate across
        # calls; without this the buffers stay at zero forever.
        v["vdw"+str(i)] = vdw
        v["vdb"+str(i)] = vdb
        s["sdw"+str(i)] = sdw
        s["sdb"+str(i)] = sdb
        # Bias correction is applied to local copies used only for this step.
        vdw_hat = vdw/(1-beta1**t)
        vdb_hat = vdb/(1-beta1**t)
        sdw_hat = sdw/(1-beta2**t)
        sdb_hat = sdb/(1-beta2**t)
        params["w"+str(i)] = params["w"+str(i)] - (lr*(vdw_hat/(np.sqrt(sdw_hat)+epsilon)))
        params["b"+str(i)] = params["b"+str(i)] - (lr*(vdb_hat/(np.sqrt(sdb_hat)+epsilon)))
    return params

In [None]:
def update(params, grads, lr):
    """Plain gradient-descent step.

    Replaces every ``"w<i>"`` / ``"b<i>"`` entry of ``params`` with
    ``value - lr * gradient`` (gradients come from ``"dw<i>"`` / ``"db<i>"``)
    and returns the same dictionary.
    """
    for layer in range(1, len(params) // 2 + 1):
        for kind in ("w", "b"):
            key = kind + str(layer)
            params[key] = params[key] - (lr * grads["d" + key])
    return params

In [None]:
# debug: apply one plain gradient-descent step. `update` writes into `params`
# and returns that same dictionary, so `updated` and `params` are one object.
updated = update(params, grads, 0.001)

In [None]:
def l_model(epochs, x, y, lr, ld):
    """Train the network with dropout, L2-regularised cost and Adam updates.

    Parameters
    ----------
    epochs : int
        Number of full-batch training iterations.
    x : ndarray
        Input matrix passed to the forward pass; its first dimension must
        equal ``ld[0]``.
    y : ndarray, shape (1, m)
        Ground-truth labels.
    lr : float
        Learning rate.
    ld : sequence of int
        Layer sizes, input layer first.

    Returns
    -------
    params : dict
        Trained weights and biases.
    epoch_loss : list
        Regularised cost recorded at every epoch.
    """
    params = wts_init(ld)
    s, v = init_adam(params)
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-7
    keep_prob = 0.8
    epoch_loss = []
    for i in range(1, epochs+1):
        y_hat, cache = fc_with_dropout(x, params, keep_prob)
        cost = cost_with_reg(params, y, y_hat)
        grads = bc(ld, y, y_hat, cache)
        # The Adam step counter must advance with every iteration so that the
        # bias correction 1 - beta**t matches the number of updates performed.
        params = update_adam(params, grads, s, v, beta1, beta2, lr, i, epsilon)
        print("Epoch ", i, " =====> ", cost)
        epoch_loss.append(cost)
    return params, epoch_loss

In [None]:
# The input layer size must equal the number of rows of x_flattened (one row per
# pixel value); the debug value ld[0] = 10000 does not match 100x100x3 images.
# A separate list is used so the debug `ld` is left untouched.
model_ld = [x_flattened.shape[0]] + list(ld[1:])
wts, history = l_model(10, x_flattened, y_true, 0.0001, model_ld)