In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import os
from os import listdir
import PIL
from PIL import Image as im

In [2]:
chihuahua_dir = r"images\sample\chihuahua"
chimgs = []
for image_ in os.listdir(chihuahua_dir):
    image_path = os.path.join(chihuahua_dir, image_)
    chim = im.open(image_path, 'r')
    small = chim.resize((32, 32))
    chimgs.append(small)

chihuahua_data = np.array(chimgs)
print(chihuahua_data.shape)

(640, 32, 32, 3)


In [3]:
muffin_dir = r"images\sample\muffin"
mufimgs_pre = []
for image_ in os.listdir(muffin_dir):
    image_path = os.path.join(muffin_dir, image_)
    mufim = im.open(image_path, 'r')
    small = mufim.resize((32, 32))
    mufimgs_pre.append(small)

# one image had no color channels
mufimgs = []
for img in mufimgs_pre:
    if str(np.shape(img)) == '(32, 32, 3)':
        mufimgs.append(img)

muffin_data = np.array(mufimgs)
print(muffin_data.shape)

(543, 32, 32, 3)


In [4]:
c_labels = np.zeros(chihuahua_data.shape[0]) # chihuahua -> 0
m_labels = np.ones(muffin_data.shape[0]) # muffin -> 1

In [5]:
image_pixels = np.concatenate((chihuahua_data, muffin_data), axis = 0)
labels = np.concatenate((c_labels, m_labels), axis = 0)

In [6]:
# currently the first 639 imgaes are of chihuahuas and the remianing are of muffins.
# this will affect the train-val-test split. so we shuffle.

indices = np.arange(image_pixels.shape[0])
np.random.shuffle(indices)

shuffled_image_pixels = image_pixels[indices]
shuffled_labels = labels[indices]

X_train = shuffled_image_pixels[:1000]
X_val = shuffled_image_pixels[1000:1091]
X_test = shuffled_image_pixels[1091:]

y_train = shuffled_labels[:1000]
y_val = shuffled_labels[1000:1091]
y_test = shuffled_labels[1091:]

print('X_train\t', X_train.shape)
print('y_train\t', y_train.shape)
print('X_val\t', X_val.shape)
print('y_val\t', y_val.shape)
print('X_test\t', X_test.shape)
print('y_test\t', y_test.shape)

X_train	 (1000, 32, 32, 3)
y_train	 (1000,)
X_val	 (91, 32, 32, 3)
y_val	 (91,)
X_test	 (92, 32, 32, 3)
y_test	 (92,)


Forward

In [7]:
# so we have X_train, which will be our image matrix of shape (1000, 32, 32, 3).
# the filter_weights matrix will pass through the image with a specific stride. 
# a padding must be set to preserve the dimentions.
# 1 conv layer will have 36 filters of size 7 x 7 each.

filter_weights = np.random.randn(36, 7, 7, 3)
filter_biases = np.random.randn(36,)

stride = 1
pad = 6
image_h, image_w = X_train.shape[1], X_train.shape[2]
filter_h, filter_w = filter_weights.shape[1], filter_weights.shape[2]
num_images = X_train.shape[0]

X_train_padded = np.pad(X_train, ((0, 0), (pad, pad), (pad, pad), (0, 0)), mode = 'constant') # input matrix

# image_h_out = 1 + (image_h + 2 * pad - filter_h) // stride # formula for calculating height of output 
# image_w_out = 1 + (image_w + 2 * pad - filter_w) // stride # formula for calculating width of output
# print(image_h_out, image_w_out)
image_h_out = (((image_h + pad) - filter_h) // stride) + 1 # new
image_w_out = (((image_w + pad) - filter_w) // stride) + 1 # new
# print(image_h_out_new, image_w_out_new)

channel_out = filter_weights.shape[0] # output channels

out = np.zeros((num_images, image_h_out, image_w_out, channel_out))

counter = 0

for i in range(num_images): # for one image
    for j in range(image_h_out): # 32
        for k in range(image_w_out): # 32
            for l in range(channel_out): # 36
                h_start = j * stride # height starting index for filter traversal
                h_end = h_start + filter_h # height ending index for filter traversal
                w_start = k * stride # width starting index for filter traversal
                w_end = w_start + filter_w # width ending index for filter traversal

                out[i, j, k, l] = np.sum(X_train_padded[i, h_start:h_end, w_start:w_end, :] * filter_weights[l, :, :, :] * filter_biases[l]) 

                counter += 1

    if (i + 1) % 200 == 0:
        print('Images Processed:', i + 1)

Images Processed: 200
Images Processed: 400
Images Processed: 600
Images Processed: 800
Images Processed: 1000


In [9]:
pool_h, pool_w = 2, 2 # height and width of pool region
stride = 2

num_images = out.shape[0] # 1000
image_h = out.shape[1] # 32
image_w = out.shape[2] # 32
num_channels = out.shape[3] # 36

# pool_h_out = 1 + (image_h - pool_h) // stride # formula for calculating height of pooled output
# pool_w_out = 1 + (image_w - pool_w) // stride # formula for calculating width of pooled output
# print(pool_h_out)

pool_h_out = (((image_h) - pool_h) // stride) + 1 # new
pool_w_out = (((image_w) - pool_w) // stride) + 1 # new
# print(pool_h_out_new)

pooled_out = np.zeros((num_images, pool_h_out, pool_w_out, num_channels))

for i in range(num_images):
    for j in range(pool_h_out):
        for k in range(pool_w_out):
            for l in range(num_channels):
                pool_h_start = j * stride
                pool_h_end = pool_h_start + pool_h
                pool_w_start = k * stride
                pool_w_end = pool_w_start + pool_w

                pool_region = out[i, pool_h_start:pool_h_end, pool_w_start:pool_w_end, l] 
                pooled_out[i, j, k, l] = np.max(pool_region)

print('Max Pooled Output Shape for one Image:', pooled_out.shape[1:])

Max Pooled Output Shape for one Image: (16, 16, 36)


In [9]:
flat = pooled_out.reshape(1000, -1)

In [None]:
# flat_norm = 2 * ((flat - flat.min()) / (flat.max() - flat.min())) - 1

In [None]:
flat_sigmoid = 1 / (1 + np.exp(-flat))

In [23]:
# the fully connected section has 1 hidden layer with 50 neurons

input_features = flat_sigmoid.shape[1] # 9216
neurons_layer1 = 50
neurons_layer2 = 1

w1 = np.random.randn(input_features, neurons_layer1)
w2 = np.random.randn(neurons_layer1, neurons_layer2)
b1 = np.random.randn(neurons_layer1)
b2 = np.random.randn(neurons_layer2)

hidden_out = np.dot(flat_sigmoid, w1) + b1 # (1000, 9216) -dot- (9216, 50) = (1000, 50)
affine_out = np.dot(hidden_out, w2) + b2 # (1000, 50) -dot- (50, 1) = (1000, 1)

In [None]:
# norm_out = 2 * ((affine_out - affine_out.min()) / (affine_out.max() - affine_out.min())) - 1

In [None]:
y_pred = 1.0 / (1.0 + np.exp(-affine_out))

In [26]:
loss = np.sum(np.power((y_train - y_pred), 2) / 2) / len(y_train)
print(loss)

127.95469830897378


Backward

In [27]:
d_loss_by_d_y_pred = y_pred - y_train

In [None]:
d_y_pred_by_d_affine_out = y_pred * (1 - y_pred)

In [33]:
d_affine_out_by_d_w2 = hidden_out 

In [None]:
d_l_w2 = d_loss_by_d_y_pred * d_y_pred_by_d_affine_out * d_affine_out_by_d_w2

In [34]:
d_affine_out_by_d_b2 = 1

In [None]:
d_l_b2 = d_loss_by_d_y_pred * d_y_pred_by_d_affine_out

In [35]:
d_affine_out_by_d_hidden_out = w2

In [None]:
d_hidden_out_by_d_w1 = flat_sigmoid

In [None]:
d_l_w1 = d_loss_by_d_y_pred * d_y_pred_by_d_affine_out * d_affine_out_by_d_w2 * d_affine_out_by_d_hidden_out * d_hidden_out_by_d_w1

In [37]:
d_hidden_out_by_d_b1 = 1

In [None]:
d_l_b1 = d_loss_by_d_y_pred * d_y_pred_by_d_affine_out * d_affine_out_by_d_hidden_out

In [None]:
d_l_f_setup = d_loss_by_d_y_pred * d_y_pred_by_d_affine_out * d_affine_out_by_d_hidden_out