In [0]:
from os import walk
import time
import numpy as np
import PIL.Image as Im #Image Processing library
import tqdm #Loading Bar

# Section 1: Opening and normalizing images

This function takes a folder name and returns the paths of all the files found in that folder.

In [0]:
def imagePaths(folderName):
    """Return the path of every file found under ``folderName``.

    The folder is traversed with ``os.walk``, so files in sub-folders are
    included as well.

    Parameters:
        folderName: path of the folder to scan.

    Returns:
        A list of path strings, one per file, in the order ``walk`` yields them.
    """
    f = []
    for root, dirs, files in walk(folderName):
        for filename in files:
            # Prefix with the directory the file actually lives in (root).
            # Using folderName here would produce non-existent paths for files
            # in sub-folders. For the top-level folder root == folderName.
            f.append(root+'/'+filename)
    return f

This function opens the images and returns an array of grayscale images

In [0]:
def openImages(paths):
    """Open image files and convert them to grayscale.

    Only the first ``numberOfImagesForTraining`` entries of ``paths`` are
    opened (the cap is a module-level hyperparameter).

    Parameters:
        paths: list of image file paths.

    Returns:
        A list of PIL images in mode "L", in the same order as ``paths``.
    """
    selected = paths[:numberOfImagesForTraining]
    return [Im.open(location).convert("L") for location in selected]

This function resizes the input images

In [0]:
def ResizeAndKeepRatio(images,ratio,avgWidth,avgHeight):
    """Scale every image to fit inside avgWidth x avgHeight, keeping its aspect ratio.

    An image whose height/width ratio is below ``ratio`` is scaled so that its
    width becomes ``avgWidth``; any other image is scaled so that its height
    becomes ``avgHeight``. The other dimension is scaled by the same factor
    and truncated to an integer.

    Parameters:
        images: iterable of PIL images.
        ratio: target height/width ratio (the caller passes avgHeight / avgWidth).
        avgWidth: target width in pixels.
        avgHeight: target height in pixels.

    Returns:
        A new list with the resized images, in input order.
    """
    # Im.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter under
    # its current name and is also available in older Pillow releases.
    resample = Im.LANCZOS
    newImages = []
    for image in images:
        width, height = image.size
        if (height / width) < ratio:
            # Relatively wide image: the width is the limiting dimension.
            scale = avgWidth / float(width)
            # max(1, ...) keeps extreme aspect ratios from truncating to 0 pixels.
            target = (avgWidth, max(1, int(float(height) * float(scale))))
        else:
            # Relatively tall image: the height is the limiting dimension.
            scale = avgHeight / float(height)
            target = (max(1, int(float(width) * float(scale))), avgHeight)
        newImages.append(image.resize(target, resample))
    return newImages

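Pad every image that is smaller than the target size (the average width and height) with gray bars (pixel value 128) so that all images end up with the same dimensions; the original image is centered on the new canvas.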


In [0]:
def fillImagesWithBlackBars(images,avgWidth,avgHeight):
    """Center each image on a canvas of size avgWidth x avgHeight.

    The canvas is a new grayscale ("L") image filled with the value 128, so
    the area not covered by the pasted image is mid-gray.

    Parameters:
        images: iterable of PIL images.
        avgWidth: canvas width in pixels.
        avgHeight: canvas height in pixels.

    Returns:
        A new list of canvases, one per input image, in input order.
    """
    canvasSize = (avgWidth, avgHeight)
    padded = []
    for picture in images:
        picWidth, picHeight = picture.size
        # Top-left corner that centers the picture on the canvas.
        offsetX = int((avgWidth - picWidth) / 2)
        offsetY = int((avgHeight - picHeight) / 2)
        canvas = Im.new("L", canvasSize, color=128)
        canvas.paste(picture, (offsetX, offsetY))
        padded.append(canvas)
    return padded

This function takes images of different sizes and returns images of the same size without distortion

In [0]:
def adjustImageSizes(images,size = (0,0)):
    """Bring all images to one common size without distorting them.

    Parameters:
        images: list of PIL images.
        size: (width, height) to use. The default (0,0) means "derive the size
            from the images": the mean width and mean height, rounded up.

    Returns:
        A list of images that all have the chosen width and height. Each one
        is first resized with its aspect ratio kept and then padded.
    """
    deriveFromImages = (size == (0,0))
    if deriveFromImages:
        # Training set: use the (ceiled) mean width and height of the images.
        averages = np.ceil(np.mean([image.size for image in images], axis=0))
        avgWidth, avgHeight = int(averages[0]), int(averages[1])
    else:
        # Test set: reuse the dimensions that were computed on the training set.
        avgWidth, avgHeight = size[0], size[1]

    ratio = avgHeight / avgWidth
    resized = ResizeAndKeepRatio(images,ratio,avgWidth,avgHeight)
    return fillImagesWithBlackBars(resized,avgWidth,avgHeight)

Get labels of the images from the paths

In [0]:
def getLabels(paths):
    """Derive a 0/1 label from each path.

    A path gets label 1 when the substring "cat" occurs anywhere in it
    (folder names included), otherwise 0. Only the first
    ``numberOfImagesForTraining`` paths are labelled.

    Parameters:
        paths: list of path strings.

    Returns:
        A list of ints (1 or 0), in the same order as ``paths``.
    """
    selected = paths[:numberOfImagesForTraining]
    return [1 if "cat" in location else 0 for location in selected]

Converts an image to an array of numbers; those numbers 
represent the pixels

In [0]:
def imagesToArray(images):
    """Turn each image into a flat, normalized vector of pixel values.

    Parameters:
        images: iterable of images accepted by ``np.asarray``.

    Returns:
        A list with one 1-D array per image; each array is the flattened
        pixel data passed through ``normalize``.
    """
    return [normalize(np.asarray(picture).flatten()) for picture in images]

Process the training images by extracting their labels, making them all the same size, and then turning them into arrays of normalized pixel values.

In [0]:
def processTrainImages(path):
    """Load the training set found under ``path``.

    Parameters:
        path: folder that contains the training images.

    Returns:
        A tuple ``(X, labels, size)``: the list of flattened, normalized pixel
        arrays, the list of 0/1 labels, and the (width, height) shared by all
        processed images.
    """
    filePaths = imagePaths(path)
    labels = getLabels(filePaths)
    uniformImages = adjustImageSizes(openImages(filePaths))
    features = imagesToArray(uniformImages)
    commonSize = uniformImages[0].size
    return features,labels,commonSize

We need a separate function to process the test images because their width and height are not determined by the average width and height of the test images but by the average width and height of the training images.
This is because the number of inputs into the model needs to be the same whether we are training or testing.

In [0]:
def processTestImages(path,size):
    """Load the test set found under ``path`` at a given image size.

    Parameters:
        path: folder that contains the test images.
        size: (width, height) every image is brought to.

    Returns:
        A tuple ``(X, images)``: the list of flattened, normalized pixel
        arrays and the list of resized images they were built from.
    """
    uniformImages = adjustImageSizes(openImages(imagePaths(path)),size)
    features = imagesToArray(uniformImages)
    return features,uniformImages

# Section 2: The learning

Activation functions and their derivatives

In [0]:
def RelU(Z):
    """Rectified linear unit: the element-wise maximum of ``Z`` and 0."""
    rectified = np.maximum(Z, 0)
    return rectified

In [0]:
def RelUPrime(Z):
    """Derivative of RelU: 1 where ``Z`` is strictly positive, 0 elsewhere."""
    isPositive = Z > 0
    return np.where(isPositive, 1, 0)

In [0]:
def sigmoid(S):
    """Logistic function 1 / (1 + exp(-S)), evaluated without overflow.

    Parameters:
        S: scalar or array of pre-activations.

    Returns:
        The element-wise sigmoid of ``S``, with the same shape as ``S``.
    """
    S = np.asarray(S)
    # exp() of a non-positive number cannot overflow, so only exp(-|S|) is
    # ever evaluated; the naive form overflows for large negative S.
    decay = np.exp(-np.abs(S))
    # S >= 0: 1 / (1 + exp(-S));  S < 0: exp(S) / (1 + exp(S)) -- same function.
    sig = np.where(S >= 0, 1 / (1 + decay), decay / (1 + decay))
    # [()] turns a 0-d result back into a scalar and leaves arrays untouched.
    return sig[()]

In [0]:
def sigmoidPrime(S):
    """Derivative of the sigmoid at ``S``: sigmoid(S) * (1 - sigmoid(S))."""
    # Evaluate the sigmoid once and reuse it for both factors.
    activation = sigmoid(S)
    return activation * (1 - activation)

Normalization

In [0]:
# Old normalization function, does not give good results
def oldNormalize(X):
    """Scale ``X`` by its Euclidean length (square root of the sum of squares).

    Kept as the previous normalization approach; per the note in the notebook
    it did not give good results.
    """
    squaredTotal = np.sum(np.square(X))
    length = squaredTotal ** (0.5)
    return X / length

In [0]:
def normalize(X):
    """Min-max scale ``X`` into the range [0, 1].

    Parameters:
        X: array-like of numbers (must be non-empty).

    Returns:
        ``(X - min) / (max - min)`` as a float array. When every element of
        ``X`` is equal there is no range to scale by, so an all-zero array of
        the same shape is returned instead of dividing by zero.
    """
    X = np.asarray(X)
    low = np.min(X)
    high = np.max(X)
    span = high - low
    if span == 0:
        # Constant input (e.g. a uniformly colored image): 0/0 would be NaN.
        return np.zeros(X.shape, dtype=float)
    return (X - low) / span

Initialization of weights and biases

In [0]:
def initialize(nx):
    """Draw random starting weights and biases for the whole network.

    Uses the module-level hyperparameters ``L`` (number of layers) and
    ``nodes`` (units per hidden layer). All values come from
    ``np.random.randn`` multiplied by ``factor``.

    Parameters:
        nx: number of input features.

    Returns:
        A tuple ``(W, B)`` of lists. W holds a (nodes, nx) matrix, then L-2
        matrices of shape (nodes, nodes), then a (1, nodes) matrix. B holds a
        (nodes, 1) column for every layer except the last, whose bias is a
        single float.
    """
    # Factor by which the values are shrunk
    factor = 1
    # (rows, columns) of the weight matrix of every layer before the output layer.
    hiddenShapes = [(nodes, nx)] + [(nodes, nodes)] * (L - 2)

    W = []
    B = []
    for rows, cols in hiddenShapes:
        W.append(np.random.randn(rows, cols) * factor)
        B.append(np.random.randn(rows, 1) * factor)

    # Output layer: one unit, scalar bias.
    W.append(np.random.randn(1, nodes) * factor)
    B.append(np.random.randn() * factor)

    return W,B

Gradient Descent

In [0]:
def gradientDescent(W,B,X,Y,iters,learnRate):
    """Train the network with batch gradient descent.

    Every iteration runs a forward pass over all samples, back-propagates the
    error through the sigmoid layers and updates every weight and bias. The
    module-level ``L`` is the number of layers; the code indexes ``L-2`` and
    therefore expects at least two layers.

    Parameters:
        W: list of weight matrices, one per layer (updated in place).
        B: list of biases, one per layer (updated in place).
        X: samples, one row per sample and one column per feature.
        Y: one 0/1 label per sample.
        iters: number of gradient-descent iterations.
        learnRate: step size applied to every gradient.

    Returns:
        The tuple ``(W, B)`` holding the trained parameters.
    """
    X = np.transpose(X)
    nx,m = np.shape(X)
    # Labels as a (1, m) row so they line up with the output activations.
    Y = np.reshape(Y, (1, m))
    Z = [None]*L
    A = [None]*L
    dZ = [None]*L
    dW = [None]*L
    dB = [None]*L
    dA = [None]*L

    for j in tqdm.tqdm(range(iters)):
        # Forward Pass
        A,Z = forwardPass(X,W,B,Z,A)
        # Initialization for backward propagation (output layer)
        dZ[L-1] = A[L-1] - Y
        dA[L-2] = np.dot(np.transpose(W[L-1]),dZ[L-1])
        dW[L-1] = (1 / m) * np.dot(dZ[L-1], A[L-2].T)
        dB[L-1] = (1 / m) * np.sum(dZ[L-1])
        # Back Propagation
        for layer in range(L-2, -1, -1):
            dZ[layer] = np.multiply(dA[layer],sigmoidPrime(Z[layer]))
            # The first layer is fed by the samples themselves. Indexing
            # A[layer-1] there would wrap around to A[-1] (the network output)
            # and give a wrongly shaped gradient that silently broadcasts.
            layerInput = X if layer == 0 else A[layer-1]
            dW[layer] = (1/m) * np.dot(dZ[layer],layerInput.T)
            dB[layer] = (1/m) * np.sum(dZ[layer],axis=1,keepdims=True)
            if layer > 0:
                # Nothing sits below layer 0, so no gradient is passed further down.
                dA[layer-1] = np.dot(np.transpose(W[layer]),dZ[layer])
        # Parameter update
        for layer in range(0,L):
            W[layer] = W[layer] - learnRate * dW[layer]
            B[layer] = B[layer] - learnRate * dB[layer]
    return W, B

In [0]:
def forwardPass(X,W,B,Z,A):
    """Propagate the samples through all ``L`` layers.

    Every layer, including the last one, uses the sigmoid activation. ``Z``
    and ``A`` are filled in place and also returned.

    Parameters:
        X: input matrix fed to the first layer (multiplied as W[0] . X).
        W: list of weight matrices, one per layer.
        B: list of biases, one per layer.
        Z: list of length L that receives the pre-activations.
        A: list of length L that receives the activations.

    Returns:
        The tuple ``(A, Z)``.
    """
    # First layer reads the samples directly.
    Z[0] = np.dot(W[0], X) + B[0]
    A[0] = sigmoid(Z[0])
    # Every later layer reads the activations of the layer before it.
    for k in range(1, L):
        preActivation = np.dot(W[k], A[k - 1]) + B[k]
        Z[k] = preActivation
        A[k] = sigmoid(preActivation)
    return A,Z

test() returns the results instead of the success rate. This is because this is a learning project and it is helpful for me to see the result output for each element of the test set and therefore see where learning is good and where it is bad.

In [0]:
def test(X,W,B):
    """Run the trained network on ``X`` and return the rounded predictions.

    Parameters:
        X: samples, one row per sample and one column per feature.
        W: list of trained weight matrices.
        B: list of trained biases.

    Returns:
        A list with one rounded output-layer activation per sample.
    """
    activations, _ = forwardPass(np.transpose(X),W,B,[None]*L,[None]*L)
    outputRow = activations[L-1][0]
    return [round(value) for value in outputRow]

## Main Function

In [0]:
def main():
    """Train on the training folder, then print the predictions for the test folder.

    The two path strings below are placeholders to be replaced with real
    folders. ``learningRate`` and ``iterations`` are read from module level.
    """
    trainPath = "put the path to  the training set here"
    testPath  = "put the path to the test set here"

    # Training data; imageSize is reused so the test images get the same dimensions.
    trainInputs,trainLabels,imageSize = processTrainImages(trainPath)
    featureCount = np.shape(trainInputs)[1]
    weights,biases = initialize(featureCount)

    print("Learning with a learning rate of",learningRate,"and",iterations,"iterations")
    weights,biases = gradientDescent(weights,biases,trainInputs,trainLabels,iterations,learningRate)

    testInputs,_ = processTestImages(testPath, imageSize)
    predictions = test(testInputs,weights,biases)
    print("Result",predictions)


## Hyperparameters

In [0]:
# --- Network architecture ---
# Total number of layers (the output layer included)
L = 10
# Units in every layer except the output layer
nodes = 15

# --- Optimization ---
# Step size of each gradient-descent update
learningRate = 0.1
# How many gradient-descent updates are performed
iterations = 500

# --- Data ---
# Upper bound on how many image paths are opened and labelled:
# raise it to train on more images, lower it for a quick trial run
numberOfImagesForTraining = 100

main()