In [None]:
import sys
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
import pandas as pd
from sklearn.model_selection import KFold
from tqdm import tqdm, tqdm_notebook

# Side length of the square images fed to the network; create_train_data
# reshapes every image into an (IMG_SIZE**2, 1) column vector.
IMG_SIZE = 30

# Locations of the raw labels, the pre-processed images and the cached,
# network-ready training set.
train_label_filename = './train_labels.csv'
train_images_filename = "./scaled_train_images.npy"
test_images_filename = "./scaled_test_images.npy"
formatted_data_filename = './formatted_train_data.npy'

# Every class name, in a fixed order: the position of a name in this list is
# the numeric label used for it everywhere else in the notebook.
class_name_list = [
    "sink", "pear", "moustache", "nose", "skateboard",
    "penguin", "peanut", "skull", "panda", "paintbrush",
    "nail", "apple", "rifle", "mug", "sailboat",
    "pineapple", "spoon", "rabbit", "shovel", "rollerskates",
    "screwdriver", "scorpion", "rhinoceros", "pool", "octagon",
    "pillow", "parrot", "squiggle", "mouth", "empty",
    "pencil",
]

# Lookup table from the text label to its numeric label.
label_to_num = {name: index for index, name in enumerate(class_name_list)}

In [None]:
##
## Note: The following 3 cells are just for image pre-processing. 
## You can skip reading them if you've seen the pre-processing in another notebook
##

# Helper methods when working with arrays
def imagify(origin_array, size):
    """Turn a flat vector into a ``size`` x ``size`` matrix (row-major order).

    origin_array -> any flat sequence holding at least ``size * size`` numbers;
                    values past the first ``size * size`` are ignored
    size         -> side length of the square matrix to create

    Returns a new float64 array of shape ``(size, size)`` that does not share
    memory with ``origin_array``.

    Raises ValueError when ``origin_array`` holds fewer than ``size * size``
    values.
    """
    flat = np.asarray(origin_array, dtype=np.float64).reshape(-1)
    needed = size * size
    if flat.size < needed:
        raise ValueError(
            "cannot build a %dx%d matrix from %d values" % (size, size, flat.size)
        )
    # A single reshape replaces the element-by-element Python loop; the copy
    # keeps the result independent from the caller's buffer.
    return flat[:needed].reshape(size, size).copy()

def de_imagify(img, size):
    """Turn a square matrix back into a flat vector (row-major order).

    img  -> 2-D matrix (array or nested sequence) with at least ``size`` rows
            and ``size`` columns; only the top-left ``size`` x ``size`` corner
            is used
    size -> side length of the square region to flatten

    Returns a new float64 array of length ``size ** 2`` that does not share
    memory with ``img``.

    Raises ValueError when ``img`` is not 2-D or is smaller than
    ``size`` x ``size``.
    """
    # np.array copies, so the returned vector never aliases the caller's data.
    square = np.array(img, dtype=np.float64)
    if square.ndim != 2 or square.shape[0] < size or square.shape[1] < size:
        raise ValueError(
            "expected a matrix of at least %dx%d, got shape %s"
            % (size, size, square.shape)
        )
    return square[:size, :size].reshape(size * size)

In [None]:
def crop_all_images(input_file_path, output_file_path):
    """Crop every image to its largest contour and rescale all crops uniformly.

    Each row of the input file is treated as ``(id, flat 100x100 image)``.
    For every image the bounding rectangle of the largest contour is cut out of
    the original (un-blurred) image. Once all images are processed, every crop
    is rescaled to ``max_size`` x ``max_size``, where ``max_size`` is the
    largest bounding-rectangle side seen in the file, and stored back
    flattened in column 1.

    input_file_path  -> .npy file holding the rows to process
    output_file_path -> where the rows with cropped images are saved

    Returns the size of the biggest cropped image before rescaling, i.e. the
    side length of the saved square images.
    """
    # The rows mix ids and arrays (object dtype), which NumPy >= 1.16.3 refuses
    # to load unless allow_pickle=True. Pickle can execute arbitrary code, so
    # only use this on data files you trust.
    all_img = np.load(input_file_path, encoding='latin1', allow_pickle=True)

    # Identical copy of the rows; only the image column is replaced below.
    cropped_img = all_img.copy()
    # Crops (already rescaled to 100x100) kept until the final size is known.
    cropped_list = []

    # Largest bounding-rectangle dimensions seen so far.
    max_width = 0
    max_height = 0
    for row in tqdm(range(all_img.shape[0])):
        img = imagify(all_img[row][1], 100)
        # Blur, then scale to 8-bit and binarise so that contours can be found.
        # assumes pixel values lie in [0, 1] - TODO confirm
        blurred = cv.GaussianBlur(img, (3, 3), 0)
        imgray = np.uint8(blurred * 255)
        _, thresh = cv.threshold(imgray, 20, 255, cv.THRESH_BINARY)
        # OpenCV 3 returns (image, contours, hierarchy) while OpenCV 2 and 4
        # return (contours, hierarchy); [-2] picks the contours in every case.
        contours = cv.findContours(thresh, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)[-2]
        if len(contours) != 0:
            # Bounding rectangle (x, y, width, height) of the biggest contour.
            biggest = max(contours, key=cv.contourArea)
            a, b, c, d = cv.boundingRect(biggest)
        else:
            # Nothing survived the threshold: keep the whole image rather than
            # crashing on an empty contour list.
            a, b, c, d = 0, 0, img.shape[1], img.shape[0]
        # Crop the original (un-blurred) image.
        crop = cv.resize(img[b:b + d, a:a + c], (100, 100))
        cropped_list.append(crop)
        max_width = max(max_width, c)
        max_height = max(max_height, d)

    # Biggest value between width and height over the whole file.
    max_size = max(max_height, max_width)
    for row in tqdm(range(cropped_img.shape[0])):
        # Bring every crop to the same max_size x max_size square.
        resized = cv.resize(cropped_list[row], (max_size, max_size))
        cropped_img[row][1] = de_imagify(resized, max_size)

    np.save(output_file_path, cropped_img)
    return max_size

In [None]:
def resize_all(input_file_path, output_file_path, current_size, size):
    """Resize every image stored in a file and save the result.

    input_file_path  -> .npy file whose rows are ``(id, flat square image)``
    output_file_path -> where the rows with resized images are saved
    current_size     -> current side length of the square images
    size             -> wanted side length of the square images

    Prints the number of rows that were loaded. Returns nothing.
    """
    # Object-dtype rows need allow_pickle=True on NumPy >= 1.16.3. Pickle can
    # execute arbitrary code, so only use this on data files you trust.
    all_img = np.load(input_file_path, encoding='latin1', allow_pickle=True)
    print(all_img.shape[0])
    for i in tqdm(range(all_img.shape[0])):
        img = imagify(all_img[i][1], current_size)
        resized_img = cv.resize(img, (size, size))
        # The image column simply receives a new, differently sized vector.
        all_img[i][1] = de_imagify(resized_img, size)
    np.save(output_file_path, all_img)

In [None]:
# Sigmoid function for our activation function, with additional
# option for getting the derivative (when calculating gradient)
def sigmoid(x, deriv=False):
    """Logistic activation function.

    x     -> scalar or numpy array
    deriv -> when True, return the derivative s(x) * (1 - s(x)) instead of s(x)
    """
    value = 1.0 / (1.0 + np.exp(-x))
    if not deriv:
        return value
    return value * (1 - value)

In [None]:
def square_error(prediction, correct_answer, deriv=False):
    """Squared-error cost between a prediction and its target.

    prediction     -> network output (in this notebook an (n, 1) column)
    correct_answer -> target vector; when it holds as many values as
                      ``prediction`` it is reshaped to the prediction's shape
                      before subtracting
    deriv          -> when True, return ``prediction - correct_answer``
                      (element-wise) instead of the summed squared error

    Returns a float-like scalar (sum of squared differences), or the
    difference array when ``deriv`` is True.
    """
    prediction = np.asarray(prediction)
    target = np.asarray(correct_answer)
    if target.size == prediction.size:
        # Without this an (n, 1) prediction minus an (n,) target broadcasts to
        # an (n, n) matrix and the summed error is meaningless.
        target = target.reshape(prediction.shape)
    elif deriv:
        # Same column layout the original hard-coded as reshape(31, 1).
        target = target.reshape(-1, 1)
    difference = np.subtract(prediction, target)
    if deriv:
        return difference
    return np.sum(np.square(difference))

In [None]:
# Function to read in our given data file and convert its structure into something easier to work with
def create_train_data():
    """Build, save and return the network-ready training set.

    Reads the text labels from ``train_label_filename`` and the pre-processed
    images from ``train_images_filename``, then rewrites every row from
    ``(id, image)`` to ``(normalized image, one-hot label)``:

    * the image is divided by the sum of its pixels and reshaped to an
      ``(IMG_SIZE**2, 1)`` column vector;
    * the label becomes a vector of ``len(class_name_list)`` zeros with a 1 at
      the label's index.

    The result is saved to ``formatted_data_filename`` and returned.

    Raises KeyError for a label that is not in ``label_to_num`` and IndexError
    when there are fewer labels than images.
    """
    training_labels = []
    with open(train_label_filename) as label_file:
        # Skip the header line (tolerating a completely empty file)
        next(label_file, None)
        for line in label_file:
            if not line.strip():
                # Ignore blank lines such as a trailing newline at end of file
                continue
            # take the text label (second CSV column), convert it to a number
            text_label = line.split(",")[1].strip()
            training_labels.append(label_to_num[text_label])

    # Turns an integer value into a vector with only that index set to 1
    # This is used to turn our label into something we can use in our cost
    # function computation.
    def val_to_vec(val, size=len(class_name_list)):
        vec = np.zeros((size,))
        vec[val] = 1
        return vec

    # The scaled image files are produced once by the pre-processing helpers:
#     size = crop_all_images('./train_images.npy','./scaled_train_images.npy')
#     resize_all('./scaled_train_images.npy','./scaled_train_images.npy',size,30)
#     size = crop_all_images('./test_images.npy','./scaled_test_images.npy')
#     resize_all('./scaled_test_images.npy','./scaled_test_images.npy',size,30)

    # Object-dtype rows need allow_pickle=True on NumPy >= 1.16.3. Pickle can
    # execute arbitrary code, so only use this on data files you trust.
    training_images = np.load(train_images_filename, encoding='latin1', allow_pickle=True)
    # Turn rows of (id, image) to (normalized image, vectorized label of image)
    for i in tqdm_notebook(range(training_images.shape[0])):
        image = np.asarray(training_images[i][1], dtype=np.float64)
        total = np.sum(image)
        if total != 0:
            # Normalize the image; an all-zero image is left as zeros instead
            # of being turned into NaNs by a division by zero.
            image = image / total
        # The two cells are assigned one at a time: packing two differently
        # shaped arrays into a single np.array([...]) raises on recent NumPy.
        training_images[i][0] = image.reshape((IMG_SIZE**2, 1))
        training_images[i][1] = val_to_vec(training_labels[i])  # Image label as vector
    np.save(formatted_data_filename, training_images)
    return training_images

In [None]:
if __name__ == '__main__':
    # Check if we've already reformatted our input data, or if we need to recalculate it
    if not os.path.isfile(formatted_data_filename):
        train_data = create_train_data()
    else:
        print("Using pre-formatted data file")
        # The cached rows hold arrays inside an object array, which
        # NumPy >= 1.16.3 only loads with allow_pickle=True. Pickle can execute
        # arbitrary code, so only load cache files you created yourself.
        train_data = np.load(formatted_data_filename, encoding='latin1', allow_pickle=True)

# Separates the training data back out into Xs and ys:
# column 0 holds the image column vectors, column 1 the label vectors.
Xs, ys = np.hsplit(train_data, 2)
# Stack the images into one numeric array; the labels stay a flat array of vectors.
Xs, ys = np.stack(Xs.flatten()), ys.flatten()

In [None]:
class NeuralNetwork:
    """Fully connected feed-forward network trained by gradient descent.

    Inputs and layer outputs are handled as column vectors: every layer
    computes ``z = W . a + b``, divides ``z`` by the sum of its entries and
    applies the activation function.

    Attributes:
        num_layers      -> number of layers, input and output included
        weights         -> one (outputs x inputs) matrix per layer transition
        biases          -> one (outputs x 1) column per layer transition
        activation_func -> callable ``f(z, deriv=False)``
        cost_func       -> callable ``f(prediction, target, deriv=False)``
    """

    def __init__(self, num_layers, input_size, output_size):
        """Create a network with uniformly random weights and biases in [0, 1).

        num_layers  -> number of layers, input and output included
        input_size  -> length of the input vectors
        output_size -> length of the output vectors
        """
        self.num_layers = num_layers

        # First we calculate the size of each layer, evenly spaced over the range from input to output
        layer_sizes = np.linspace(output_size, input_size, num=num_layers, dtype=int)[::-1]

        # Each layer is an MxN matrix, where N is the size of the input vector and M is the size of the output vector
        self.weights = [np.random.rand(layer_sizes[i + 1], layer_sizes[i]) for i in range(0, self.num_layers - 1)]
        self.biases = [np.random.rand(layer_sizes[i + 1], 1) for i in range(0, self.num_layers - 1)]

        # Define our default cost and activation functions
        self.activation_func = sigmoid
        self.cost_func = square_error

    def _forward(self, X):
        """Run X through every layer, recording the intermediate values.

        Returns ``(z_history, activation_history)``: the normalized
        pre-activations of each layer, and the activations starting with X
        itself.
        """
        activation = X
        activation_history = [X]
        z_history = []
        for weight, bias in zip(self.weights, self.biases):
            # We normalize our calculation (by dividing over the sum) so that all our sigmoid values don't become 1
            z = np.dot(weight, activation) + bias
            z = z / np.sum(z)
            z_history.append(z)
            activation = self.activation_func(z)
            activation_history.append(activation)
        return z_history, activation_history

    def tune_parameters(self, Xs, ys, learn_rate = 1.0):
        """Apply one gradient-descent step computed over a batch.

        Xs         -> sequence of input column vectors
        ys         -> sequence of target vectors, aligned with Xs
        learn_rate -> step size applied to the averaged gradient

        An empty batch leaves the network untouched.
        """
        batch_size = len(Xs)
        if batch_size == 0:
            # Nothing to learn from, and the average below would divide by zero
            return
        # Initialize an empty gradient
        bias_gradient = [np.zeros(bias.shape) for bias in self.biases]
        weight_gradient = [np.zeros(weight.shape) for weight in self.weights]
        for X, y in zip(Xs, ys):
            # Back-propagate a data point...
            bias_change, weight_change = self.backpropagate(X, y)
            # Then update the overall gradient with the individual point's gradient
            bias_gradient = [bg + bc for bg, bc in zip(bias_gradient, bias_change)]
            weight_gradient = [wg + wc for wg, wc in zip(weight_gradient, weight_change)]
        # Update the weights and biases by the average gradient scaled by the learning rate
        step = learn_rate / batch_size
        self.weights = [old_weight - step * wg for old_weight, wg in zip(self.weights, weight_gradient)]
        self.biases = [old_bias - step * bg for old_bias, bg in zip(self.biases, bias_gradient)]

    # Get the gradient for the cost function for a single data point
    def backpropagate(self, X, y):
        """Return ``(bias_gradient, weight_gradient)`` for one data point.

        X -> input column vector
        y -> target vector for X

        Both results are lists with one array per layer transition, shaped
        like ``self.biases`` and ``self.weights``.

        NOTE(review): the backward pass treats the normalized z as the
        pre-activation; it does not differentiate through the division by
        ``np.sum(z)`` done in the forward pass.
        """
        bias_gradient = [np.zeros(bias.shape) for bias in self.biases]
        weight_gradient = [np.zeros(weight.shape) for weight in self.weights]
        # First, pass our input all the way through the network
        z_history, activation_history = self._forward(X)
        # Calculate the gradient for the last layer
        sig = self.activation_func(z_history[-1], deriv=True)
        cost = self.cost_func(activation_history[-1], y, deriv=True)
        delta = cost * sig
        bias_gradient[-1] = delta
        weight_gradient[-1] = np.dot(delta, activation_history[-2].transpose())
        # Then, moving back through the other layers, propagate the weight shifts.
        # The network's own layer count and activation function are used here,
        # not a notebook-level variable or a hard-wired sigmoid.
        for layer in range(2, self.num_layers):
            z = z_history[-layer]
            delta = np.dot(self.weights[-layer + 1].transpose(), delta) * self.activation_func(z, deriv=True)
            bias_gradient[-layer] = delta
            weight_gradient[-layer] = np.dot(delta, activation_history[-layer - 1].transpose())
        return bias_gradient, weight_gradient

    # Returns the vector result of our network's prediction
    def feed_forward(self, X):
        """Return the output-layer activation for the input column vector X."""
        _, activation_history = self._forward(X)
        return activation_history[-1]

    def accuracy(self, Xs, ys):
        """Return the fraction of inputs whose predicted label equals argmax(y).

        Returns 0.0 when Xs is empty.
        """
        if len(Xs) == 0:
            return 0.0
        correct = 0
        for X, y in zip(Xs, ys):
            if self.predict(X) == np.argmax(y):
                correct += 1
        return correct / len(Xs)

    # Returns the label result of our network's prediction
    def predict(self, X):
        """Return the index of the largest output activation for X."""
        # argmax works on the flattened output, so no class count is hard-coded
        return np.argmax(self.feed_forward(X))

In [None]:
# Train our neural network on each of the cross-validation folds
NUM_FOLDS = 5      # number of cross-validation folds
BATCH_SIZE = 100   # number of samples per gradient-descent step

# n_splits/shuffle are passed by keyword: recent scikit-learn releases no
# longer accept `shuffle` as a positional argument.
kfold = KFold(n_splits=NUM_FOLDS, shuffle=False)
num_layers = 3

# The network with the lowest mean validation error over all folds is kept
best_network = None
best_mse = sys.float_info.max

epochs = 40

for training_indices, validation_indices in tqdm_notebook(kfold.split(Xs), desc="K-Folds", total=NUM_FOLDS):

    # Get the current folds of training/validation data
    train_x, train_y = Xs[training_indices], ys[training_indices]
    valid_x, valid_y = Xs[validation_indices], ys[validation_indices]

    # Initialize a bunch of random layers (containing weights and biases)
    network = NeuralNetwork(num_layers, train_x.shape[1], len(class_name_list))

    # Perform gradient descent on the layers
    for _ in tqdm_notebook(range(epochs), desc="Epochs"):
        # Shuffle all our data (zipping to keep each image paired with its label).
        # A dedicated name is used so the notebook-level `train_data` array is
        # not overwritten by this cell.
        shuffled_pairs = list(zip(train_x, train_y))
        random.shuffle(shuffled_pairs)
        # Split into chunks of size BATCH_SIZE
        train_chunks = [shuffled_pairs[x:x+BATCH_SIZE] for x in range(0, len(shuffled_pairs), BATCH_SIZE)]
        for train_chunk in tqdm_notebook(train_chunks, desc="Training Batches"):
            # Unpack our chunk and tune our network on that chunk
            batch_Xs, batch_ys = zip(*train_chunk)
            network.tune_parameters(batch_Xs, batch_ys)

    # Mean squared error over the validation fold. The label is reshaped to a
    # column so it lines up with the network's column-vector output instead of
    # broadcasting into a square matrix.
    network_error = sum(
        square_error(network.feed_forward(X), y.reshape(-1, 1))
        for X, y in zip(valid_x, valid_y)
    ) / len(valid_x)
    if network_error < best_mse:
        best_network = network
        best_mse = network_error

In [None]:
# Object-dtype rows need allow_pickle=True on NumPy >= 1.16.3. Pickle can
# execute arbitrary code, so only load data files you trust.
test_data = np.load(test_images_filename, encoding='latin1', allow_pickle=True)
# Rows are (id, image): keep the image column only
_, test_Xs = np.hsplit(test_data, 2)
test_Xs = np.stack(test_Xs.flatten())

# The network was trained on images divided by their pixel sum and shaped as
# column vectors (see create_train_data), so the test images must go through
# the same transformation before being fed to it.
predictions = []
for X in test_Xs:
    pixel_sum = np.sum(X)
    if pixel_sum != 0:
        X = X / pixel_sum
    predictions.append(best_network.predict(X.reshape((-1, 1))))

# NOTE(review): the submission Id is the row position, not the id stored in
# the test file - confirm that both match.
with open('submission.csv', 'w') as outfile:
    outfile.write('Id,Category\n')
    for (i, label) in enumerate(predictions):
        outfile.write(f"{i},{class_name_list[label]}\n")