In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import codecs
from PIL import Image

In [2]:
trainingLink = "./train_images.npy"
trainIm = np.load(trainingLink,encoding = 'latin1')

In [3]:
reshapedList = []
for i in range(0, len(trainIm)):
    reshapedList.append(trainIm[i][1].reshape(100,100))
reshapedImages = np.array(reshapedList)

In [4]:
def allPtsInSpot(image, i, j, allpoints, hi):
    Check = allpoints
    currentNew = hi
    Check.append([i, j, image[i, j]])
    currentNew.append([i, j, image[i, j]])
    if((image[i+1, j] > 2 ) and not([i+1, j, image[i+1, j]] in Check) and (i<99)):
        allPtsInSpot(image, i+1, j, Check, currentNew)
    if((image[i-1, j] > 2) and not([i-1, j, image[i-1, j]] in Check) and (i>0)):
        allPtsInSpot(image, i-1, j, Check, currentNew)
    if((image[i, j+1] > 2) and not([i, j+1, image[i, j+1]] in Check) and (j<99)):
         allPtsInSpot(image, i, j+1, Check, currentNew)
    if((image[i, j-1] > 2) and not([i, j-1, image[i, j-1]] in Check) and (j>0)):
        allPtsInSpot(image, i, j-1, Check, currentNew)
    return currentNew

In [5]:
def findDraw(image):
    visited = []
    spotElements = np.zeros(0)                          
    temp = np.zeros(0)
    for i in range(0, 100):
        for j in range(0,100):
            if(image[i,j]!=0 and not([i, j, image[i, j]] in visited)):
                temp = allPtsInSpot(image, i, j, visited, [])
                if(len(temp)>len(spotElements)):
                    spotElements = temp
    return spotElements

In [6]:
def resizing(image):
    imageNp = np.asarray(image)
    xmax, ymax, nope = np.amax(imageNp,axis=0)
    xmin, ymin, nope = np.amin(imageNp,axis=0)
    
    xratio = int(100/(xmax - xmin))
    yratio = int(100/(ymax - ymin))
    
    ratio = min(xratio,yratio)
    resizedImage = np.zeros((100,100))
    
    for i in range(0,np.shape(image)[0]):
        xtemp = int(image[i][0] - xmin)
        ytemp = int(image[i][1] - ymin)
        color = image[i][2]
        for pix in range(0, int(ratio)):
            currentx = (ratio*xtemp + pix)/1.2
            for piy in range(0, int(ratio)):
                currenty = (ratio*ytemp + piy)/1.2
                resizedImage[int(currentx), int(currenty)] = color
    return resizedImage

In [7]:
original_images = []
cropped_images = []

for i in range(0, 10000):
    original_images.append(Image.fromarray(reshapedImages[i]))
    cropped_images.append(Image.fromarray(resizing(findDraw(reshapedImages[i]))))
    print("Done " + str(i/100) +"%" + "\r", end="")

Done 99.99%

In [8]:
image_vectors = []
for i in range(0, 10000):
    image_vectors.append(np.array(cropped_images[i]).reshape(10000,1))
    for j in range(0, len(image_vectors[i])):
        if image_vectors[i][j] > 0:
            image_vectors[i][j] = 1
    print("Done " + str(i/100) + "%" + "\r", end="")

Done 99.99%

In [9]:
label_dictionary = {}
label_dictionary["sink"] = 1
label_dictionary["pear"] = 2
label_dictionary["moustache"] = 3
label_dictionary["nose"] = 4
label_dictionary["skateboard"] = 5
label_dictionary["penguin"] = 6
label_dictionary["peanut"] = 7
label_dictionary["skull"] = 8
label_dictionary["panda"] = 9
label_dictionary["paintbrush"] = 10
label_dictionary["nail"] = 11
label_dictionary["apple"] = 12
label_dictionary["rifle"] = 13
label_dictionary["mug"] = 14
label_dictionary["sailboat"] = 15
label_dictionary["pineapple"] = 16
label_dictionary["spoon"] = 17
label_dictionary["rabbit"] = 18
label_dictionary["shovel"] = 19
label_dictionary["rollerskates"] = 20
label_dictionary["screwdriver"] = 21
label_dictionary["scorpion"] = 22
label_dictionary["rhinoceros"] = 23
label_dictionary["pool"] = 24
label_dictionary["octagon"] = 25
label_dictionary["pillow"] = 26
label_dictionary["parrot"] = 27
label_dictionary["squiggle"] = 28
label_dictionary["mouth"] = 29
label_dictionary["empty"] = 30
label_dictionary["pencil"] = 31

In [10]:
label_link = "./train_labels.csv"
label_file = open(label_link, "r")
label_file_split = label_file.read().split("\n")

target_almost_vectors = []
for i in range(0, 10000):
    target_almost_vectors.append(label_dictionary[label_file_split[i+1].split(",")[1]])

target_vectors = []
for i in range(0, 10000):
    temp_vector = np.zeros((31,1))
    temp_vector[target_almost_vectors[i] - 1] = 1
    target_vectors.append(temp_vector)

label_file.close()

In [11]:
from scipy.stats import truncnorm
def truncated_normal(mean=0, sd=1, low=0, upp=10):
    return truncnorm((low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd)

In [45]:
#def softmax(vector):
#    print(vector.shape)
#    result = np.array(vector.shape)
#    exponential_sum = 0
#    for i in range(len(vector)):
#        exponential_sum += np.exp(vector[i])
#    for i in range(len(vector)):
#        #print(sum(np.exp(vector[i]) / exponential_sum))
#        result[i] = np.exp(vector[i]) / exponential_sum
#        #print(result[0][i])
#    return result

In [48]:
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0)

In [49]:
from scipy.special import expit as sigmoid

class NeuralNetwork:
    
    def __init__(self, num_input_nodes, num_hidden_nodes, num_output_nodes, learn_rate):
        self.num_input_nodes = num_input_nodes
        self.num_hidden_nodes = num_hidden_nodes
        self.num_output_nodes = num_output_nodes
        self.learn_rate = learn_rate
        self.create_weight_matrices()
        
    def create_weight_matrices(self):
        bound = 1 / np.sqrt(self.num_input_nodes)
        X = truncated_normal(mean=0, sd=1, low=-bound, upp=bound)
        self.weight_matrix_input_hidden = X.rvs((self.num_hidden_nodes, self.num_input_nodes))
        
        bound = 1 / np.sqrt(self.num_hidden_nodes)
        X = truncated_normal(mean=0, sd=1, low=-bound, upp=bound)
        self.weight_matrix_hidden_output = X.rvs((self.num_output_nodes, self.num_hidden_nodes))
        
    def run(self, input_vector):
        output_vector = np.dot(self.weight_matrix_input_hidden, input_vector)
        output_vector = softmax(output_vector)
        
        output_vector = np.dot(self.weight_matrix_hidden_output, output_vector)
        output_vector = softmax(output_vector)
        
        return output_vector
    
    def train(self, input_vector, target_vector):
        output_vector_1 = np.dot(self.weight_matrix_input_hidden, input_vector)
        output_vector_hidden = softmax(output_vector_1)
        
        output_vector_2 = np.dot(self.weight_matrix_hidden_output, output_vector_hidden)
        output_vector_result = softmax(output_vector_2)
        
        output_errors = output_vector_result - target_vector
        for i in range(0, len(output_errors)):
            if output_errors[i] < 0:
                output_errors[i] = 0
        
        temp = output_errors * output_vector_result * (1.0 - output_vector_result)
        self.weight_matrix_hidden_output += self.learn_rate * np.dot(temp, output_vector_hidden.T)
        
        hidden_errors = np.dot(self.weight_matrix_hidden_output.T, output_errors)
        
        temp = hidden_errors * output_vector_hidden * (1.0 - output_vector_hidden)
        self.weight_matrix_input_hidden += self.learn_rate * np.dot(temp, input_vector.T)

In [82]:
test = NeuralNetwork(num_input_nodes=10000, num_output_nodes=31, num_hidden_nodes=2, learn_rate=0.22)

percent_to_run = 80
num_train_to_run = int(len(image_vectors) * percent_to_run / 100)
num_test_to_run = len(image_vectors) - num_train_to_run

train_images = image_vectors[:num_train_to_run]
test_images = image_vectors[-num_train_to_run:]

#for i in range(num_train_to_run):
for i in range(1):
    point = train_images[i]
    label = target_vectors[i]
    test.train(point, label)
    #print(test.weight_matrix_hidden_output[12][1])

In [83]:
for i in range(num_test_to_run):
    point = train_images[-i]
    label = target_vectors[-i]
    result = test.run(point)
    print(np.argmax(result), np.argmax(label))

19 18
19 30
28 28
28 19
19 11
19 23
19 4
19 25
28 13
19 9
19 12
19 2
19 19
19 19
19 29
19 16
19 21
19 5
19 23
19 23
19 2
19 10
19 15
19 6
19 13
19 10
19 24
19 9
19 29
19 10
19 9
19 4
19 27
19 18
28 11
19 23
19 4
28 23
19 20
19 17
19 12
19 24
19 23
19 10
19 29
19 22
19 14
19 7
19 30
19 29
19 23
19 3
28 2
19 2
19 2
19 11
19 17
19 28
19 18
19 30
19 15
19 4
19 0
19 21
19 24
19 29
19 9
19 10
19 14
28 19
19 17
19 3
19 2
19 8
28 28
19 4
19 16
19 21
19 30
19 9
19 19
19 29
19 17
19 25
19 9
19 15
19 23
19 22
19 16
19 2
19 0
19 29
19 21
19 11
19 12
19 4
19 18
19 16
19 14
19 8
19 25
19 17
19 12
19 12
19 15
19 3
19 27
19 12
19 5
19 4
19 6
19 24
19 17
19 22
19 28
28 11
19 1
19 7
19 24
19 7
19 29
19 10
19 24
19 2
19 1
19 4
19 4
19 13
19 26
19 28
19 15
19 3
19 3
19 22
19 16
19 6
19 12
28 14
19 18
19 23
19 30
19 19
19 28
19 10
19 5
19 2
19 25
19 4
19 18
19 15
19 6
19 12
19 8
19 4
19 12
19 30
19 10
28 7
19 14
19 6
19 28
19 2
19 14
19 4
19 25
19 23
19 28
19 23
19 1
19 23
19 10
19 13
19 25
19 23
19 17
19 

19 2
19 7
19 5
19 28
19 14
19 2
19 13
28 29
19 19
19 12
28 14
19 30
19 1
19 13
19 26
19 25
19 29
19 14
19 27
19 22
19 10
28 13
19 3
19 24
19 20
19 13
19 28
19 13
19 21
19 6
19 13
19 19
19 28
19 7
19 18
19 25
19 17
19 7
19 1
19 12
19 18
19 11
19 21
19 17
19 24
19 2
19 26
19 28
19 23
19 15
19 22
19 16
19 2
19 29
19 28
19 28
19 28
19 1
19 30
19 4
19 23
19 28
19 24
19 30
19 23
19 2
19 5
28 28
19 13
19 13
19 3
19 4
19 30
19 25
19 11
19 15
19 6
19 22
19 8
19 29
19 13
19 10
19 25
19 7
19 14
19 17
19 28
19 17
19 13
19 13
19 14
19 1
19 4
19 4
19 7
19 30
19 4
19 28
19 12
19 12
19 2
19 28
19 15
19 23
19 10
19 26
19 4
19 12
19 12
19 15
28 30
19 29
19 9
28 14
19 30
19 2
19 6
19 24
19 28
19 2
19 5
19 27
19 17
19 6
19 28
19 12
19 0
19 4
19 4
19 29
19 17
19 19
19 7
19 27
28 26
19 19
19 17
19 21
19 15
19 24
19 2
19 16
19 23
19 10
19 4
19 24
19 2
19 8
19 4
19 23
19 7
19 28
19 13
19 0
19 15
19 15
19 23
19 28
19 18
19 25
19 24
19 24
19 26
19 22
19 5
19 17
19 29
19 24
19 10
19 30
19 24
19 28
19 15
19 21
19