# Loading Data

In [1]:
import pandas as pd 
from skimage.io import imread
import numpy as np

def read_data(typeData, labelsInfo, imageSize, path):
 #Intialize x  matrix
 x = np.zeros((labelsInfo.shape[0], imageSize))

 for (index, idImage) in enumerate(labelsInfo["ID"]):
  #Read image file
  nameFile = "{0}/{1}Resized/{2}.Bmp".format(path, typeData, idImage)
  img = imread(nameFile, as_grey=True)

  x[index, :] = np.reshape(img, (1, imageSize))
 return x

In [2]:
imageSize = 400 # 20 x 20 pixels

#Set location of data files , folders
path = "/home/faizy/workspace/julia"

labelsInfoTrain = pd.read_csv("{0}/trainLabels.csv".format(path))

#Read training matrix
xTrain = read_data("train", labelsInfoTrain, imageSize, path)

#Read information about test data ( IDs ).
labelsInfoTest = pd.read_csv("{0}/sampleSubmission.csv".format(path))

#Read test matrix
xTest = read_data("test", labelsInfoTest, imageSize, path)

yTrain = map(ord, labelsInfoTrain["Class"])

# Defining main functions

In [3]:
def euclidean_distance (a, b):
 dif = a - b
 return dif.dot(dif)

In [4]:
def get_k_nearest_neighbors(x, i, k):
 imageI = x[i,:]
 distances = [euclidean_distance(imageI, x[j,:]) for j in xrange(x.shape[0])]     
 sortedNeighbors = np.argsort(distances)
 kNearestNeighbors = sortedNeighbors[1:(k+1)]
 return kNearestNeighbors

In [5]:
def assign_label(x, y, k, i):
 kNearestNeighbors = get_k_nearest_neighbors(x, i, k)
 counts = {}
 highestCount = 0
 mostPopularLabel = 0
 for n in kNearestNeighbors:
  labelOfN = y[n]
  if labelOfN not in counts :
   counts[labelOfN] = 0
  counts[labelOfN] += 1
  if counts[labelOfN] > highestCount :
   highestCount = counts[labelOfN]
   mostPopularLabel = labelOfN
 return mostPopularLabel

# Running LOOF-CV with 1NN sequentially

In [6]:
import time
start = time.time()
k=1
yPredictions = [assign_label(xTrain, yTrain, k, i) for i in xrange(xTrain.shape[0])]
print time.time() - start, "seconds elapsed"

121.855827093 seconds elapsed


In [7]:
yPredictions

[110,
 83,
 84,
 73,
 48,
 102,
 76,
 73,
 75,
 78,
 65,
 65,
 65,
 117,
 116,
 65,
 79,
 73,
 111,
 107,
 116,
 79,
 74,
 85,
 100,
 90,
 101,
 69,
 78,
 101,
 67,
 112,
 78,
 84,
 73,
 65,
 119,
 73,
 76,
 114,
 82,
 103,
 97,
 78,
 72,
 79,
 107,
 49,
 65,
 66,
 111,
 108,
 72,
 65,
 48,
 87,
 49,
 68,
 110,
 102,
 76,
 79,
 76,
 67,
 83,
 69,
 79,
 82,
 73,
 72,
 48,
 111,
 83,
 119,
 78,
 54,
 110,
 99,
 111,
 79,
 50,
 101,
 70,
 82,
 76,
 101,
 114,
 73,
 84,
 78,
 48,
 115,
 76,
 83,
 69,
 49,
 73,
 49,
 114,
 111,
 66,
 108,
 101,
 53,
 69,
 76,
 101,
 73,
 65,
 70,
 68,
 55,
 101,
 69,
 80,
 79,
 83,
 73,
 97,
 84,
 54,
 108,
 72,
 101,
 79,
 68,
 65,
 80,
 84,
 83,
 82,
 76,
 79,
 110,
 114,
 117,
 112,
 74,
 108,
 76,
 65,
 105,
 66,
 88,
 65,
 69,
 116,
 85,
 51,
 73,
 79,
 108,
 69,
 76,
 65,
 110,
 69,
 82,
 101,
 65,
 78,
 84,
 69,
 78,
 67,
 48,
 84,
 76,
 72,
 88,
 77,
 82,
 82,
 114,
 78,
 84,
 114,
 69,
 69,
 85,
 68,
 68,
 101,
 82,
 65,
 73,
 69,
 69,
 86,
 114,
 