In [2]:
import cPickle
import numpy as np
import os
import pdb

def unpickle(file_path):
	"""
	Unpickles a cifar batch file.

	Parameters
	----------
	file_path : string
		path to the cifar10 batch file, which is a Python "pickled" object produced with cPickle. 

	Returns
	-------
	dict
		a dictioniary with the following elements:
		
		data (string) -> numpy.ndarray
			a 10000x3072 numpy array of uint8s. Each row of the array stores a 32x32 colour image. 
			The first 1024 entries contain the red channel values, the next 1024 the green, and the final 1024 the blue. 
			The image is stored in row-major order, so that the first 32 entries of the array are the red channel values 
			of the first row of the image.
			
		labels (string) -> list
			a list of 10000 numbers in the range 0-9. The number at index i indicates the label of the ith image 
			in the array data.
	"""
	with open(file_path, 'rb') as f:
		datadict = cPickle.load(f)
	return datadict

def load_cifar_batches(root_path):
	"""
	Load all cifar batches into a dictionary.

	Parameters
	----------
	root_path
		path to directory where all 6 cifar10 batch files are stored

	Returns
	-------
	dict
		a dictionary with the following elements: 

		training_data (string) -> numpy.ndarray
			a 50000x3072 numpy array of uint8s. Each row of the array stores a 32x32 colour image.			

		training_labels (string) -> numpy.ndarray
			a numpy array of 50000 vectors that each represent labels from 0 to 9. The number at index i indicates the label 
			of the ith image in the training data.

		test_data (string) -> numpy.ndarray
			a 10000x3072 numpy array of uint8s. Each row of the array stores a 32x32 colour image.		
		
		test_labels (string) -> numpy.ndarray
			a numpy array of 50000 vectors that each represent labels from 0 to 9. The number at index i indicates the label 
			of the ith image in the test data.
 	"""
 	dataset = {}

 	# training data (5 batch files)
 	training_data = []
 	training_labels = []
 	for i in range(1, 2): # range(1, 6) for all 5 datasets, use just 1 for development
 		batch_file_name = 'data_batch_' + str(i)
 		file_path = os.path.join(root_path, batch_file_name)
 		
 		datadict = unpickle(file_path)
 	
 		training_data.extend(datadict['data'])
 		training_labels.extend(datadict['labels'])

 	dataset['training_data'] = np.array(training_data)
 	dataset['training_labels'] = np.array([vectorize_label(label) for label in training_labels])

 	# test data (1 batch file)
 	file_path = os.path.join(root_path, 'test_batch')

 	datadict = unpickle(file_path)
 	dataset['test_data'] = datadict['data']
 	dataset['test_labels'] = np.array(datadict['labels'])

 	return dataset

@np.vectorize
def vectorize_label(n):
	"""
	Constructs a 10 x 1 vector of length 10 based on the input, so that the (n-1)-th element in the vector is 1 and all the other
	elements are 0.

	Parameters
	----------
	int : label
		The label of the class.

	Returns
	-------
	numpy.ndarray
		The vectorized representation of the label.	
	"""
	v = np.zeros(10)
	v[n] = 1
	return v

In [19]:
import pdb
import numpy as np
import matplotlib.pyplot as plt
from data_utils import load_cifar_batches

class Network():
	def __init__(self, sizes):
		self.num_layers = len(sizes)
		self.sizes = sizes

		# initialize random weights
		self.w1 = 0.001 * np.random.randn(sizes[1], sizes[0]) # hidden layer weights 
		self.w2 = 0.001 * np.random.randn(sizes[2], sizes[1]) # output layer weights

		# initialize biases 
		self.b1 = 0.001 * np.random.randn(sizes[1], 1)
		self.b2 = 0.001 * np.random.randn(sizes[2], 1)

	def predict(self, test_data):
		predictions = []

		for x in test_data:
			x = x.reshape(3072, 1)
			y = Network.sigmoid(np.dot(self.w1, x) + self.b1)
			z = np.dot(self.w2, y) + self.b2

			predictions.append(z.argmax())

		return np.array(predictions)

	def train(self, training_data, training_labels, batch_size=10, epochs=1, learning_rate=0.1):
		data_mini_batches = [training_data[n:n+batch_size] for n in range(0, len(training_data), batch_size)]
		label_mini_batches = [training_labels[n:n+batch_size] for n in range(0, len(training_labels), batch_size)]
		
		# calculate batches in 1/10 epoch for plotting 
		batches_in_tenth_epoch = len(training_data) / 10 / batch_size
		tenth_epoch_losses = []
		tenth_epoch_loss = 0

		total_instances = len(training_data) * epochs

		for epoch in range(epochs):
			for i, batch in enumerate(data_mini_batches):
				# print how many percentage till finished training 
				percentage = round(float(i * batch_size + epoch * len(training_data)) / total_instances * 100, 2) 
				print str(percentage) + "%" 

				# FEEDFORWARD
				# reshape array in order to easily calculate dot product
				x = batch.T
				y = Network.sigmoid(np.dot(self.w1, x) + self.b1)
				z = np.dot(self.w2, y) + self.b2
				p = np.apply_along_axis(self.softmax, 0, z)

				# calculate loss
				loss = np.zeros(batch_size)
				for j, (a, b) in enumerate(zip(p.T, label_mini_batches[i])):
					loss[j] = self.cross_entropy_loss(a, b)

				batch_loss = sum(loss) / batch_size
				tenth_epoch_loss += batch_loss

				if not epoch and not i:
					tenth_epoch_losses.append(tenth_epoch_loss)
					tenth_epoch_loss = 0
				elif not i % batches_in_tenth_epoch:
					tenth_epoch_losses.append(tenth_epoch_loss / batches_in_tenth_epoch)
					tenth_epoch_loss = 0

				# BACKPROPAGATION
				# output layer (z) gradient
				dloss_dz = p - label_mini_batches[i].T
				dloss_dw2 = np.dot(dloss_dz, y.T)

				# hidden layer (y) gradient
				dloss_dy = np.dot(self.w2.T, dloss_dz)
				dy_da = self.sigmoid_derivative(y)
				dloss_da = dloss_dy * dy_da
				dloss_dw1 = np.dot(dloss_da, x.T)
				print "len(x)"+ str(len(x))
				print "len(x[0])"+ str(len(x[0]))
				print "len(w1)"+ str(len(self.w1))
				print "len(w1[0])"+ str(len(self.w1[0]))
				print "len(w2)"+ str(len(self.w2))
				print "len(w2[0])"+ str(len(self.w2[0]))
				print "len(b1)"+ str(len(self.b1))
				print "len(b1[0])"+ str(len(self.b1[0]))
				print "len(b2)"+ str(len(self.b2))
				print "len(b2[0])"+ str(len(self.b2[0]))
				print "len(dy_da)"+ str(len(dy_da))
				print "len(dy_da[0])"+ str(len(dy_da[0]))
				print "len(y)"+ str(len(y))
				print "len(y[0])"+ str(len(y[0]))
				print "len(dloss_dz)"+ str(len(dloss_dz))
				print "len(dloss_dz[0])"+ str(len(dloss_dz[0]))
				print "len(dloss_dy)"+ str(len(dloss_dy))
				print "len(dloss_dy[0])"+ str(len(dloss_dy[0]))
				print "len(dloss_da)"+ str(len(dloss_da))
				print "len(dloss_da[0])"+ str(len(dloss_da[0]))
				print "dloss_dw1"+ str(dloss_dw1)
				print "dloss_dw2"+ str(dloss_dw2)
				print "len(dloss_dw1)"+ str(len(dloss_dw1))
				print "len(dloss_dw2)"+ str(len(dloss_dw2))
				print "len(dloss_dw1[0])"+ str(len(dloss_dw1[0]))
				print "len(dloss_dw2[0])"+ str(len(dloss_dw2[0]))
				print "np.sum(dloss_da, axis=1).reshape(self.sizes[1], 1)"+ str(np.sum(dloss_da, axis=1).reshape(self.sizes[1], 1))
				print "np.sum(dloss_dz, axis=1).reshape(self.sizes[2], 1)"+ str(np.sum(dloss_dz, axis=1).reshape(self.sizes[2], 1))
				print "len(np.sum(dloss_da, axis=1).reshape(self.sizes[1], 1))"+ str(len(np.sum(dloss_da, axis=1).reshape(self.sizes[1], 1)))
				print "len(np.sum(dloss_dz, axis=1).reshape(self.sizes[2], 1)[0])"+ str(len(np.sum(dloss_dz, axis=1).reshape(self.sizes[2], 1)))
				print "len(np.sum(dloss_da, axis=1).reshape(self.sizes[1], 1))"+ str(len(np.sum(dloss_da, axis=1).reshape(self.sizes[1], 1))[0])
				print "len(np.sum(dloss_dz, axis=1).reshape(self.sizes[2], 1)[0])"+ str(len(np.sum(dloss_dz, axis=1).reshape(self.sizes[2], 1))[0])
				# perform weight update
				self.w1 -= dloss_dw1 * learning_rate
				self.w2 -= dloss_dw2 * learning_rate

				# perform bias update
				self.b1 -= np.sum(dloss_da, axis=1).reshape(self.sizes[1], 1) * learning_rate
				self.b2 -= np.sum(dloss_dz, axis=1).reshape(self.sizes[2], 1) * learning_rate

		# plot epoch losses 
		plt.plot(range(len(tenth_epoch_losses)), tenth_epoch_losses)
		plt.xlabel("1/10 epoch")
		plt.ylabel("Loss")
		plt.show()

	def cross_entropy_loss(self, probability_distribution, target_vector):
		"""
		Cross entropy loss function.
		"""
		return -np.sum((target_vector * np.log(probability_distribution)))

	def softmax(self, z):
		"""
		Takes a vector of arbitrary real-valued scores and squashes it to a vector of values 
		between zero and one that sum to one. This vector represents a probability distribution
		over mutually exclusive alternatives.

		Parameters
		----------
		z : numpy.ndarray
			a vector with real values that represent the scores of each class. 

		Returns
		-------
		numpy.ndarray
			 a vector of values between zero and one that sum to one.
		"""
		f = z - np.max(z)
		return np.exp(f) / np.sum(np.exp(f))

	@staticmethod
	@np.vectorize
	def sigmoid(x):
		""" 
		A function that takes a real-valued number and "squashes" it into range between 0 and 1, so
		that large negative numbers become 0 and large positive numbers become 1.  

		Parameters
		----------
		x : numpy.ndarray
			an array of real values, each of them is given as an argument to the sigmoid function.

		Returns
		-------
		numpy.ndarray
			an array of real values that are "squashed" into range between 0 and 1 by the function.
		""" 
		if x >= 0:
			z = np.exp(-x)
			return 1 / (1 + z)
		else:
			# if x is less than zero then z will be small, denominator can't be zero because it's 1+z
			z = np.exp(x)
			return z / (1 + z)

	def sigmoid_derivative(self, y):
		return self.sigmoid(y) * (1 - self.sigmoid(y))

	@staticmethod
	def test_accuracy(x, y):
		count = 0
		for i, prediction in enumerate(x):
			if (prediction == y[i]):
				count += 1

		return float(count) / x.shape[0]

root_path = ''
d = load_cifar_batches(root_path)

nn = Network([3072, 1541, 10])
nn.train(d['training_data'], d['training_labels'])

a = nn.predict(d['test_data'])
b = d['test_labels']
print "Test accuracy: " + str(Network.test_accuracy(a, b))




0.0%
len(x)3072
len(x[0])10
len(w1)1541
len(w1[0])3072
len(w2)10
len(w2[0])1541
len(b1)1541
len(b1[0])1
len(b2)10
len(b2[0])1
len(dy_da)1541
len(dy_da[0])10
len(y)1541
len(y[0])10
len(dloss_dz)10
len(dloss_dz[0])10
len(dloss_dy)1541
len(dloss_dy[0])10
len(dloss_da)1541
len(dloss_da[0])10
dloss_dw1[[ 0.14044871  0.1365376   0.11570444 ...,  0.15015971  0.14013992
   0.13222632]
 [ 0.07311027  0.05873417  0.05760976 ...,  0.00784817  0.01347021
   0.01930434]
 [-0.17713218 -0.16077266 -0.15222394 ..., -0.07362224 -0.06708658
  -0.07012962]
 ..., 
 [ 0.01925181  0.03692618  0.03006364 ..., -0.00236142  0.00096526
  -0.00323383]
 [ 0.00504042  0.02233106  0.02280095 ...,  0.03314592  0.03149648
   0.02741346]
 [-0.12736011 -0.11270126 -0.11245256 ..., -0.06905135 -0.05598265
  -0.05758536]]
dloss_dw2[[  8.56811872e-01   1.05774948e+00   3.29961900e-01 ...,   4.34137891e-01
    3.33603910e-01   9.51719821e-04]
 [ -1.15615499e+00  -1.01602779e+00  -5.29378260e-01 ...,   3.41417340e-01
   -6.

TypeError: 'int' object has no attribute '__getitem__'

In [8]:
x=np.array([[1,2,3,4,5],[1,2,3,4,5],[1,2,3,4,5],[1,2,3,4,5],[1,2,3,4,5]])

In [11]:
y=np.array([[1,2,3,4,5,1,2,3,4,5],[1,2,3,4,5,1,2,3,4,5],[1,2,3,4,5,1,2,3,4,5],[1,2,3,4,5,1,2,3,4,5],[1,2,3,4,5,1,2,3,4,5]])

In [7]:
len(np.array([[1,2,3,4,5],[1,2,3,4,5],[1,2,3,4,5],[1,2,3,4,5],[1,2,3,4,5]]))

5

In [9]:
len(x)

5

In [10]:
len(x[0])

5

In [12]:
len(y[0])

10

In [20]:
z=x*x

In [21]:
z

array([[ 1,  4,  9, 16, 25],
       [ 1,  4,  9, 16, 25],
       [ 1,  4,  9, 16, 25],
       [ 1,  4,  9, 16, 25],
       [ 1,  4,  9, 16, 25]])

In [23]:
a=np.array([[1,1],[1,1],[1,1]])

In [24]:
b=np.array([[1,2],[3,4],[4,5]])

In [25]:
a*b

array([[1, 2],
       [3, 4],
       [4, 5]])

In [26]:
b*b

array([[ 1,  4],
       [ 9, 16],
       [16, 25]])

In [27]:
a.argmax()

0

In [28]:
a

array([[1, 1],
       [1, 1],
       [1, 1]])

In [29]:
a[0].argmax()

0

In [30]:
b.argmax()

5

In [31]:
b[0].argmax()

1