In [1]:
import matplotlib.pyplot as plt
import numpy as np
import random


In [2]:
#hyperparameters of the dataset
K = 10 # numbers of class
d = 1024*3 # numbers of pixels (RGB)

In [3]:
# init model
W = np.random.normal(0, 0.01, (K, d))
b = np.random.normal(0, 0.01, (K, 1))

In [4]:
from utils import normalize_images, one_hot
def load(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    X = normalize_images(dict[b'data'])
    y = dict[b'labels']
    Y = one_hot(y, K)
    return X, Y, y

In [5]:
training = load('dataset/data_batch_1')
validation = load('dataset/data_batch_2')
test = load('dataset/test_batch')

In [10]:
from utils import softmax
def evaluateClassifier(X, W, b):
	Ypred = np.add(np.matmul(W, X.T), b)
	Ypred_soft_max = softmax(Ypred)
	return Ypred_soft_max

In [25]:


def computeCost(dataloader, W, b,lamda=0):
    X, Y = dataloader[0], dataloader[1]
    Ypred = evaluateClassifier(X,W,b)
    batch_size = len(X)
    
    err = 0
    for im_idx in range(batch_size):
        err += np.matmul(Y[:,im_idx], np.log(Ypred[:,im_idx]))
    err /= batch_size
    err += lamda*np.sum(np.square(W)) # weight decay 
    return err 

def computeAccuracy(dataloader, W, b):
    X, y = dataloader[0], dataloader[2]
    Ypred = np.argmax(evaluateClassifier(X,W,b), 0)
    return np.sum(Ypred == y)/len(X)

In [20]:
# compute gradient
def computeGradsNum(X, Y, W, b, lamda, h=1e-6):
   
	""" Converted from matlab code """

	grad_W = np.zeros(W.shape)
	grad_b = np.zeros(b.shape)

	c = computeCost(X, Y, W, b, lamda)
	
	for i in range(len(b)):
		b_try = np.array(b)
		b_try[i] += h
		c2 = computeCost(X, Y, W, b_try, lamda)
		grad_b[i] = (c2-c) / h

	for i in range(W.shape[0]):
		for j in range(W.shape[1]):
			W_try = np.array(W)
			W_try[i,j] += h
			c2 = computeCost(X, Y, W_try, b, lamda)
			grad_W[i,j] = (c2-c) / h

	return [grad_W, grad_b]

In [35]:
def resolveWithSDG(dataloader, W, b, lamda, n_batch=20, learning_rate=0.01, max_number_of_epochs=20):
    X, Y = dataloader[0], dataloader[1]
    n_of_data = dataloader[0].shape[0]
    for epoch in range(max_number_of_epochs):
        print()
        for batch_id in range(int(n_of_data/n_batch)):
            X_batch = X
            Y_batch = Y
            grads = computeGradsNum(X_batch, Y_batch, W, b, lamda)
            W -= learning_rate*grads[0]
            b -= learning_rate*grads[1]


In [36]:
resolveWithSDG(training, W, b, lamda)




TypeError: computeCost() takes from 3 to 4 positional arguments but 5 were given