In [2]:
import os
import numpy as np
from random import random
import matplotlib.pyplot as plt
from math import *

In [88]:
# get data
# Directory holding the CIFAR-10 batch files. The trailing slash matters:
# file names are appended to this string by plain concatenation.
data_dir="./cifar-10-batches-py/"
# Each batch file unpickles to a dict with four entries (the keys come back as
# bytes, e.g. b'data', because the files are loaded with encoding='bytes'):
#   batch_label: identifies which batch of the dataset the file holds.
#   labels:      the category of each image; entry i belongs to row i of data.
#   data:        the image data, 3072 columns per row; the first 1024 columns
#                are the red channel of the 32x32 image, the next 1024 the
#                green channel, and the last 1024 the blue channel.
#   filenames:   the names of the images.
def unpickle(file):
    """Load and return the object pickled in ``file``.

    The file is opened in binary mode and unpickled with
    ``encoding='bytes'``.
    """
    import pickle
    # NOTE: unpickling can execute arbitrary code; only use on trusted files.
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

def get_batch_data():
    """Unpickle every batch file located under ``data_dir``.

    Returns a pair ``(batch_train, batch_test)``: a list with the five
    unpickled training batches (``data_batch_1`` .. ``data_batch_5``, in that
    order) followed by the unpickled ``test_batch``.
    """
    batch_train = [
        unpickle(data_dir + "data_batch_" + str(idx))
        for idx in range(1, 6)
    ]
    batch_test = unpickle(data_dir + "test_batch")
    return batch_train, batch_test

# Load all batches, then stack the training batches row-wise into one
# feature matrix and one label vector.
train_list, test_list = get_batch_data()
train_X = np.concatenate(
    [np.array(batch.get(b'data')) for batch in train_list], axis=0
)
train_y = np.concatenate(
    [np.array(batch.get(b'labels')) for batch in train_list], axis=0
)
# The test set is a single batch, so no stacking is needed.
test_X = np.array(test_list.get(b'data'))
test_y = np.array(test_list.get(b'labels'))
print("train: ", train_X.shape, train_y.shape)
print("test:  ", test_X.shape, test_y.shape)

train:  (50000, 3072) (50000,)
test:   (10000, 3072) (10000,)


In [4]:
def predict(m,x):
    """Return the predicted class index for every row of ``x``.

    Parameters
    ----------
    m : dict
        Model parameters: ``m['W']`` is the weight matrix and ``m['b']`` the
        bias that is added to ``x @ W``.
    x : array
        Input samples, one per row.

    Returns
    -------
    numpy.ndarray
        For each row of ``x``, the index of the highest-scoring class.
    """
    W=m['W']
    b=m['b']
    z=np.matmul(x,W)+b
    # Softmax is strictly increasing in each logit, so the arg-max of the
    # logits is the arg-max of the class probabilities. Skipping the
    # exponentiation avoids overflow: for large logits np.exp yields inf,
    # inf/inf becomes NaN, and argmax over a NaN row silently returns 0.
    return np.argmax(z,axis=1)

In [89]:
# Scale the training pixels by 1/255 into a new array (train_X is left intact).
X=train_X/255
# One-hot encode the labels. The labels are used directly as row indices into
# an identity matrix, so it must have max(label)+1 rows/columns; sizing it as
# max-min+1 would drop columns whenever the smallest label is not 0.
num_classes=int(train_y.max())+1
Y=np.eye(num_classes)[train_y]
# Rebuild the scaled test set from the raw batch rather than from test_X
# itself, so re-running this cell does not divide by 255 a second time.
test_X=np.array(test_list.get(b'data'))/255
num_sample=X.shape[0]
input_dims=X.shape[1]
output_dims=Y.shape[1]

[[0.61960784 0.62352941 0.64705882 ... 0.48627451 0.50588235 0.43137255]
 [0.92156863 0.90588235 0.90980392 ... 0.69803922 0.74901961 0.78039216]
 [0.61960784 0.61960784 0.54509804 ... 0.03137255 0.01176471 0.02745098]
 ...
 [0.07843137 0.0745098  0.05882353 ... 0.19607843 0.20784314 0.18431373]
 [0.09803922 0.05882353 0.09019608 ... 0.31372549 0.31764706 0.31372549]
 [0.28627451 0.38431373 0.38823529 ... 0.36862745 0.22745098 0.10196078]]


In [90]:
# Initialise the weights with small Gaussian noise (standard normal / 10) and
# the biases with zeros. A locally seeded generator keeps the starting point
# identical on every run without touching NumPy's global random state.
rng=np.random.default_rng(0)
W=rng.standard_normal((input_dims,output_dims))/10
b=np.zeros((1,output_dims))
# `model` stores these exact array objects, so in-place updates to W and b
# are visible through the dict.
model={'W':W,'b':b}

In [97]:
# hyper
epsilon = 1e-5       # step size multiplying dW / db in each parameter update
reg_lambda = 5       # weight-decay coefficient: reg_lambda * W is added to dW
epochs = 1           # number of full passes over the training set
batch_size = 500     # number of samples per gradient step

In [98]:
# Mini-batch gradient descent for softmax regression with an L2 penalty.
# W and b are the very arrays stored in `model`; they are updated in place
# (-=) so that predict(model, ...) always sees the current parameters.
W=model['W']
b=model['b']
# Number of mini-batches per epoch; the last one may be smaller.
n=ceil(num_sample / batch_size)
for i in range(epochs):
    for j in range(n):
        xx=X[batch_size*j:batch_size*(j+1)]
        yy=train_y[batch_size*j:batch_size*(j+1)]
        zz=np.matmul(xx,W)+b
        # Shift every row by its maximum before exponentiating. Softmax is
        # invariant to a per-row constant, and the shift keeps np.exp from
        # overflowing to inf (which would turn the probabilities into NaN).
        zz-=zz.max(axis=1,keepdims=True)
        exp_scores=np.exp(zz)
        A = exp_scores / np.sum(exp_scores,axis=1,keepdims=True)
        # Gradient w.r.t. the logits is softmax minus one-hot target.
        # `delta` deliberately aliases A (no copy), so after the next line A
        # no longer holds the probabilities.
        delta = A
        delta[np.arange(len(yy)),yy]-=1
        # Gradients are summed over the batch (not averaged).
        dW=np.matmul(xx.T,delta)
        db=np.sum(delta,axis=0,keepdims=True)
        # L2 penalty on the weights only; the bias is not regularised.
        dW+=reg_lambda*W
        W-=epsilon*dW
        b-=epsilon*db
        # Accuracy on the full test and training sets after every step; this
        # dominates the running time of the loop.
        print("test acc: ",np.equal(predict(model,test_X),test_y).mean())
        print("train acc: ",np.equal(predict(model,X),train_y).mean())
        print("\n")

test acc:  0.2878
train acc:  0.28904


test acc:  0.3088
train acc:  0.3069


test acc:  0.3206
train acc:  0.31988


test acc:  0.325
train acc:  0.32846


test acc:  0.3314
train acc:  0.33448


test acc:  0.3363
train acc:  0.34042


test acc:  0.3414
train acc:  0.34442


test acc:  0.3472
train acc:  0.3477


test acc:  0.3485
train acc:  0.34852


test acc:  0.3499
train acc:  0.34866


test acc:  0.3514
train acc:  0.34934


test acc:  0.349
train acc:  0.34942


test acc:  0.3493
train acc:  0.34916


test acc:  0.3493
train acc:  0.34906


test acc:  0.3503
train acc:  0.3487


test acc:  0.3485
train acc:  0.34864


test acc:  0.3488
train acc:  0.34816


test acc:  0.3497
train acc:  0.34922


test acc:  0.3487
train acc:  0.348


test acc:  0.3493
train acc:  0.34834


test acc:  0.3514
train acc:  0.34914


test acc:  0.3512
train acc:  0.34872


test acc:  0.3508
train acc:  0.3487


test acc:  0.348
train acc:  0.34876


test acc:  0.349
train acc:  0.349


test acc:  0