In [67]:
import sys
import numpy as np
import matplotlib.pyplot as plt
from math import *
from random import random
import cv2

In [68]:
# get data
data_dir="../cifar-10-batches-py/"
'''
data files contain three binary part.
batch_label: which represents the index of the dataset.
labels: which represents the category of the images, 
        the index of the row is also the row index of the image.
data: which is the image data that has 3072 columns, 
        the first 1024 columns represent the red channels of the 32x32 image, 
        the next 1024 columns represent the green one, 
        the last represent the blue one.
filenames: which is the name of the images.
'''
def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

def get_batch_data():
    batch_train=[]
    for i in range(1, 6):
        batch_train.append(unpickle(data_dir + "data_batch_" + i.__str__()))

    batch_test = unpickle(data_dir + "test_batch")
    return batch_train, batch_test

train_list,test_list= get_batch_data()
train_X = np.array(train_list[0].get(b'data'))
train_y=np.array(train_list[0].get(b'labels'))
for i in range(1,len(train_list)):
    train_X = np.concatenate((train_X,train_list[i].get(b'data')),axis=0)
    train_y = np.concatenate((train_y,train_list[i].get(b'labels')),axis=0)

train_images = train_X/255.0
train_labels=train_y    
test_images = np.array(test_list.get(b'data'))/255.0
test_labels = np.array(test_list.get(b'labels'))
print("train: ",train_images.shape,train_labels.shape)
print("test:  ",test_images.shape,test_labels.shape)



train:  (50000, 3072) (50000,)
test:   (10000, 3072) (10000,)


In [69]:
def make_one_hot(data):
    return (np.arange(10)==data[:,None]).astype(np.integer)

In [70]:
train_images_with_noise=train_images+np.random.randn(train_images.shape[0],train_images.shape[1])/5
onehot_train_labels=make_one_hot(train_labels)

In [71]:
def calculate_loss(model, X, y):
    num_examples = X.shape[0]
    batch_size=num_examples
    t=ceil(num_examples/batch_size)
    y_=[]
    for j in range(t):
        a=X[batch_size*j:batch_size*(j+1)]
        for k in range(len(nn_hdim)+1):
            z=np.matmul(a,model['W'][k])+model['b'][k]
            if k!=(len(nn_hdim)):
                a=(abs(z)+z)/2
            else:
                exp_scores=np.exp(z)
                a=exp_scores/np.sum(exp_scores,axis=1,keepdims=True)
        if len(y_)==0:
            y_=a
        else:
            y_=np.vstack([y_,a])
    print(len(y_))
    print(len(y))        
    corect_logprobs = -np.log(y_[range(num_examples), y]+1e-10)
    data_loss = np.sum(corect_logprobs)
    # Add regulatization term to loss (optional)
    for j in model['W']:
        data_loss+=(reg_lambda/2)*np.sum(np.square(j))
    return (1. / num_examples) * data_loss


In [72]:
def predict(model,x):
    a=x
    for j in range(len(nn_hdim)+1):
        z=np.matmul(a,model['W'][j])+model['b'][j]
        if j!=len(nn_hdim):
            a=(abs(z)+z)/2
        else:
            exp_scores=np.exp(z)
            a=exp_scores/np.sum(exp_scores,axis=1,keepdims=True)
    return np.argmax(a,1)

In [79]:
nn_hdim=[300,300,300,300]
input_dims=train_images.shape[1]
output_dims=10
all_dim=[input_dims]+nn_hdim+[output_dims]
model={'W':[],'b':[],'hd':nn_hdim}
W=model['W']
b=model['b']
for i in range(len(nn_hdim)+1):
    W.append(np.random.randn(all_dim[i],all_dim[i+1])/np.sqrt(all_dim[i+1]))
    b.append(np.zeros((1,all_dim[i+1])))

In [80]:
n=ceil(train_images.shape[0]/batch_size)
w_num=len(nn_hdim)
W=model['W']
b=model['b']
for i in range(epochs):
    for j in range(n*2):
        if j<n:
            xx=train_images[batch_size*(j%n):batch_size*((j+1)%n)]
        elif j<2*n:
            xx=train_images_with_noise[batch_size*(j%n):batch_size*((j+1)%n)]
        yy_onehot=onehot_train_labels[batch_size*(j%n):batch_size*((j+1)%n)]
        #yy=train_images[batch_size*j:batch_size*(j+1)]
        dW=[]
        db=[]
        z=[]
        a=[]
        drop=[]
        a.append(xx)
        for k in range(w_num+1):
            z_temp=np.matmul(a[-1],W[k])+b[k]
            if k!=w_num:
                drop.append(np.less(np.random.uniform(size=z_temp.shape),remain_pro)/remain_pro)
                z_temp*=drop[-1]
            z.append(z_temp)
            if k!=w_num:
                a.append((abs(z_temp)+z_temp)/2)
            else:
                exp_scores=np.exp(z_temp)
                a_temp=exp_scores/np.clip(np.sum(exp_scores,axis=1,keepdims=True),a_min=1e-10,a_max=None)
                if np.isnan(a_temp).any():
                    raise ValueError('there is a nan')
                a.append(a_temp)
        # a: xx a1-an
        # z: z1-zn        
        for k in range(w_num+1):
            if k==0:
                delta=a[-1]-yy_onehot
            else:
                delta=np.matmul(delta,W[w_num+1-k].T)*((np.sign(a[w_num+1-k])+1.)/2.)*drop[w_num-k]
            dW.append(np.matmul(a[w_num-k].T,delta))
            db.append(np.sum(delta,axis=0))
            #先是Wn,才识W1
        temp_reg_lambda=reg_lambda
        for k in dW:
            k+=temp_reg_lambda*k
            temp_reg_lambda*=1.5
        for i in range(w_num+1):
            W[i]-=learning_rate*dW[w_num-i]
            b[i]-=learning_rate*db[w_num-i]
    if i%1==0:
        yp=predict(model,test_images)
        one_acc=np.equal(yp,test_labels).mean()    
        print(one_acc)

0.2898


In [78]:
learning_rate=0.00002
reg_lambda=0.008
epochs=1
batch_size=1000
remain_pro=0.85

In [24]:
yp=predict(model,test_images)
print(np.equal(yp,test_labels).mean())

0.3829
