In [None]:
import numpy as np
import edf
from time import time
import sys
import matplotlib.pyplot as plt

def _load_split(path):
    """Load one dataset split from an ``.npz`` archive.

    Reads the ``imgs`` and ``labels`` arrays, converts both to float32, scales
    the images by 1/255 and reshapes them to (idx, h, w, channel) with
    h = w = 32 and channel = 3. The number of images is inferred from the data
    rather than hard-coded.

    Returns:
        (imgs, labels) as float32 arrays.
    """
    # np.load on an .npz returns a file-backed archive; the context manager
    # closes it once both arrays have been materialised.
    with np.load(path) as archive:
        imgs = np.float32(archive['imgs'])/255.
        labels = np.float32(archive['labels'])
    return imgs.reshape(-1, 32, 32, 3), labels


# Training split
t_imgs, t_labels = _load_split('../c10_data/train.npz')

# Validation split (read from the test archive)
v_imgs, v_labels = _load_split('../c10_data/test.npz')

In [None]:

########################################### Convolution layer #############################################
############################### Please implement the forward and backward methods in this class ##############
class ConvNaive:
    """Loop-based 2-D convolution node.

    ``f.value`` is indexed as (batch, h, w, c_in) and ``k.value`` as
    (kh, kw, c_in, c_out). The output is computed one spatial position at a
    time with a single matrix product per position.
    """

    def __init__(self,f,k,stride=1,pad=0):
        """Register the node and store its inputs.

        Args:
            f: input feature node (exposes ``.value`` and ``.grad``).
            k: filter node (exposes ``.value`` and ``.grad``).
            stride: step between neighbouring output positions.
            pad: zero padding; a scalar is used for both axes, otherwise
                ``pad[0]`` is the vertical and ``pad[1]`` the horizontal pad.
        """
        edf.components.append(self)
        self.f = f
        self.k = k
        pad = np.array(pad)
        if pad.shape == ():
            self.xpad = self.ypad = pad
        else:
            self.ypad = pad[0]
            self.xpad = pad[1]

        self.stride=stride
        # No gradient slot is kept when neither input tracks a gradient.
        self.grad = None if f.grad is None and k.grad is None else edf.DT(0)

    def forward(self):
        """Compute ``self.value`` and cache the zero-padded input in ``self.padf``."""
        fshape = self.f.value.shape
        kshape = self.k.value.shape

        b, h, w, c1 = fshape[0], fshape[1], fshape[2], fshape[3]
        kh, kw, ch = kshape[0], kshape[1], kshape[3]
        st = self.stride
        ypad, xpad = int(self.ypad), int(self.xpad)

        # Number of output rows / columns (truncating division, as before).
        out_h = int((h - kh + 2*ypad)/st + 1)
        out_w = int((w - kw + 2*xpad)/st + 1)

        self.value = np.zeros((b, out_h, out_w, ch))
        self.padf = np.zeros((b, h + 2*ypad, w + 2*xpad, c1))
        self.padf[:, ypad:h+ypad, xpad:w+xpad, :] = self.f.value

        # The flattened filter matrix does not depend on the position.
        ke = self.k.value.reshape((kh*kw*c1, ch))

        # over positions in image
        for y in range(out_h):
            for x in range(out_w):
                y_img = y*st
                x_img = x*st
                inx = self.padf[:, y_img:y_img+kh, x_img:x_img+kw, :].reshape((b, kh*kw*c1))
                self.value[:, y, x, :] = np.matmul(inx, ke)

    def backward(self):
        """Accumulate gradients into ``f.grad`` and/or ``k.grad``.

        Each input is handled independently, so the filter still receives its
        gradient when ``f`` carries none (``f.grad is None``), and vice versa.
        Relies on ``self.padf`` cached by ``forward``.
        """
        need_f = self.f.grad is not None
        need_k = self.k.grad is not None
        if not (need_f or need_k):
            return

        fshape = self.f.value.shape
        kshape = self.k.value.shape

        b, h, w, c1 = fshape[0], fshape[1], fshape[2], fshape[3]
        kh, kw, ch = kshape[0], kshape[1], kshape[3]
        st = self.stride
        ypad, xpad = int(self.ypad), int(self.xpad)

        h_hat = self.grad.shape[1]
        w_hat = self.grad.shape[2]

        if need_f:
            # Sized like the padded input so that the crop below always yields
            # the input's shape, even when the stride leaves rows/columns at
            # the bottom/right edge that no window touches.
            fgrad = np.zeros(self.padf.shape)
            k_flip = np.transpose(self.k.value, (0,1,3,2))      # (kh, kw, c_out, c_in)
        if need_k:
            kgrad = np.zeros((kh, kw, c1, ch))
            padf_flip = np.transpose(self.padf, (1,2,3,0))      # (H, W, c_in, batch)

        for y in range(h_hat):
            for x in range(w_hat):
                out_grad_value = self.grad[:, y, x, :].reshape(b, ch)
                y_img = y*st
                x_img = x*st
                if need_f:
                    # (b, c_out) . (kh, kw, c_out, c_in) -> (b, kh, kw, c_in)
                    fgrad[:, y_img:y_img+kh, x_img:x_img+kw, :] += np.dot(out_grad_value, k_flip)
                if need_k:
                    # (kh, kw, c_in, b) . (b, c_out) -> (kh, kw, c_in, c_out)
                    kgrad += np.dot(padf_flip[y_img:y_img+kh, x_img:x_img+kw, :, :], out_grad_value)

        if need_f:
            # Drop the padding border to get back to the input's extent.
            self.f.grad = self.f.grad + fgrad[:, ypad:ypad+h, xpad:xpad+w, :]
        if need_k:
            self.k.grad = self.k.grad + kgrad

########################################### MaxPool layer #############################################
############################### Please implement the forward and backward methods in this class ##############
class MaxPool:
    """Max-pooling node over the two middle axes of a 4-D value.

    Each output element is the maximum of a ``ksz`` x ``ksz`` window; windows
    are placed every ``stride`` elements (``stride`` defaults to ``ksz``).
    Trailing rows/columns that do not fit a complete window are ignored.
    """

    def __init__(self,x,ksz=2,stride=None):
        """Register the node; ``stride=None`` means non-overlapping windows."""
        edf.components.append(self)
        self.x = x
        self.ksz=ksz
        if stride is None:
            self.stride=ksz
        else:
            self.stride=stride
        self.grad = None if x.grad is None else edf.DT(0)

    def _out_hw(self):
        """Number of complete windows along axis 1 and axis 2 of the input."""
        shape = self.x.value.shape
        return ((shape[1] - self.ksz)//self.stride + 1,
                (shape[2] - self.ksz)//self.stride + 1)

    def _tap(self, y, x, oh, ow):
        """Index selecting window offset (y, x) for every one of the oh x ow windows."""
        st = self.stride
        # Bounding the stop keeps all taps the same length; a bare ``y::st``
        # yields mismatched lengths unless the input size is an exact fit.
        return (slice(None),
                slice(y, y + st*oh, st),
                slice(x, x + st*ow, st),
                slice(None))

    def forward(self):
        """Set ``self.value`` to the element-wise maximum over all window offsets."""
        ksz = self.ksz
        oh, ow = self._out_hw()
        self.value = -np.inf
        for y in range(ksz):
            for x in range(ksz):
                self.value = np.maximum(self.value, self.x.value[self._tap(y, x, oh, ow)])

    def backward(self):
        """Route ``self.grad`` to every input element equal to its window maximum.

        Ties are not broken: all elements matching the maximum receive the
        full gradient. Elements outside every complete window receive zero.
        """
        if self.x.grad is not None:
            ksz = self.ksz
            oh, ow = self._out_hw()
            # Materialise a full-size buffer (x.grad may still be a scalar).
            self.x.grad = self.x.grad + np.zeros_like(self.x.value)
            for y in range(ksz):
                for x in range(ksz):
                    tap = self._tap(y, x, oh, ow)
                    self.x.grad[tap] = self.grad * \
                            (self.value == self.x.value[tap]) + \
                            self.x.grad[tap]

                            
########################################### AvePool layer #############################################
############################### Please implement the forward and backward methods in this class ##############
class AvePool:
    """Average-pooling node over the two middle axes of a 4-D value.

    Each output element is the mean of a ``ksz`` x ``ksz`` window; windows are
    placed every ``stride`` elements (``stride`` defaults to ``ksz``).
    Trailing rows/columns that do not fit a complete window are ignored.
    """

    def __init__(self,x,ksz=2,stride=None):
        """Register the node; ``stride=None`` means non-overlapping windows."""
        edf.components.append(self)
        self.x = x
        self.ksz=ksz
        if stride is None:
            self.stride=ksz
        else:
            self.stride=stride
        self.grad = None if x.grad is None else edf.DT(0)

    def _out_hw(self):
        """Number of complete windows along axis 1 and axis 2 of the input."""
        shape = self.x.value.shape
        return ((shape[1] - self.ksz)//self.stride + 1,
                (shape[2] - self.ksz)//self.stride + 1)

    def _tap(self, y, x, oh, ow):
        """Index selecting window offset (y, x) for every one of the oh x ow windows."""
        st = self.stride
        # Bounding the stop keeps all taps the same length; a bare ``y::st``
        # yields mismatched lengths unless the input size is an exact fit.
        return (slice(None),
                slice(y, y + st*oh, st),
                slice(x, x + st*ow, st),
                slice(None))

    def forward(self):
        """Set ``self.value`` to the sum over all window offsets divided by ksz*ksz."""
        ksz = self.ksz
        oh, ow = self._out_hw()
        self.value = edf.DT(0)
        for y in range(ksz):
            for x in range(ksz):
                self.value = self.value + self.x.value[self._tap(y, x, oh, ow)]
        self.value = self.value/ksz/ksz

    def backward(self):
        """Spread ``self.grad / ksz**2`` over every element of each window.

        Elements outside every complete window receive zero gradient.
        """
        if self.x.grad is not None:
            ksz = self.ksz
            oh, ow = self._out_hw()
            # Materialise a full-size buffer (x.grad may still be a scalar).
            self.x.grad = self.x.grad + np.zeros_like(self.x.value)
            for y in range(ksz):
                for x in range(ksz):
                    tap = self._tap(y, x, oh, ow)
                    self.x.grad[tap] = self.grad/ksz/ksz + \
                            self.x.grad[tap]

In [None]:
# Utility function for shape inference with broadcasting
def bcast(x,y):
    """Find the axes along which each operand is smaller than the broadcast shape.

    The shorter shape is left-padded with zeros so both have the same rank,
    the element-wise maximum of the two is taken as the output shape, and for
    each operand the axes where its (padded) extent is below that maximum are
    returned.

    Returns:
        (xred, yred): tuples of axis indices for ``x`` and ``y`` respectively.
    """
    x_dims = np.array(x.shape)
    y_dims = np.array(y.shape)

    # Bring both shapes to a common rank by prepending zeros.
    rank_gap = len(x_dims) - len(y_dims)
    if rank_gap > 0:
        y_dims = np.pad(y_dims, [[rank_gap, 0]], 'constant')
    elif rank_gap < 0:
        x_dims = np.pad(x_dims, [[-rank_gap, 0]], 'constant')

    out_dims = np.maximum(x_dims, y_dims)
    x_axes = np.where(x_dims < out_dims)[0]
    y_axes = np.where(y_dims < out_dims)[0]
    return tuple(x_axes), tuple(y_axes)

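'''
  function name: _im2c
  function usage: Gather elements of the flattened tensor `value` at the flat indices in `fidx`
                  and return them in the shape of `fidx`. When `vld` is given, only the entries
                  it marks as valid are gathered; all other entries are left as zero.
'''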
def _im2c(value,fidx,vld):
    if vld is not None:
        fmat = np.zeros(np.prod(fidx.shape),dtype=edf.DT)
        fmat[vld] = value.reshape([-1])[fidx.reshape([-1])[vld]]
    else:
        fmat = value.reshape([-1])[fidx.reshape([-1])]
    fmat = fmat.reshape(fidx.shape)
    return fmat


'''
  Class name: Conv
  Class usage: Convolution layer given an image feature f, a filter k, a stride and a pad.
               It uses the image-to-column (im2col) trick, which makes it fast.
  Class function:
      forward: do the convolution
      backward: calculate the derivatives w.r.t. f and the filter k

'''
class Conv:
    """Convolution node that turns the operation into one matrix product.

    Gather indices are precomputed per (input shape, filter shape) pair and
    reused across calls. ``f.value`` is indexed as (batch, h, w, c_in) and
    ``k.value`` as (kh, kw, c_in, c_out).
    """

    def __init__(self,f,k,stride=1,pad=0):
        """Register the node and store its inputs.

        Args:
            f: input feature node (exposes ``.value`` and ``.grad``).
            k: filter node (exposes ``.value`` and ``.grad``).
            stride: step between neighbouring output positions.
            pad: zero padding; a scalar is used for both axes, otherwise
                ``pad[0]`` is the vertical and ``pad[1]`` the horizontal pad.
        """
        edf.components.append(self)
        self.f = f
        self.k = k
        pad = np.array(pad)
        if pad.shape == ():
            self.xpad = self.ypad = pad
        else:
            self.ypad = pad[0]
            self.xpad = pad[1]
        self.stride=stride
        self.grad = None if f.grad is None and k.grad is None else edf.DT(0)

        # Shapes the cached indices were built for (None until first forward).
        self.fshape = None
        self.kshape = None


    def im2c_setup(self,fshape,kshape):
        """Build and cache the gather indices for the given shapes.

        Stores ``fidx``/``vld`` (forward: input element feeding each
        output-position/filter-tap pair), ``oshape`` (output shape) and, when
        ``f`` tracks a gradient, ``bfidx``/``bvld`` (backward: output-gradient
        element feeding each input-position/filter-tap pair).
        """
        self.fshape = fshape
        self.kshape = kshape

        # For forward pass
        # Top-left input coordinate of every window (may be negative = padding).
        y,x = np.meshgrid(
            range(-self.ypad,fshape[1]+self.ypad-kshape[0]+1,self.stride),
            range(-self.xpad,fshape[2]+self.xpad-kshape[1]+1,self.stride),
            indexing='ij')
        oshape = (fshape[0],)+y.shape+(kshape[-1],)
        yd,xd = np.meshgrid(range(kshape[0]),range(kshape[1]),indexing='ij')
        y = y.reshape([-1,1,1])+yd.reshape([-1,1])
        x = x.reshape([-1,1,1])+xd.reshape([-1,1])
        # Flat index into f.value: ((b*H + y)*W + x)*C + c
        fidx = np.reshape(range(fshape[0]),[-1,1,1,1])*fshape[1]
        fidx = ((fidx + y)*fshape[2] + x)*fshape[3] + range(fshape[3])
        fidx = fidx.reshape([fidx.shape[0]*fidx.shape[1],-1])
        # Taps that fall into the padding border are masked out (read as zero).
        vld = ((y >= 0) * (y < fshape[1]) * (x >= 0) * (x < fshape[2]))
        if not np.all(vld):
            vld = np.tile(vld[...,np.newaxis],[fshape[0],1,1,fshape[-1]]).reshape(-1)
        else:
            vld = None
        self.fidx = fidx
        self.vld = vld
        self.oshape = oshape

        # For backward pass
        if self.f.grad is None:
            return

        st = self.stride
        y,x = np.meshgrid(range(fshape[1]),range(fshape[2]),indexing='ij')
        yd,xd = np.meshgrid(range(kshape[0]),range(kshape[1]),indexing='ij')
        # Input row iy is touched by tap ky of output row oy when
        # oy*stride - ypad + ky == iy, i.e. oy = (iy - ky + ypad) / stride.
        # Only exact multiples of the stride correspond to a real output row.
        ty = y.reshape([-1,1,1])-yd.reshape([-1,1])+self.ypad
        tx = x.reshape([-1,1,1])-xd.reshape([-1,1])+self.xpad
        y = ty // st
        x = tx // st
        # Flat index into self.grad: ((b*OH + oy)*OW + ox)*C_out + o
        bfidx = np.reshape(range(fshape[0]),[-1,1,1,1])*oshape[1]
        bfidx = ((bfidx + y)*oshape[2] + x)*oshape[3] + range(oshape[3])
        bfidx = bfidx.reshape([bfidx.shape[0]*bfidx.shape[1],-1])
        bvld = ((ty % st == 0) * (tx % st == 0) *
                (y >= 0) * (y < oshape[1]) * (x >= 0) * (x < oshape[2]))
        if not np.all(bvld):
            bvld = np.tile(bvld[...,np.newaxis],[oshape[0],1,1,oshape[-1]]).reshape(-1)
        else:
            bvld = None
        self.bfidx = bfidx
        self.bvld = bvld

    def forward(self):
        """Compute ``self.value`` as (gathered input patches) x (flattened filter)."""
        fshape = self.f.value.shape
        kshape = self.k.value.shape
        # Rebuild the index tables only when a shape changed since last call.
        if fshape != self.fshape or kshape != self.kshape:
            self.im2c_setup(fshape,kshape)

        fmat = _im2c(self.f.value,self.fidx,self.vld)
        kmat = self.k.value.reshape([-1,kshape[-1]])
        if self.k.grad is not None:
            # Kept for the filter gradient in backward().
            self.fmat = fmat
        self.value = np.matmul(fmat,kmat).reshape(self.oshape)

    def backward(self):
        """Accumulate gradients into ``f.grad`` and ``k.grad`` where tracked."""
        if self.f.grad is not None:
            gmat = _im2c(self.grad,self.bfidx,self.bvld)
            # Rows ordered (ky, kx, c_out), columns c_in — matches gmat's columns.
            kmat = np.transpose(self.k.value,[0,1,3,2]).copy().reshape([-1,self.kshape[-2]])
            self.f.grad = self.f.grad + np.matmul(gmat,kmat).reshape(self.fshape)

        if self.k.grad is not None:
            kgrad = np.matmul(self.fmat.T,self.grad.reshape([-1,self.kshape[-1]]))
            self.k.grad = self.k.grad + kgrad.reshape(self.kshape)

In [None]:
# for repeatability
np.random.seed(0)

# Inputs and parameters
inp = edf.Value()
lab = edf.Value()

########################## Simple Convolution Neural Network Model for Cifar 10 ##################################

prev_channel = 3
pred = inp

# Stage 1: 3x3 conv with pad 1 keeps the 32x32 extent (32+2-3+1=32),
# then a 4x4 max-pool reduces it to 8x8.
W = edf.Param(edf.xavier((3, 3, prev_channel, 32)))
B = edf.Param(np.zeros((32)))
pred = edf.RELU(edf.Add(Conv(pred,W,1,1),B))
pred = MaxPool(pred, 4)

# Stage 2: unpadded 3x3 conv gives 6x6 ((8-3)/1+1 = 6),
# then a 6x6 average-pool collapses it to 1x1x64.
W = edf.Param(edf.xavier((3,3, 32, 64)))
B = edf.Param(np.zeros((64)))
pred = edf.RELU(edf.Add(Conv(pred,W,1,0), B))
pred = AvePool(pred, 6)

# Classifier head: 1x1 conv maps the 64 features to 10 class scores.
# No ReLU here: clamping the scores at zero before SoftMax would stop
# gradient from reaching any class whose score is currently negative.
W = edf.Param(edf.xavier((1, 1, 64, 10)))
B = edf.Param(np.zeros((10)))
pred = edf.Add(Conv(pred,W,1,0),B)
pred = edf.Reshape(pred,[-1, 10])

# the classification layer
pred = edf.SoftMax(pred)
loss = edf.Mean(edf.LogLoss(edf.Aref(pred,lab)))
acc = edf.Accuracy(pred,lab)



# evaluation bucket
bucket = 100


def _evaluate(imgs, labels):
    """Run a forward pass over ``bucket`` interleaved slices of the data.

    Slice ``i`` takes every ``bucket``-th sample starting at ``i``. Returns the
    accuracy and the loss, each averaged over the ``bucket`` forward passes.
    """
    acc_total = 0
    loss_total = 0
    for offset in range(bucket):
        inp.set(imgs[offset::bucket])
        lab.set(labels[offset::bucket])
        edf.Forward()
        acc_total += acc.value
        loss_total += loss.value
    return acc_total/bucket, loss_total/bucket


def eval_train():
    """Evaluate on every 5th training image (the full set is too slow to score)."""
    return _evaluate(t_imgs[::5], t_labels[::5])


def eval_test():
    """Evaluate on the whole validation set."""
    return _evaluate(v_imgs, v_labels)

# Baseline: score the untrained network on the validation set.
random_acc, random_loss = eval_test()
print("Random test loss = %.4f, accuracy = %.4f" % (random_loss, random_acc))


################################################# train loop ######################################################
ep = 0
epoch = 10
batch = 100
train_loss, train_acc, test_loss, test_acc = [], [], [], []
stime = time()
batches = range(0, len(t_labels), batch)

for ep in range(epoch):

    # fresh random ordering of the training data for this epoch
    perm = np.random.permutation(len(t_labels))

    for start in batches:
        chosen = perm[start:start+batch]
        inp.set(t_imgs[chosen])
        lab.set(t_labels[chosen])
        edf.Forward()
        edf.Backward(loss)
        edf.Adam()

    # score the (sub-sampled) training set; the reported time covers the
    # training pass plus this evaluation
    t_acc, t_loss = eval_train()
    print("Epoch %d: train loss = %.4f [%.3f secs]" % (ep, t_loss,time()-stime))
    train_loss.append(t_loss)
    train_acc.append(t_acc)

    # score the validation set
    v_acc, v_loss = eval_test()
    print("test accuracy = %.4f" % v_acc)
    test_loss.append(v_loss)
    test_acc.append(v_acc)

    # restart the clock for the next epoch
    stime = time()

# leave the epoch counter at its post-loop value
ep = epoch


# plot: figure 1 shows the loss curves, figure 2 the accuracy curves
# (red = test, blue = train, one point per epoch)
for fig_num, y_name, test_curve, train_curve, legend_names, legend_loc in (
        (1, "loss", test_loss, train_loss, ['test loss', 'train loss'], 'upper right'),
        (2, "accuracy", test_acc, train_acc, ['test acc', 'train acc'], 'lower right'),
):
    plt.figure(fig_num)
    plt.xlabel("epochs")
    plt.ylabel(y_name)
    plt.plot(np.arange(len(test_curve)), test_curve, color='red')
    plt.plot(np.arange(len(train_curve)), train_curve, color='blue')
    plt.legend(legend_names, loc=legend_loc)
    plt.show()