In [1]:
import numpy as np
import cv2

In [2]:
def read_img(file):
    """Parse an annotation file into parallel lists of image names and labels.

    Each non-blank line is expected to hold an image name followed by an
    integer class label, e.g. ``images/cat_001.jpg 12``.

    Args:
        file: Path of the annotation text file.

    Returns:
        A ``(img_name_list, label_list)`` tuple holding the image names as
        ``str`` and the labels as ``int``, both in file order.

    Raises:
        ValueError: If a label cannot be converted to ``int``.
    """
    img_name_list, label_list = [], []
    with open(file) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. an empty line at the end of the file).
                continue
            # Split on the last run of whitespace so labels with more than one
            # digit, and names that contain spaces, are both parsed correctly.
            parts = line.rsplit(None, 1)
            if len(parts) == 2:
                name, label = parts
            else:
                # No whitespace separator: fall back to the fixed-width layout
                # "<name><one separator character><one digit>".
                name, label = line[:-2], line[-1]
            img_name_list.append(name)
            label_list.append(int(label))

    return img_name_list, label_list

In [None]:
# Read the training image names and their integer labels from train.txt.
train_img_name_list,train_label = read_img("train.txt")

# 目標

## model

- 1-layer perceptron

- 2-layer perceptron

- modified LeNet-5

## activation function

## loss function

In [56]:
class sigmoid():
    """Element-wise logistic sigmoid layer, ``1 / (1 + exp(-x))``.

    ``forward`` caches what ``backward`` needs, so ``forward`` must be called
    before ``backward``.
    """
    def __init__(self):
        pass
    def forward(self,x):
        """Return the sigmoid of ``x`` (scalar or array), element by element.

        The value is computed from ``exp(-|x|)``, which lies in (0, 1] and
        therefore never overflows, however large ``|x|`` is.
        """
        self.x = x
        self.z = np.exp(-np.abs(x))
        # Both branches equal 1 / (1 + exp(-x)); the second is the same
        # fraction with numerator and denominator multiplied by exp(x).
        # The trailing [()] turns a 0-d result back into a scalar and leaves
        # arrays of any other rank untouched.
        self.out = np.where(np.asarray(x) >= 0,
                            1 / (1 + self.z),
                            self.z / (1 + self.z))[()]
        return self.out
    def backward(self,upg): # upg:upstream gradient
        """Return the gradient with respect to the input of ``forward``.

        The derivative of the sigmoid is an even function of ``x`` and equals
        ``exp(-|x|) / (1 + exp(-|x|))**2``, so the cached ``self.z`` gives a
        finite result for inputs of any magnitude.
        """
        self.gd0 = upg * self.z / (1 + self.z) ** 2
        return self.gd0
            
class relu():
    """Element-wise rectified linear unit, ``max(0, x)``.

    ``forward`` caches its input, so it must be called before ``backward``.
    """
    def __init__(self):
        pass
    def forward(self,x):
        """Return ``x`` with every negative entry replaced by 0."""
        self.x = x
        output = np.maximum(0,x)
        return output
    def backward(self,upg):
        """Return the upstream gradient ``upg`` zeroed wherever the input was <= 0.

        The result is a new array; ``upg`` itself is left untouched so the
        caller can keep using it.
        """
        # Work on a copy: masking upg directly would overwrite the caller's array.
        self.gd = np.array(upg, copy=True)
        self.gd[self.x <= 0] = 0
        return self.gd
    
class softmax():
    """Row-wise softmax over a 2-D array of scores (one sample per row).

    ``forward`` caches its output, so it must be called before ``backward``.
    """
    def __init__(self):
        pass
    def forward(self,pred):
        """Return ``exp(pred)`` normalised so that every row sums to 1.

        Args:
            pred: 2-D array-like of scores, shape (samples, classes).
        """
        self.x = pred
        scores = np.asarray(pred)
        # Subtracting the row maximum leaves the result unchanged (the factor
        # cancels in the ratio) but keeps exp() from overflowing.
        shifted = scores - np.max(scores, axis=1, keepdims=True)
        exps = np.exp(shifted)
        self.out = exps / exps.sum(axis=1, keepdims=True)
        return self.out
    def backward(self,upg):
        """Return the gradient with respect to the scores given to ``forward``.

        For one row with softmax output ``s`` the Jacobian is
        ``diag(s) - s s^T``, so the product with the upstream gradient ``upg``
        reduces to ``s * (upg - sum(upg * s))``, evaluated here for all rows
        at once.

        Args:
            upg: Upstream gradient, same shape as the output of ``forward``.
        """
        weighted = np.sum(upg * self.out, axis=1, keepdims=True)
        self.gd0 = self.out * (upg - weighted)
        return self.gd0

In [3]:
class crossentropy(): # softmax->neg log likelihood
    """Cross-entropy loss: softmax over the scores, then mean negative log-likelihood."""
    def __init__(self):
        self.softmax = softmax()
    def forward(self,pred,gt):
        """Print and return the mean cross-entropy of raw scores against targets.

        Args:
            pred: 2-D array of raw scores, shape (samples, classes).
            gt: Target weights of the same shape (e.g. one-hot rows).

        Returns:
            The value computed by ``negloglike`` on the softmax of ``pred``.
        """
        pred = self.softmax.forward(pred)
        loss = self.negloglike(pred,gt)
        print(loss)
        # Hand the value back as well, so callers can record or compare it.
        return loss

    def negloglike(self,pred,gt):
        """Return the mean negative log-likelihood of probabilities ``pred``.

        Args:
            pred: 2-D array of probabilities, shape (samples, classes).
            gt: Target weights of the same shape.

        Returns:
            ``-sum(gt * log(pred))`` rounded to 5 decimals, divided by the
            number of rows in ``pred``.
        """
        pred = np.asarray(pred, dtype=float)
        # log(0) is -inf, and 0 * -inf is nan, which would poison the whole
        # sum; clamp zeros to the smallest positive normal float instead.
        safe_pred = np.clip(pred, np.finfo(float).tiny, None)
        loss_matrix = np.sum(gt * np.log(safe_pred),axis=1)
        # The total is rounded before averaging, as this method has always done.
        loss_sum = round(-np.sum(loss_matrix),5)
        return loss_sum/len(pred)

In [4]:
class linear():
    """Fully connected layer that maps ``x`` to ``x . weight + bias``.

    ``weight`` has shape (in_dim, out_dim) and ``bias`` has shape (out_dim,);
    both are filled with uniform samples from [0, 1).
    """
    def __init__(self, in_dim, out_dim):
        # The weight is drawn before the bias so the sequence of values taken
        # from NumPy's global random state stays the same.
        weight_shape = (in_dim, out_dim)
        self.weight = np.random.rand(*weight_shape)
        self.bias = np.random.rand(out_dim)
    def forward(self, x): # suppose m*n
        """Return the affine transform of ``x``; the bias is broadcast over rows."""
        projected = np.dot(x, self.weight)
        return projected + self.bias


In [20]:
class percep():
    """Two-layer perceptron: linear -> sigmoid -> linear -> sigmoid.

    Args:
        in_dim: Number of input features (default 3).
        hidden_dim: Width of the hidden layer (default 2).
        out_dim: Number of outputs (default 2).

    The defaults build the same 3 -> 2 -> 2 network as calling ``percep()``
    with no arguments always did.
    """
    def __init__(self, in_dim=3, hidden_dim=2, out_dim=2):
        self.lin1 = linear(in_dim, hidden_dim)
        self.sigmoid1 = sigmoid()
        self.lin2 = linear(hidden_dim, out_dim)
        self.sigmoid2 = sigmoid()
    def forward(self,x):
        """Run ``x`` through both linear/sigmoid stages and return the result."""
        x = self.lin1.forward(x)
        x = self.sigmoid1.forward(x)
        x = self.lin2.forward(x)
        output = self.sigmoid2.forward(x)
        return output
    
    

In [6]:
class maxpool():
    """Max pooling with a square window that moves one pixel at a time.

    Args:
        kernel_size: Side length of the pooling window.
    """
    def __init__(self,kernel_size):
        self.size = kernel_size
    def forward(self,x): # x: number of image, number of channel, image height, image width
        """Return the maximum of every ``kernel_size`` x ``kernel_size`` window.

        Args:
            x: 4-D array laid out as (image, channel, height, width).

        Returns:
            Float array of shape
            (image, channel, height - kernel_size + 1, width - kernel_size + 1).
        """
        num_img, num_cha, height, width = x.shape
        pool_height = height - self.size + 1
        pool_width = width - self.size + 1
        output = np.zeros((num_img,num_cha,pool_height,pool_width))
        for k in range(pool_height):
            for l in range(pool_width):
                # Reduce over the two spatial axes of the window for every
                # image and channel in one call.
                window = x[:, :, k:k+self.size, l:l+self.size]
                output[:, :, k, l] = window.max(axis=(2, 3))
        return output



In [7]:
class conv():
    """Single-kernel 2-D filter applied to every channel of every image.

    One ``kernel_size`` x ``kernel_size`` weight matrix, drawn from a standard
    normal distribution, is shared by all channels.

    Args:
        kernel_size: Side length of the square kernel.
    """
    def __init__(self,kernel_size):
        self.size = kernel_size
        self.weight = np.random.randn(kernel_size,kernel_size)
    def forward(self,x): # x: number of image, number of channel, image height, image width
        """Slide the kernel over each channel with stride 1 and no padding.

        Each output value is the sum of a window multiplied element-wise by
        the kernel (the kernel is not flipped).

        Args:
            x: 4-D array laid out as (image, channel, height, width).

        Returns:
            Float array of shape
            (image, channel, height - kernel_size + 1, width - kernel_size + 1).
        """
        num_img, num_cha, height, width = x.shape
        pool_height = height - self.size + 1
        pool_width = width - self.size + 1
        # One output map per image and channel; a single 2-D buffer would be
        # overwritten by each (image, channel) pair in turn.
        output = np.zeros((num_img,num_cha,pool_height,pool_width))
        for k in range(pool_height):
            for l in range(pool_width):
                window = x[:, :, k:k+self.size, l:l+self.size]
                # The kernel broadcasts over the image and channel axes.
                output[:, :, k, l] = np.sum(window * self.weight, axis=(2, 3))
        return output

In [8]:
# Toy single-sample inputs: five values that sum to 1 and a one-hot vector.
# Both names are rebound by later cells.
pred = [0.4,0.3,0.05,0.05,0.2]
gt = [1,0,0,0,0]

In [9]:
# Draw a 5x5 matrix of standard-normal samples and display it.
# NOTE(review): no np.random.seed call appears above this cell, so the values
# presumably change on every fresh run — confirm whether that is intended.
pred = np.random.randn(5,5)
pred

array([[ 0.70703991,  0.63626583,  1.14009381,  1.55090988, -1.50142339],
       [ 0.09911874, -0.63833547,  1.82261671, -0.08275603, -0.94557025],
       [-0.66549651, -0.52234733, -3.22215571,  0.29914553, -1.98252454],
       [-0.14034839, -1.52024515, -1.69835735,  1.56765242,  0.16046378],
       [ 0.3461986 , -0.50141101, -0.10741842, -0.6429422 ,  1.60648052]])

In [10]:
# Row-wise softmax by hand: exponentiate, then divide each row by its sum.
# This rebinds pred, so running the cell again normalises the already
# normalised values a second time.
pred = np.exp(pred)/np.exp(pred).sum(axis=1)[:,None]
pred

array([[0.16923768, 0.15767407, 0.26095762, 0.3935366 , 0.01859403],
       [0.1209492 , 0.05785364, 0.67781105, 0.10083604, 0.04255007],
       [0.19518883, 0.22522877, 0.01513952, 0.51214587, 0.05229702],
       [0.12003312, 0.03020088, 0.02527356, 0.66233304, 0.16215941],
       [0.16772683, 0.07186052, 0.10656122, 0.06237695, 0.59147449]])

In [11]:
# 5x5 integer identity matrix: row i is the one-hot vector with its 1 in column i.
gt = np.eye(5, dtype=int)
gt

array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1]])

In [12]:
# Seed NumPy's global random state, then show the largest of nine
# standard-normal draws arranged as a 3x3 matrix.
np.random.seed(1)
np.random.randn(3,3).max()

1.74481176421648

In [13]:
# Display the current value of pred.
pred

array([[0.16923768, 0.15767407, 0.26095762, 0.3935366 , 0.01859403],
       [0.1209492 , 0.05785364, 0.67781105, 0.10083604, 0.04255007],
       [0.19518883, 0.22522877, 0.01513952, 0.51214587, 0.05229702],
       [0.12003312, 0.03020088, 0.02527356, 0.66233304, 0.16215941],
       [0.16772683, 0.07186052, 0.10656122, 0.06237695, 0.59147449]])

In [14]:
# Slice out the first two rows and first two columns of pred.
pred[0:2,0:2]

array([[0.16923768, 0.15767407],
       [0.1209492 , 0.05785364]])

In [17]:
# 4x4 grid holding the integers 0..15 in row-major order.
x_test = np.arange(16).reshape(4, 4)
x_test

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])