In [1]:
import numpy as np 
import matplotlib.pyplot as plt

# install
## numpy
## matplotlib

## data load & preprocessing

In [2]:
from dataset.mnist import load_mnist

# Load the MNIST train/test splits. flatten=False and normalize=False are passed,
# presumably so the images come back unflattened and unscaled (TODO confirm against
# load_mnist); flattening and 0-1 scaling are done explicitly in later cells.
(train_raw_img, train_label), (test_raw_img, test_label) = load_mnist(flatten=False, normalize=False)
# The recorded output for this cell shows (60000, 1, 28, 28) for the training images.
print(train_raw_img.shape)

(60000, 1, 28, 28)


In [3]:
# preprocessing (train & inference)

# Flatten every image to one row per sample and make the labels a column vector.
# The row count comes from the array itself: taking it from a squeezed view is
# wrong when a split holds exactly one sample, because squeeze() then removes the
# sample axis as well and len() reports the image height instead.
train_img = train_raw_img.reshape(len(train_raw_img), -1)
train_label = train_label.reshape(len(train_label), -1)

test_img = test_raw_img.reshape(len(test_raw_img), -1)
test_label = test_label.reshape(len(test_label), -1)

# Sanity check: images should be (N, pixels) and labels (N, 1).
print(train_img.shape)
print(train_label.shape)
print(test_img.shape)
print(test_label.shape)

(60000, 784)
(60000, 1)
(10000, 784)
(10000, 1)


In [4]:
# normalization (set value 0 ~ 1)

# Scale pixel values by 1/255 (maps 0-255 inputs into 0-1; assumes the raw images
# are 8-bit intensities -- TODO confirm against load_mnist).
# Both arrays are rebuilt from the raw images every time this cell runs, so
# re-executing the cell can never divide by 255 a second time.
train_img = train_raw_img.reshape(len(train_raw_img), -1).astype('float') / 255

test_img = test_raw_img.reshape(len(test_raw_img), -1).astype('float') / 255

## model

In [5]:
class Linear:
    """Fully connected layer computing ``x @ W + b`` with a built-in SGD step.

    Attributes:
        W: weight matrix of shape (input_size, hidden_size), drawn from a
            standard normal distribution.
        b: bias vector of shape (hidden_size,), initialised to zero.
        x: the input seen by the most recent forward() call; backward() needs it.
    """

    def __init__(self, input_size=1, hidden_size=1):
        # Initialize weights and biases
        self.W = np.random.randn(input_size, hidden_size)
        self.b = np.zeros(hidden_size)

        # Store input size and hidden size
        self.input_size = input_size
        self.hidden_size = hidden_size

    def forward(self, x):
        """Return ``x @ W + b`` and remember ``x`` for the backward pass."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout, lr, lamb=0):
        """Back-propagate ``dout``, apply one gradient step, and return dL/dx.

        Args:
            dout: gradient of the loss with respect to this layer's output.
            lr: learning rate used for the in-place parameter update.
            lamb: L2 weight-decay coefficient; ``lamb * W`` is added to dW.

        Returns:
            Gradient of the loss with respect to the layer input.
        """
        # Calculate gradients for weights and biases
        dW = np.dot(self.x.T, dout)
        db = np.sum(dout, axis=0)

        # The input gradient must use the weights that produced the forward
        # output, so compute it BEFORE the parameters are updated.
        dx = np.dot(dout, self.W.T)

        # Update weights and biases
        self.W -= lr * (dW + lamb * self.W)
        self.b -= lr * db

        return dx


# [실습 1] activation function 구현

In [6]:
class ReLU:
    """Rectified linear unit: passes positive inputs, zeroes everything else."""

    def __init__(self):
        # Boolean array marking the positions where the last input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with all entries <= 0 set to 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with the gradient blocked wherever the input was <= 0.

        Works on a copy so the caller's ``dout`` array is left untouched.
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

class LeakyReLU:
    """Leaky ReLU: identity for positive inputs, slope ``alpha`` for inputs <= 0."""

    def __init__(self, alpha=0.01):
        # Slope applied to non-positive inputs.
        self.alpha = alpha
        # Boolean array marking the positions where the last input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with entries <= 0 multiplied by ``alpha``."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] *= self.alpha
        return out

    def backward(self, dout):
        """Return ``dout`` scaled by ``alpha`` wherever the input was <= 0.

        Works on a copy so the caller's ``dout`` array is left untouched.
        """
        dx = dout.copy()
        dx[self.mask] *= self.alpha
        return dx

class Sigmoid:
    """Logistic activation ``1 / (1 + exp(-x))``."""

    def __init__(self):
        # Output of the latest forward() call; the derivative is expressed with it.
        self.out = None

    def forward(self, x):
        """Apply the logistic function element-wise and cache the result."""
        self.out = 1 / (np.exp(-x) + 1)
        return self.out

    def backward(self, dout):
        """Scale ``dout`` by the sigmoid derivative ``y * (1 - y)``."""
        y = self.out
        return dout * y * (1 - y)

class Tanh:
    """Hyperbolic-tangent activation."""

    def __init__(self):
        # Output of the latest forward() call; the derivative is expressed with it.
        self.out = None

    def forward(self, x):
        """Apply tanh element-wise and cache the result."""
        self.out = np.tanh(x)
        return self.out

    def backward(self, dout):
        """Scale ``dout`` by the tanh derivative ``1 - y**2``."""
        local_grad = 1 - np.square(self.out)
        return dout * local_grad


In [7]:
class Softmax_with_CrossEntropy :
    """Row-wise softmax followed by a cross-entropy loss.

    The two forward steps are separate methods so that inference can run the
    softmax alone; backward() returns the gradient with respect to the logits.
    """

    def __init__(self) :
        # softmax: latest logits and the probabilities derived from them
        self.softmax_x = None
        self.softmax_out = None
        # crossEntropy: latest predictions / targets, plus a small constant
        # added inside the log so that log(0) never occurs
        self.pred = None
        self.target = None
        self.delta = 1e-7

    def softmax_forward(self, x) :
        """Return softmax probabilities for each row of the 2-D array ``x``."""
        self.softmax_x = x
        # Subtracting the row maximum keeps exp() from overflowing.
        row_max = np.max(self.softmax_x, axis=1, keepdims=True)
        exps = np.exp(self.softmax_x - row_max)
        self.softmax_out = exps / np.sum(exps, axis=1, keepdims=True)
        return self.softmax_out

    def crossEntropy_forward(self, pred, target) :
        """Return the per-sample cross-entropy of ``pred`` against ``target``."""
        self.pred, self.target = pred, target
        log_pred = np.log(self.pred + self.delta)
        return -np.sum(self.target * log_pred, axis=1)

    def backward(self) :
        """Return ``(pred - target) / batch_size`` from the cached forward values."""
        batch_size = len(self.pred)
        return (self.pred - self.target) / batch_size

## Train / Eval

In [8]:
# Helper that builds one-hot label rows

def make_one_hot(labels, num_classes=10) :
    """Convert integer class labels into one-hot rows.

    Args:
        labels: sequence or array of integer class indices, either flat (N,)
            or one row per sample (N, k). Every index in a row is set to 1.
        num_classes: width of each one-hot row (defaults to the 10 digits).

    Returns:
        Float array of shape (N, num_classes); shape (0, num_classes) when
        ``labels`` is empty.
    """
    label_arr = np.asarray(labels)
    if label_arr.size == 0 :
        return np.zeros((0, num_classes))

    # One row of indices per sample, so (N,) and (N, 1) inputs behave the same.
    idx = label_arr.reshape(len(label_arr), -1)
    one_hot = np.zeros((len(idx), num_classes))
    # Broadcast the row numbers against the per-row indices to set all ones at once.
    one_hot[np.arange(len(idx))[:, None], idx] = 1
    return one_hot

# one_hot_labels = make_one_hot(train_label)
# print(train_label[0])
# print(one_hot_labels[0])

In [9]:
def eval(model, train_version = True) :
    """Run ``model`` over one data split and return its classification accuracy.

    Args:
        model: ordered mapping of layers; they are applied in iteration order.
        train_version: True evaluates on train_img/train_label, False on
            test_img/test_label.

    Returns:
        Fraction of samples whose arg-max prediction equals the label.
    """
    if train_version :
        x, labels = train_img, train_label.squeeze()
        print('In train dataset ... ')
    else :
        x, labels = test_img, test_label.squeeze()
        print('\nIn test dataset ... ')

    for layer in model.values() :
        # The loss layer only contributes its softmax at inference time.
        is_loss_layer = isinstance(layer, Softmax_with_CrossEntropy)
        step = layer.softmax_forward if is_loss_layer else layer.forward
        x = step(x)

    preds = x.argmax(axis=1)
    num_correct = (preds == labels).sum()
    return num_correct / len(labels)

In [10]:
from collections import OrderedDict

def train_MLP(config) :
    """Train a 784-100-10 MLP on ``train_img`` with full-batch gradient descent.

    Args:
        config: dict with 'learning_rate' (step size), 'num_epoch' (number of
            full-batch updates) and 'activation_function' (a layer object with
            forward/backward, placed between the two Linear layers).

    Returns:
        OrderedDict of the trained layers in forward order, keyed 'layer1',
        'activation_function', 'layer2' and 'softmax_with_CE'.
    """
    lr, num_epoch = config['learning_rate'], config['num_epoch']
    print_loss_interval = 20

    layer1 = Linear(784, 100)
    activation_function = config['activation_function']
    layer2 = Linear(100, 10)
    softmax_with_CE = Softmax_with_CrossEntropy()

    # The targets never change between epochs, so encode them once up front.
    one_hot_labels = make_one_hot(train_label)

    for epoch in range(num_epoch) :
        # forward
        x = layer1.forward(train_img)
        x = activation_function.forward(x)
        x = layer2.forward(x)
        preds = softmax_with_CE.softmax_forward(x)

        # loss (mean over the whole training set)
        losses = softmax_with_CE.crossEntropy_forward(preds, one_hot_labels)
        loss = losses.sum()/len(preds)

        # backward: each Linear layer also applies its own parameter update
        dL = softmax_with_CE.backward()
        dL = layer2.backward(dL, lr)
        dL = activation_function.backward(dL)
        dL = layer1.backward(dL, lr)

        if epoch == 0 or (epoch+1) % print_loss_interval == 0:
            print("[epoch %d / %d] average loss : %f" % (epoch+1, num_epoch, loss))

    model = OrderedDict()
    model['layer1'] = layer1
    model['activation_function'] = activation_function
    model['layer2'] = layer2
    model['softmax_with_CE'] = softmax_with_CE

    return model

In [12]:
# The four runs share the optimiser settings and differ only in the activation.
shared_settings = {'learning_rate': 0.1, 'num_epoch': 100}

config1 = dict(shared_settings, activation_function=ReLU())
config2 = dict(shared_settings, activation_function=LeakyReLU())
config3 = dict(shared_settings, activation_function=Sigmoid())
config4 = dict(shared_settings, activation_function=Tanh())

# Train one model per activation; each banner labels the loss log that follows.
print('---ReLU---')
model1 = train_MLP(config1)

print('\n---Leaky_Relu---')
model2 = train_MLP(config2)

print('\n---Sigmoid---')
model3 = train_MLP(config3)

print('\n---TanH---')
model4 = train_MLP(config4)

---ReLU---
[epoch 1 / 100] average loss : 14.950970
[epoch 20 / 100] average loss : 5.157625
[epoch 40 / 100] average loss : 3.568637
[epoch 60 / 100] average loss : 2.956997
[epoch 80 / 100] average loss : 2.607218
[epoch 100 / 100] average loss : 2.367798

---Leaky_Relu---
[epoch 1 / 100] average loss : 14.447551
[epoch 20 / 100] average loss : 5.268470
[epoch 40 / 100] average loss : 3.749723
[epoch 60 / 100] average loss : 3.122307
[epoch 80 / 100] average loss : 2.750167
[epoch 100 / 100] average loss : 2.492386

---Sigmoid---
[epoch 1 / 100] average loss : 10.126408
[epoch 20 / 100] average loss : 5.111660
[epoch 40 / 100] average loss : 4.137771
[epoch 60 / 100] average loss : 3.443539
[epoch 80 / 100] average loss : 2.942252
[epoch 100 / 100] average loss : 2.572567

---TanH---
[epoch 1 / 100] average loss : 11.754419
[epoch 20 / 100] average loss : 8.225606
[epoch 40 / 100] average loss : 5.967779
[epoch 60 / 100] average loss : 4.640082
[epoch 80 / 100] average loss : 3.82818

In [None]:
# evaluation

# Test-set accuracy of each trained model, in training order (ReLU, LeakyReLU, Sigmoid, Tanh).
for trained_model in (model1, model2, model3, model4):
    print('\t Accuracy :', eval(trained_model, train_version=False))

# [실습 2] type of gradient descent

In [None]:
from collections import OrderedDict

def train_MLP_v2(config) :
    """Train a 784-100-10 MLP on ``train_img`` with a configurable batch size.

    A batch size of len(train_img) gives full-batch gradient descent, 1 gives
    stochastic gradient descent, and anything in between gives mini-batch
    gradient descent. Samples are reshuffled at the start of every epoch.

    Args:
        config: dict with 'learning_rate', 'num_epoch', 'activation_function'
            (a layer object with forward/backward) and 'batch_size'.

    Returns:
        OrderedDict of the trained layers in forward order, keyed 'layer1',
        'activation_function', 'layer2' and 'softmax_with_CE'.

    Raises:
        ValueError: if 'batch_size' is smaller than 1.
    """
    lr, num_epoch = config['learning_rate'], config['num_epoch']
    batch_size = config['batch_size']
    if batch_size < 1 :
        raise ValueError("batch_size must be at least 1, got %r" % (batch_size,))
    print_loss_interval = 20

    layer1 = Linear(784,100)
    activation_function = config['activation_function']
    layer2 = Linear(100,10)
    # The instance needs its own name: rebinding the class name inside the
    # function would make it a local variable and raise UnboundLocalError.
    softmax_with_CE = Softmax_with_CrossEntropy()

    num_samples = len(train_img)
    one_hot_labels = make_one_hot(train_label)

    for epoch in range(num_epoch) :
        # Visit the samples in a fresh random order each epoch.
        order = np.random.permutation(num_samples)
        total_loss = 0.0

        for start in range(0, num_samples, batch_size) :
            batch_idx = order[start:start + batch_size]
            x_batch = train_img[batch_idx]
            t_batch = one_hot_labels[batch_idx]

            # forward
            x = layer1.forward(x_batch)
            x = activation_function.forward(x)
            x = layer2.forward(x)
            preds = softmax_with_CE.softmax_forward(x)

            # loss (summed here, averaged over the epoch below)
            losses = softmax_with_CE.crossEntropy_forward(preds, t_batch)
            total_loss += losses.sum()

            # backward: each Linear layer also applies its own parameter update
            dL = softmax_with_CE.backward()
            dL = layer2.backward(dL, lr)
            dL = activation_function.backward(dL)
            dL = layer1.backward(dL, lr)

        loss = total_loss / num_samples
        if epoch == 0 or (epoch+1) % print_loss_interval == 0:
            print("[epoch %d / %d] average loss : %f" % (epoch+1, num_epoch, loss))

    model = OrderedDict()
    model['layer1'] = layer1
    model['activation_function'] = activation_function
    model['layer2'] = layer2
    model['softmax_with_CE'] = softmax_with_CE

    return model
    

In [None]:
# Three runs that differ only in batch size: the whole training set per update,
# a mini-batch, and a single sample per update.
print('---batch---')
config_batch = { 'learning_rate' : 0.1,
            'num_epoch' : 20,
            'activation_function' : ReLU(),
            'batch_size' : len(train_img)
          }
model_batch = train_MLP_v2(config_batch)


print('\n---mini_batch---')
config_mini_batch = { 'learning_rate' : 0.1,
            'num_epoch' : 20,
            'activation_function' : ReLU(),
            # Any value between 1 and len(train_img) is a valid mini-batch size.
            'batch_size' : 100
          }
model_mini_batch = train_MLP_v2(config_mini_batch)

print('\n---stochastic---')
config_stochastic = { 'learning_rate' : 0.1,
            'num_epoch' : 20,
            'activation_function' : ReLU(),
            'batch_size' : 1
          }
model_stochastic = train_MLP_v2(config_stochastic)

In [None]:
# Test-set accuracy for the batch, mini-batch and stochastic runs, in that order.
for trained_model in (model_batch, model_mini_batch, model_stochastic):
    print('\t Accuracy :', eval(trained_model, train_version=False))

# Extra problem 1 (Multi Support Vector Machine)

In [None]:
class MSVM :
    """Placeholder for the multi-class SVM loss layer of extra problem 1.

    Not implemented yet: every method body is ``pass``, so each call returns None.
    """
    def __init__(self) :
        # No state is set up yet.
        pass
        
    def forward(self, pred, target) :
        """Unimplemented forward pass over ``pred`` and ``target``; returns None for now."""
        pass
    
    def backward(self) :
        """Unimplemented backward pass; returns None for now."""
        pass

In [None]:
from collections import OrderedDict

def train_MSVM(config) :
    """Placeholder for the MSVM training routine; not implemented, so it returns None."""
    pass

In [None]:
# Settings for the MSVM exercise run; same three keys as the configs passed to train_MLP.
config = { 'learning_rate' : 0.1,
            'num_epoch' : 100,
            'activation_function' : ReLU()
          }

# NOTE(review): train_MSVM is still a stub whose body is `pass`, so `model` is
# None until that function is implemented.
model = train_MSVM(config)

In [None]:
# Report accuracy on the test split (train_version=False makes eval use test_img / test_label).
print('\t Accuracy :', eval(model, train_version=False))

# Extra problem 2 (3-layer 이상 MLP 구현)