# Import Libraries

In [1]:
import numpy as np
import time

np.random.seed(97)


# 1. Layer

In [2]:
class Layer:
    
    def __init__(self, INPUT_DIM=(1,2,1), kernel=(1,2,1,1), stride=(1,1), OUTPUT_DIM=(1,1,1)):
        self.INPUT_DIM = INPUT_DIM                                                                  # INPUT_DIM  == (row_i, column_i, channel_i)
        self.OUTPUT_DIM = OUTPUT_DIM                                                                # OUTPUT_DIM == (row_o, column_o, channel_o)
        self.KERNEL_DIM = kernel                                                                    # kernel     == (row_k, column_k, channel_i, channel_o)
        self.stride = stride                                                                        # stride     == (row_s, column_s)    

#         self.w = np.zeros(self.KERNEL_DIM, dtype=np.float64)                                        # w_dim      == (row_k, column_k, channel_i) * channel_o
#         self.b = np.zeros(self.OUTPUT_DIM[-1], dtype=np.float64)                                    # b_dim      == 1 * channel_o
        self.w = np.random.uniform(-1, 1, self.KERNEL_DIM)                                          # w_dim      == (row_k, column_k, channel_i) * channel_o
        self.b = np.random.uniform(-1, 1, self.OUTPUT_DIM[-1])                                      # b_dim      == 1 * channel_o

    def forward(self, X):
        MIN_MARGIN = 2 ** -53
                    
        for row_idx in range(self.OUTPUT_DIM[0]):
            row_s = row_idx * self.stride[0]
            row_e = row_s + self.KERNEL_DIM[0]                                              
            for column_idx in range(self.OUTPUT_DIM[1]):
                column_s = column_idx * self.stride[1]
                column_e = column_s + self.KERNEL_DIM[1]                                
                for channel_o_idx in range(self.OUTPUT_DIM[2]): 

                    tmp_z = self.w[:,:,:,channel_o_idx] * X[:, row_s:row_e, column_s:column_e, :]   #       (row_k, column_k, channel_i, 1)
                                                                                                    # * (-1, row_k, column_k, channel_i)
                                                                                                    # = (-1, row_k, column_k, channel_i)
                    tmp_z = np.sum(tmp_z, axis=(1,2,3)) + self.b[channel_o_idx]                     # (-1, 1)
                    if len(tmp_z.shape)==1:
                        tmp_z=tmp_z.reshape(-1,1)
                    if row_idx == 0 and column_idx == 0 and channel_o_idx == 0:
                        self.z = tmp_z
                    else:
                        self.z = np.concatenate((self.z, tmp_z), axis=1)
                                                                                                        
                            
        self.z = self.z.reshape((-1,)+self.OUTPUT_DIM)                                              # (-1, row_o , column_o , channel_o)

        
        self.a = 1 / (1 + np.exp(-self.z))                                                          # (-1, row_o , column_o , channel_o)
        self.a = np.maximum(np.minimum(1 - MIN_MARGIN, self.a), MIN_MARGIN)                         # (-1, row_o , column_o , channel_o)
        return self.a                                                                               # (-1, row_o , column_o , channel_o)


    def backward(self, X, dx_next, learning_rate):
        # x_next == a_now
        # dx_next == da_now
        # dx_next == d_loss / dx_next == d_loss / da_now
        # == da
        da = dx_next                                                                                # (-1, row_o , column_o , channel_o)
        dz = self.a * (1 - self.a) * da                                                             # (-1, row_o , column_o , channel_o)
        dx = np.zeros(X.shape, dtype=np.float64)                                                    # (-1, row_i , column_i , channel_i)
        # check each "filters" in total kernel!
        for channel_o_idx in range(self.OUTPUT_DIM[2]):
            for row_idx in range(self.OUTPUT_DIM[0]):
                row_s = row_idx * self.stride[0]
                row_e = row_s + self.KERNEL_DIM[0]                                              
                for column_idx in range(self.OUTPUT_DIM[1]):
                    column_s = column_idx * self.stride[1]
                    column_e = column_s + self.KERNEL_DIM[1]                                
                    
                    # set dw
                    X_selected = X[:, row_s:row_e, column_s:column_e, :]                            # (-1, row_k, column_k, channel_i)
                    dz_selected = dz[:, row_idx, column_idx, channel_o_idx].reshape(-1,1,1,1)       # (-1,     1,        1,         1)
                    
                    if row_idx == 0 and column_idx == 0:
                        dw = X_selected * dz_selected                                               #   (-1, row_k, column_k, channel_i) 
                    else:                                                                           # * (-1,     1,        1,         1)
                        dw += X_selected * dz_selected                                              # = (-1, row_k, column_k, channel_i) 
                    
                    # set dx
                    w_selected = self.w[:,:,:,channel_o_idx]                                        #     (row_k, column_k, channel_i, 1)
                    dx[:, row_s:row_e, column_s:column_e, :] += dz_selected * w_selected            # (-1, row_k, column_k, channel_i, 1)
            
            dw = np.mean(dw, axis=0)                                                                # (1, row_k, column_k, channel_i)
            db = np.mean(np.sum(dz[:,:,:,channel_o_idx], axis=(1,2)), axis=0)                       # (1, 1)
            
            # update weights of current "filter" in total kernel
            # current "filter" == channel_th filter of kernel
            self.w[:,:,:,channel_o_idx] -= learning_rate * dw
            self.b[channel_o_idx] -= learning_rate * db
        
        dx = dx.reshape((-1,)+self.INPUT_DIM)                                                       # (-1, row_i, column_i, channel_i)
        return dx                                                                                   # (-1, row_i, column_i, channel_i)     

# 2. Interface of neural network

In [3]:
class Network:
    
    def __init__(self, layers):
        self.layers = layers
        self.inputs = ['tmp' for i in layers]
    
    def forward(self, X):
        input_mat = X
        for idx, layer in enumerate(self.layers):
            self.inputs[idx]=input_mat
            input_mat = layer.forward(input_mat)

        return input_mat
        
    def backward(self, dx_next, learning_rate):
        # dx_next == da_now (x->z->a)
        for layer, input_mat in zip(reversed(self.layers), reversed(self.inputs)):
            dx_next=layer.backward(input_mat, dx_next, learning_rate)
            
    def train(self, X, y, learning_rate):
        pred_y = self.forward(X)
        dy_of_dx = -y / pred_y + (1 - y) / (1 - pred_y)
        self.backward(dy_of_dx, learning_rate)
        
    def predict(self, X):
        return np.round(self.forward(X))
    
    def loss(self, X, y):
        pred_y = self.forward(X)
        return -np.mean(y * np.log(pred_y) + (1 - y) * np.log(1 - pred_y))
    
    def print_layers(self):
        for layer_idx, layer in enumerate(self.layers):
            print('Layer #{}.  input : {}\t kernel : {}\t output : {}'.format(layer_idx+1, layer.INPUT_DIM, layer.KERNEL_DIM, layer.OUTPUT_DIM))


# 3. Train Model

In [4]:
def create_samples_sum(sample_num):
    X = np.random.uniform(-10, 10, (sample_num, 2))
    y = (np.sum(X, 1) >0).astype(float)
    return X, y


In [5]:
def create_samples_mul(sample_num):
    X = np.random.uniform(-2, 2, (sample_num, 2))
    y = (X[:,0]*X[:,0] > X[:,1]).astype(float)
    return X, y


In [6]:
def run_practice_2(train_samples, test_samples, learning_rate, epochs, print_option='default'):
    
    train_X, train_y = create_samples_sum(train_samples)
    test_X, test_y = create_samples_sum(test_samples)
    if print_option != 'nothing': print('shape of samples(before): ', train_X.shape, train_y.shape)

    train_X, train_y = train_X.reshape(-1,1,1,2), train_y.reshape(-1,1,1,1)
    test_X, test_y = test_X.reshape(-1,1,1,2), test_y.reshape(-1,1,1,1)
    if print_option != 'nothing': print('shape of samples(after): ', train_X.shape, train_y.shape)
 
    # make model
    model = Network(
        layers = [
            Layer(INPUT_DIM=(1,1,2), kernel=(1,1,2,1), stride=(1,1), OUTPUT_DIM=(1,1,1)),
        ] 
    )
    if print_option != 'nothing': 
        print('train_samples : ', train_samples)
        print('test_samples  : ', test_samples)
        print('epochs        : ', epochs)
        print('learning_rate : ', learning_rate)
        model.print_layers();

    for epoch in range(epochs):
        model.train(train_X, train_y, learning_rate)

        if print_option == 'progress':
            print('\nepoch #' + str(epoch+1))
            print('w :' + str(model.layers[0].w.reshape((1,2))))
            print('b : ' + str(model.layers[0].b))
            
    return {'w': model.layers[0].w.reshape((1,2)), 
            'b': model.layers[0].b,
            'train_loss': model.loss(train_X, train_y),
            'test_loss': model.loss(test_X, test_y),
            'train_acc': 100 * np.mean(model.predict(train_X).astype(bool) == train_y.astype(bool)),
            'test_acc': 100 * np.mean(model.predict(test_X).astype(bool) == test_y.astype(bool))}


In [7]:
def run_practice_3(train_samples, test_samples, learning_rate, epochs, print_option='default'):
    
    train_X, train_y = create_samples_mul(train_samples)
    test_X, test_y = create_samples_mul(test_samples)
    if print_option != 'nothing': print('shape of samples(before): ', train_X.shape, train_y.shape)

    train_X, train_y = train_X.reshape(-1,1,1,2), train_y.reshape(-1,1,1,1)
    test_X, test_y = test_X.reshape(-1,1,1,2), test_y.reshape(-1,1,1,1)
    if print_option != 'nothing': print('shape of samples(after): ', train_X.shape, train_y.shape)
 
    # make model
    model = Network(
        layers = [
            Layer(INPUT_DIM=(1,1,2), kernel=(1,1,2,10), stride=(1,1), OUTPUT_DIM=(1,1,10)),
            Layer(INPUT_DIM=(1,1,10), kernel=(1,1,10,5), stride=(1,1), OUTPUT_DIM=(1,1,5)),
            Layer(INPUT_DIM=(1,1,5), kernel=(1,1,5,1), stride=(1,1), OUTPUT_DIM=(1,1,1)),
        ] 
    )
    if print_option != 'nothing': 
        print('train_samples : ', train_samples)
        print('test_samples  : ', test_samples)
        print('epochs        : ', epochs)
        print('learning_rate : ', learning_rate)
        model.print_layers();

    for epoch in range(epochs):
        model.train(train_X, train_y, learning_rate)

        if print_option == 'progress':
            print('\nepoch #' + str(epoch+1))
            print('w :' + str(model.layers[0].w.reshape((1,2))))
            print('b : ' + str(model.layers[0].b))
            
    return {
            'train_loss': model.loss(train_X, train_y),
            'test_loss': model.loss(test_X, test_y),
            'train_acc': 100 * np.mean(model.predict(train_X).astype(bool) == train_y.astype(bool)),
            'test_acc': 100 * np.mean(model.predict(test_X).astype(bool) == test_y.astype(bool))
    }


In [8]:
train_samples = 100 # m
test_samples = 100 # n
learning_rate = 1e-2
epochs = 100 # K


In [9]:
run_practice_2(train_samples, test_samples, learning_rate, epochs)
#run_practice_3(train_samples, test_samples, learning_rate, epochs)



shape of samples(before):  (100, 2) (100,)
shape of samples(after):  (100, 1, 1, 2) (100, 1, 1, 1)
train_samples :  100
test_samples  :  100
epochs        :  100
learning_rate :  0.01
Layer #1.  input : (1, 1, 2)	 kernel : (1, 1, 2, 1)	 output : (1, 1, 1)


{'w': array([[0.3623896 , 0.38447664]]),
 'b': array([-0.37123757]),
 'train_loss': 0.1923212444689857,
 'test_loss': 0.2301040789467194,
 'train_acc': 95.0,
 'test_acc': 91.0}

# 4. Run Tasks

In [10]:
train_samples = 1000 # m
test_samples = 100 # n
learning_rate = 0.5
epochs = 1000 # K


##  a) Task 1

In [11]:
def run_task_1(train_samples, test_samples, learning_rate, epochs, print_option='default'):
    
    train_X, train_y = create_samples_mul(train_samples)
    test_X, test_y = create_samples_mul(test_samples)
    if print_option != 'nothing': print('shape of samples(before): ', train_X.shape, train_y.shape)

    train_X, train_y = train_X.reshape(-1,1,1,2), train_y.reshape(-1,1,1,1)
    test_X, test_y = test_X.reshape(-1,1,1,2), test_y.reshape(-1,1,1,1)
    if print_option != 'nothing': print('shape of samples(after): ', train_X.shape, train_y.shape)
 
    # make model
    model = Network(
        layers = [
            Layer(INPUT_DIM=(1,1,2), kernel=(1,1,2,1), stride=(1,1), OUTPUT_DIM=(1,1,1)),
        ] 
    )
    if print_option != 'nothing': 
        print('train_samples : ', train_samples)
        print('test_samples  : ', test_samples)
        print('epochs        : ', epochs)
        print('learning_rate : ', learning_rate)
        model.print_layers();

    for epoch in range(epochs):
        model.train(train_X, train_y, learning_rate)
        
        if print_option == 'progress':
            print('\nepoch #' + str(epoch+1))
            print('w :' + str(model.layers[0].w.reshape((1,2))))
            print('b : ' + str(model.layers[0].b))
            
    return {'w': model.layers[0].w.reshape((1,2)), 
            'b': model.layers[0].b,
            'train_loss': model.loss(train_X, train_y),
            'test_loss': model.loss(test_X, test_y),
            'train_acc': 100 * np.mean(model.predict(train_X).astype(bool) == train_y.astype(bool)),
            'test_acc': 100 * np.mean(model.predict(test_X).astype(bool) == test_y.astype(bool))}


In [12]:
run_task_1(train_samples, test_samples, learning_rate, epochs)


shape of samples(before):  (1000, 2) (1000,)
shape of samples(after):  (1000, 1, 1, 2) (1000, 1, 1, 1)
train_samples :  1000
test_samples  :  100
epochs        :  1000
learning_rate :  0.5
Layer #1.  input : (1, 1, 2)	 kernel : (1, 1, 2, 1)	 output : (1, 1, 1)


{'w': array([[ 0.12136027, -2.13662267]]),
 'b': array([2.42794785]),
 'train_loss': 0.32088295598973277,
 'test_loss': 0.40643315780460765,
 'train_acc': 83.2,
 'test_acc': 80.0}

## b) Task 2

In [13]:
def run_task_2(train_samples, test_samples, learning_rate, epochs, print_option='default'):
    
    train_X, train_y = create_samples_mul(train_samples)
    test_X, test_y = create_samples_mul(test_samples)
    if print_option != 'nothing': print('shape of samples(before): ', train_X.shape, train_y.shape)

    train_X, train_y = train_X.reshape(-1,1,1,2), train_y.reshape(-1,1,1,1)
    test_X, test_y = test_X.reshape(-1,1,1,2), test_y.reshape(-1,1,1,1)
    if print_option != 'nothing': print('shape of samples(after): ', train_X.shape, train_y.shape)
 
    # make model
    model = Network(
        layers = [
            Layer(INPUT_DIM=(1,1,2), kernel=(1,1,2,1), stride=(1,1), OUTPUT_DIM=(1,1,1)),
            Layer(INPUT_DIM=(1,1,1), kernel=(1,1,1,1), stride=(1,1), OUTPUT_DIM=(1,1,1)),
        ] 
    )
    if print_option != 'nothing': 
        print('train_samples : ', train_samples)
        print('test_samples  : ', test_samples)
        print('epochs        : ', epochs)
        print('learning_rate : ', learning_rate)
        model.print_layers();

    for epoch in range(epochs):
        model.train(train_X, train_y, learning_rate)
        
        if print_option == 'progress':
            print('\nepoch #' + str(epoch+1))
            print('w :' + str(model.layers[0].w.reshape((1,2))))
            print('b : ' + str(model.layers[0].b))
            
    return {'w0': model.layers[0].w.reshape((1,2)), 
            'b0': model.layers[0].b,
            'w1': model.layers[1].w.reshape((1,1)), 
            'b1': model.layers[1].b,
            'train_loss': model.loss(train_X, train_y),
            'test_loss': model.loss(test_X, test_y),
            'train_acc': 100 * np.mean(model.predict(train_X).astype(bool) == train_y.astype(bool)),
            'test_acc': 100 * np.mean(model.predict(test_X).astype(bool) == test_y.astype(bool))}


In [14]:
run_task_2(train_samples, test_samples, learning_rate, epochs)


shape of samples(before):  (1000, 2) (1000,)
shape of samples(after):  (1000, 1, 1, 2) (1000, 1, 1, 1)
train_samples :  1000
test_samples  :  100
epochs        :  1000
learning_rate :  0.5
Layer #1.  input : (1, 1, 2)	 kernel : (1, 1, 2, 1)	 output : (1, 1, 1)
Layer #2.  input : (1, 1, 1)	 kernel : (1, 1, 1, 1)	 output : (1, 1, 1)


{'w0': array([[1.0546917 , 3.82076333]]),
 'b0': array([-0.42414055]),
 'w1': array([[-5.26771531]]),
 'b1': array([4.79376737]),
 'train_loss': 0.3197236111774917,
 'test_loss': 0.31104145063535904,
 'train_acc': 81.39999999999999,
 'test_acc': 82.0}

## c) Task 3

In [15]:
def run_task_3(train_samples, test_samples, learning_rate, epochs, print_option='default'):
    
    train_X, train_y = create_samples_mul(train_samples)
    test_X, test_y = create_samples_mul(test_samples)
    if print_option != 'nothing': print('shape of samples(before): ', train_X.shape, train_y.shape)

    train_X, train_y = train_X.reshape(-1,1,1,2), train_y.reshape(-1,1,1,1)
    test_X, test_y = test_X.reshape(-1,1,1,2), test_y.reshape(-1,1,1,1)
    if print_option != 'nothing': print('shape of samples(after): ', train_X.shape, train_y.shape)
 
    # make model
    model = Network(
        layers = [
            Layer(INPUT_DIM=(1,1,2), kernel=(1,1,2,3), stride=(1,1), OUTPUT_DIM=(1,1,3)),
            Layer(INPUT_DIM=(1,1,3), kernel=(1,1,3,1), stride=(1,1), OUTPUT_DIM=(1,1,1)),
        ] 
    )
    if print_option != 'nothing': 
        print('train_samples : ', train_samples)
        print('test_samples  : ', test_samples)
        print('epochs        : ', epochs)
        print('learning_rate : ', learning_rate)
        model.print_layers();

    for epoch in range(epochs):
        model.train(train_X, train_y, learning_rate)
        
        if print_option == 'progress':
            print('\nepoch #' + str(epoch+1))
            print('w :' + str(model.layers[0].w.reshape((1,2))))
            print('b : ' + str(model.layers[0].b))
            
    return {'w0': model.layers[0].w.reshape((3,2)), 
            'b0': model.layers[0].b.reshape((3,1)),
            'w1': model.layers[1].w.reshape((1,3)), 
            'b1': model.layers[1].b,
            'train_loss': model.loss(train_X, train_y),
            'test_loss': model.loss(test_X, test_y),
            'train_acc': 100 * np.mean(model.predict(train_X).astype(bool) == train_y.astype(bool)),
            'test_acc': 100 * np.mean(model.predict(test_X).astype(bool) == test_y.astype(bool))}


In [16]:
run_task_3(train_samples, test_samples, learning_rate, epochs)

shape of samples(before):  (1000, 2) (1000,)
shape of samples(after):  (1000, 1, 1, 2) (1000, 1, 1, 1)
train_samples :  1000
test_samples  :  100
epochs        :  1000
learning_rate :  0.5
Layer #1.  input : (1, 1, 2)	 kernel : (1, 1, 2, 3)	 output : (1, 1, 3)
Layer #2.  input : (1, 1, 3)	 kernel : (1, 1, 3, 1)	 output : (1, 1, 1)


{'w0': array([[-4.59781693, -1.82834322],
        [-4.78475032,  2.84941992],
        [ 2.14553823, -2.86890021]]),
 'b0': array([[ 1.15998563],
        [-0.12311688],
        [-1.59139267]]),
 'w1': array([[-5.93242727, -2.29232352,  7.5587517 ]]),
 'b1': array([4.2942567]),
 'train_loss': 0.0715170417173077,
 'test_loss': 0.09507225857137415,
 'train_acc': 98.9,
 'test_acc': 98.0}

## d) Find Best Parameter in Task 3

#### - Learning Rate

In [None]:
lr=10.0
trials=10
results = []

while(lr>1e-3):
    train_acc = 0.0
    test_acc = 0.0
    for trial in range(trials):
        result = run_task_3(train_samples, test_samples, learning_rate, epochs, print_option='nothing')
        train_acc+=result['train_acc']
        test_acc+=result['test_acc']
    
    results.append({
        'lr': lr,
        'train_acc':train_acc/trials,
        'test_acc':test_acc/trials
    })
    lr/=1.5
    
print(np.array(results))


In [None]:
best_learning_rate = sorted(results, key = lambda x : x['train_acc']+x['test_acc'])[-1]['lr']
print('best_learning_rate : ', best_learning_rate)

#### - Get the Result of the Best Case

In [None]:
result = run_task_3(train_samples, test_samples, best_learning_rate, epochs, print_option='nothing')
while(result['test_acc']<99.5 or result['train_acc']<99.5):
    result = run_task_3(train_samples, test_samples, learning_rate, epochs, print_option='nothing')


In [None]:
result
