In [16]:
import numpy as np
import csv
import time

np.random.seed(1234)  # fixed seed so that runs are reproducible by default


def randomize():
    """Re-seed NumPy's global random generator from the current wall-clock time.

    ``np.random.seed`` only accepts integer seeds in ``[0, 2**32 - 1]``, so the
    float returned by ``time.time()`` is truncated to whole seconds and wrapped
    into that range before it is used as the seed.
    """
    np.random.seed(int(time.time()) % (2 ** 32))

In [17]:
# Hyperparameters

# Mean and standard deviation of the normal distribution from which
# alloc_param_pair draws the initial weights.
RND_MEAN = 0
RND_STD = 0.0030

# Factor applied to every gradient when the backprop functions update parameters.
LEARNING_RATE = 0.001

In [18]:
def abalone_exec(epoch_count=10, mb_size=10, report=1):
    """Run the whole experiment: load the data, build the model, train and evaluate.

    Args:
        epoch_count: number of passes over the training split.
        mb_size: number of samples per mini-batch.
        report: reporting interval in epochs, forwarded to train_and_test.
    """
    # Step 1: read the CSV into the module-level dataset globals.
    load_abalone_dataset()

    # Step 2: allocate parameters for the architecture chosen via set_hidden().
    init_model()

    # Step 3: run the training loop and print the evaluation results.
    train_and_test(epoch_count, mb_size, report)

In [19]:
def load_abalone_dataset(path='../../DataSets/abalone.data.csv'):
    """Load the abalone CSV into the module-level ``data`` array.

    Every non-empty CSV row must start with a category code ('I', 'M' or 'F')
    followed by eight numeric values. The code is expanded into three one-hot
    columns (in I, M, F order) and the numeric values fill columns 3 onward,
    giving ``input_cnt + output_cnt`` = 11 columns per row. The file is read
    without skipping a header line.

    Args:
        path: location of the CSV file. Defaults to the path that was
            previously hard-coded, so zero-argument calls behave as before.

    Side effects:
        Sets the globals ``data``, ``input_cnt`` and ``output_cnt``.
    """
    global data, input_cnt, output_cnt

    # newline='' is what the csv module recommends for files given to csv.reader.
    with open(path, newline='') as csvfile:
        # csv.reader yields an empty list for a blank line; dropping those
        # keeps a trailing empty line from crashing on row[0] below.
        rows = [row for row in csv.reader(csvfile) if row]

    input_cnt, output_cnt = 10, 1
    data = np.zeros([len(rows), input_cnt + output_cnt])

    onehot_column = {'I': 0, 'M': 1, 'F': 2}
    for n, row in enumerate(rows):
        column = onehot_column.get(row[0])
        if column is not None:  # an unknown code leaves all three flags at 0
            data[n, column] = 1
        # Convert the remaining string fields to floats explicitly.
        data[n, 3:] = np.asarray(row[1:], dtype=float)

In [20]:
def init_model_hidden1():
    """Allocate the parameters of the network with exactly one hidden layer.

    Reads the globals ``input_cnt``, ``hidden_cnt`` and ``output_cnt`` and
    stores the freshly allocated weight/bias pairs in ``pm_hidden`` and
    ``pm_output``.
    """
    global pm_output, pm_hidden, input_cnt, output_cnt, hidden_cnt

    hidden_shape = [input_cnt, hidden_cnt]     # input -> hidden layer
    output_shape = [hidden_cnt, output_cnt]    # hidden -> output layer

    # Hidden layer first, then output layer, so the random draws are consumed
    # in a fixed order.
    pm_hidden = alloc_param_pair(hidden_shape)
    pm_output = alloc_param_pair(output_shape)
    
def alloc_param_pair(shape):
    """Create the weight matrix and bias vector of one layer.

    Args:
        shape: shape of the weight matrix; its last entry is the number of
            output units of the layer.

    Returns:
        dict with 'w' (weights drawn from a normal distribution with mean
        RND_MEAN and standard deviation RND_STD) and 'b' (zeros, one entry
        per output unit).
    """
    unit_count = shape[-1]
    return {
        'w': np.random.normal(RND_MEAN, RND_STD, shape),  # normally distributed initial weights
        'b': np.zeros(unit_count),                        # biases start at zero
    }

In [21]:
def train_and_test(epoch_count, mb_size, report):
    """Train with mini-batch updates for ``epoch_count`` epochs and report accuracy.

    Args:
        epoch_count: number of passes over the training split.
        mb_size: number of samples per mini-batch.
        report: print a progress line every ``report`` epochs; a value <= 0
            disables the periodic report (the final test line is always printed).
    """
    step_count = arrange_data(mb_size)
    test_x, test_y = get_test_data()

    for epoch in range(epoch_count):
        losses, accs = [], []

        for n in range(step_count):
            train_x, train_y = get_train_data(mb_size, n)
            loss, acc = run_train(train_x, train_y)
            losses.append(loss)
            accs.append(acc)

        if report > 0 and (epoch + 1) % report == 0:
            acc = run_test(test_x, test_y)
            # Accuracy is printed as <mean training accuracy>/<test accuracy>.
            # The format string no longer ends with a stray, unmatched ']'.
            print('Epoch {}: loss={:5.3f}, accuracy={:5.3f}/{:5.3f}'.format(
                epoch + 1, np.mean(losses), np.mean(accs), acc))

    final_acc = run_test(test_x, test_y)
    print('\nFinal Test: final accuracy = {:5.3f}'.format(final_acc))

In [22]:
def arrange_data(mb_size):
    """Shuffle the sample order and fix the train/test boundary.

    Stores a random permutation of all row indices in ``shuffle_map``. The
    first ``step_count * mb_size`` shuffled rows (80% of the data, rounded down
    to whole mini-batches) form the training split; ``test_begin_idx`` marks
    where the test split starts.

    Returns:
        Number of mini-batch steps per epoch.
    """
    global data, shuffle_map, test_begin_idx

    sample_total = data.shape[0]
    shuffle_map = np.arange(sample_total)
    np.random.shuffle(shuffle_map)

    batches_per_epoch = int(sample_total * 0.8) // mb_size
    test_begin_idx = batches_per_epoch * mb_size
    return batches_per_epoch

def get_test_data():
    """Return the held-out test split as ``(inputs, targets)``.

    The rows used are those listed in ``shuffle_map[test_begin_idx:]``; the last
    ``output_cnt`` columns are the targets and everything before them the inputs.
    """
    # The declaration names test_begin_idx, the global that is actually read.
    global data, shuffle_map, test_begin_idx, output_cnt
    test_data = data[shuffle_map[test_begin_idx:]]
    return test_data[:, :-output_cnt], test_data[:, -output_cnt:]

def get_train_data(mb_size, nth):
    """Return the ``nth`` mini-batch of the training split as ``(inputs, targets)``.

    On the first batch of an epoch (``nth == 0``) the training part of
    ``shuffle_map`` is reshuffled in place; entries from ``test_begin_idx``
    onward are not touched.
    """
    global data, shuffle_map, test_begin_idx, output_cnt

    if nth == 0:
        # The slice is shuffled in place, permuting only the training indices.
        np.random.shuffle(shuffle_map[:test_begin_idx])

    batch_rows = shuffle_map[mb_size * nth:mb_size * (nth + 1)]
    batch = data[batch_rows]
    inputs = batch[:, :-output_cnt]
    targets = batch[:, -output_cnt:]
    return inputs, targets

In [23]:
def run_train(x, y):
    """Perform one training step on a mini-batch and return ``(loss, accuracy)``.

    The accuracy is measured on the forward-pass prediction, i.e. before the
    backward pass updates the parameters.
    """
    # Forward pass: prediction, loss and accuracy.
    prediction, nn_cache = forward_neuralnet(x)
    loss, pp_cache = forward_postproc(prediction, y)
    accuracy = eval_accuracy(prediction, y)

    # Backward pass: the gradient of the loss with respect to itself is 1.
    grad_loss = 1.0
    grad_output = backprop_postproc(grad_loss, pp_cache)
    backprop_neuralnet(grad_output, nn_cache)

    return loss, accuracy

def run_test(x, y):
    """Evaluate the current model on ``(x, y)``; only the forward pass is run."""
    prediction, _aux = forward_neuralnet(x)  # auxiliary data is only needed for backprop
    return eval_accuracy(prediction, y)

In [24]:
def forward_neuralnet_hidden1(x):
    """Forward pass of the single-hidden-layer network.

    Returns:
        ``(output, [x, hidden])``: the linear output of the network and the
        values backprop_neuralnet_hidden1 needs later.
    """
    global pm_output, pm_hidden

    hidden_pre = np.matmul(x, pm_hidden['w']) + pm_hidden['b']
    hidden = relu(hidden_pre)  # ReLU-activated hidden layer

    # The output layer is purely linear (no activation).
    output = np.matmul(hidden, pm_output['w']) + pm_output['b']

    aux = [x, hidden]
    return output, aux

def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and 0."""
    rectified = np.maximum(x, 0)
    return rectified

In [60]:
def backprop_neuralnet_hidden1(G_output, aux):
    """Backward pass of the single-hidden-layer network with an immediate update.

    Args:
        G_output: gradient of the loss with respect to the network output.
        aux: ``[x, hidden]`` as returned by forward_neuralnet_hidden1.

    Side effects:
        Updates ``pm_output`` and ``pm_hidden`` in place, scaled by LEARNING_RATE.
    """
    global pm_output, pm_hidden

    inputs, hidden_act = aux

    # --- output layer gradients ---
    grad_w_out = np.matmul(hidden_act.transpose(), G_output)
    grad_b_out = np.sum(G_output, axis=0)

    # Propagate through the output weights BEFORE they are modified below.
    grad_hidden = np.matmul(G_output, pm_output['w'].transpose())

    pm_output['w'] -= LEARNING_RATE * grad_w_out
    pm_output['b'] -= LEARNING_RATE * grad_b_out

    # --- hidden layer gradients ---
    grad_hidden = grad_hidden * relu_derv(hidden_act)  # gate by the ReLU derivative

    grad_w_hid = np.matmul(inputs.transpose(), grad_hidden)
    grad_b_hid = np.sum(grad_hidden, axis=0)

    pm_hidden['w'] -= LEARNING_RATE * grad_w_hid
    pm_hidden['b'] -= LEARNING_RATE * grad_b_hid
    
def relu_derv(y):
    """Derivative of ReLU expressed through the ReLU output ``y``.

    Implemented as ``np.sign``: 1 for positive entries and 0 for zeros.
    """
    slope = np.sign(y)
    return slope

In [26]:
def forward_postproc(output, y):
    """Mean-squared-error loss between the prediction and the target.

    Returns:
        ``(loss, diff)``: the mean of the squared errors and the raw error
        ``output - y``, which backprop_postproc needs for the gradient.
    """
    diff = output - y
    loss = np.mean(np.square(diff))
    return loss, diff

def backprop_postproc(G_loss, diff):
    """Gradient of the mean-squared-error loss with respect to the network output.

    Applies the chain rule along loss <- square <- diff <- output.

    Args:
        G_loss: gradient flowing into the loss (1.0 when called from run_train).
        diff: the error ``output - y`` saved by forward_postproc.
    """
    dims = diff.shape

    d_loss_d_square = np.ones(dims) / np.prod(dims)  # gradient of L w.r.t. square (mean -> 1/N each)
    d_square_d_diff = 2 * diff                       # gradient of square w.r.t. diff
    d_diff_d_output = 1                              # gradient of diff w.r.t. output

    grad_square = d_loss_d_square * G_loss
    grad_diff = d_square_d_diff * grad_square
    return d_diff_d_output * grad_diff

In [27]:
def init_model_hidden():
    """Allocate parameters for a network whose hidden layers follow ``hidden_config``.

    ``hidden_config`` is iterated as a sequence of hidden-layer widths. One
    parameter pair is created per hidden layer and stored, in order, in the
    global list ``pm_hiddens``; the output layer's pair goes to ``pm_output``.
    """
    global pm_output, pm_hiddens, input_cnt, output_cnt, hidden_config

    pm_hiddens = []
    prev_cnt = input_cnt

    for layer_width in hidden_config:
        # Collect into the list pm_hiddens; the singular name pm_hidden belongs
        # to the single-hidden-layer model and is not a list.
        pm_hiddens.append(alloc_param_pair([prev_cnt, layer_width]))
        prev_cnt = layer_width  # this layer's width is the next layer's input width

    pm_output = alloc_param_pair([prev_cnt, output_cnt])

In [28]:
def forward_neuralnet_hiddens(x):
    """Forward pass through every hidden layer in ``pm_hiddens`` and the output layer.

    Returns:
        ``(output, activations)`` where ``activations[0]`` is the input ``x``
        and ``activations[k]`` is the ReLU output of the k-th hidden layer.
    """
    global pm_output, pm_hiddens

    activations = [x]
    for layer in pm_hiddens:
        # Each layer consumes the most recent activation.
        layer_out = relu(np.matmul(activations[-1], layer['w']) + layer['b'])
        activations.append(layer_out)

    # Linear output layer on top of the last activation.
    output = np.matmul(activations[-1], pm_output['w']) + pm_output['b']

    return output, activations

In [29]:
def backprop_neuralnet_hiddens(G_output, aux):
    """Backward pass of the multi-hidden-layer network with an immediate update.

    Args:
        G_output: gradient of the loss with respect to the network output.
        aux: activation list returned by forward_neuralnet_hiddens;
            ``aux[0]`` is the network input and ``aux[k]`` the output of the
            k-th hidden layer.

    Side effects:
        Updates ``pm_output`` and every entry of ``pm_hiddens`` in place,
        scaled by LEARNING_RATE.
    """
    global pm_output, pm_hiddens

    hiddens = aux

    # --- output layer ---
    g_output_w_out = hiddens[-1].transpose()
    G_w_out = np.matmul(g_output_w_out, G_output)
    G_b_out = np.sum(G_output, axis=0)

    # Back-propagate through the output weights before they are updated.
    g_output_hidden = pm_output['w'].transpose()
    G_hidden = np.matmul(G_output, g_output_hidden)

    pm_output['w'] -= LEARNING_RATE * G_w_out
    pm_output['b'] -= LEARNING_RATE * G_b_out

    # --- hidden layers, from the last one back to the first ---
    for n in reversed(range(len(pm_hiddens))):
        # hiddens[n+1] is the output of layer n, hiddens[n] its input.
        G_hidden = G_hidden * relu_derv(hiddens[n + 1])

        g_hidden_w_hid = hiddens[n].transpose()
        G_w_hid = np.matmul(g_hidden_w_hid, G_hidden)
        G_b_hid = np.sum(G_hidden, axis=0)

        # Gradient for the layer below, computed with layer n's weights
        # (indexed by the loop variable n) before those weights are updated.
        g_hidden_hidden = pm_hiddens[n]['w'].transpose()
        G_hidden = np.matmul(G_hidden, g_hidden_hidden)

        pm_hiddens[n]['w'] -= LEARNING_RATE * G_w_hid
        pm_hiddens[n]['b'] -= LEARNING_RATE * G_b_hid

In [30]:
# A `global` statement at module level has no effect; `hidden_config` only
# comes into existence once set_hidden() assigns it.
global hidden_config

def init_model():
    """Allocate model parameters for the architecture selected via set_hidden().

    Uses the multi-layer initialiser when ``hidden_config`` is not ``None`` and
    the single-hidden-layer initialiser otherwise, printing which one was used.
    """
    if hidden_config is not None:
        # Message: "A multilayer perceptron with N hidden layers ran well."
        # It now ends with a period, like the single-layer message, instead of a comma.
        print('은닉 계층 {}개를 갖는 다층 퍼셉트론이 잘 작동되었습니다.'.format(len(hidden_config)))
        init_model_hidden()

    else:
        # Message: "A multilayer perceptron with 1 hidden layer ran."
        print('은닉 계층 1개를 갖는 다층 퍼셉트론이 작동되었습니다.')
        init_model_hidden1()
        
def forward_neuralnet(x):
    """Run the forward pass of whichever architecture is currently selected.

    Dispatches to the multi-layer implementation when ``hidden_config`` is not
    ``None`` and to the single-hidden-layer one otherwise.
    """
    use_multi = hidden_config is not None
    forward = forward_neuralnet_hiddens if use_multi else forward_neuralnet_hidden1
    return forward(x)
    
def backprop_neuralnet(G_output, hiddens):
    """Run the backward pass of whichever architecture is currently selected.

    Args:
        G_output: gradient of the loss with respect to the network output.
        hiddens: auxiliary data returned by the matching forward pass.
    """
    use_multi = hidden_config is not None
    backward = backprop_neuralnet_hiddens if use_multi else backprop_neuralnet_hidden1
    backward(G_output, hiddens)

In [31]:
def set_hidden(info):
    """Select the network architecture used by subsequent model initialisation.

    Args:
        info: an ``int`` selects the single-hidden-layer network with that many
            units (``hidden_config`` is reset to ``None``); any other value is
            stored as ``hidden_config`` for the multi-layer network.
    """
    global hidden_cnt, hidden_config

    if not isinstance(info, int):
        hidden_config = info
        return

    hidden_cnt = info
    hidden_config = None

In [32]:
def eval_accuracy(output, y):
    """Accuracy defined as one minus the mean relative error of the prediction."""
    relative_error = np.abs((output - y) / y)
    return 1 - np.mean(relative_error)

In [49]:
# Select the single-hidden-layer network with 4 hidden units, then train for
# 50 epochs with a report every 10 epochs (mini-batch size left at its default).
set_hidden(4)
abalone_exec(epoch_count=50, report=10)

은닉 계층 1개를 갖는 다층 퍼셉트론이 작동되었습니다.
Epoch 10: loss=6.869, accuracy=0.805/0.812]
Epoch 20: loss=6.352, accuracy=0.812/0.817]
Epoch 30: loss=5.634, accuracy=0.824/0.843]
Epoch 40: loss=5.168, accuracy=0.833/0.844]
Epoch 50: loss=4.996, accuracy=0.837/0.846]

Final Test: final accuracy = 0.846


In [61]:
# Same experiment as the previous run cell: 4 hidden units, 50 epochs, report every 10.
# NOTE(review): the output saved under this cell is a matmul shape error, yet the
# functions as currently defined look shape-consistent — the saved output is
# possibly stale from an earlier edit; re-run on a fresh kernel to confirm.
set_hidden(4)
abalone_exec(epoch_count=50, report=10)

은닉 계층 1개를 갖는 다층 퍼셉트론이 작동되었습니다.


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 4 is different from 10)

In [13]:
# NOTE(review): `weight` and `bias` are not defined at module level by any cell
# visible here (they are only locals inside alloc_param_pair). This cell
# presumably relies on kernel state from an earlier notebook version and would
# raise NameError on Restart & Run All — confirm, then remove or update it.
print(weight)
print(bias)

[[1.02697603]
 [1.47450981]
 [1.66960135]
 [2.04468668]
 [1.62513525]
 [0.60292627]
 [2.39993815]
 [0.54107313]
 [0.46878034]
 [1.01969382]]
[4.16894769]


In [14]:
# Raise the learning rate to 0.1 and train for 100 epochs with mini-batches of
# 100 samples, reporting every 20 epochs.
# NOTE(review): there is no set_hidden() call here, so the run uses whatever
# architecture was selected last; the execution count (In [14]) is also out of
# order relative to the cells above — verify on a fresh kernel.
LEARNING_RATE = 0.1
abalone_exec(epoch_count=100, mb_size=100, report=20)

Epoch 20: loss=5.804, accuracy=0.825/0.831]
Epoch 40: loss=5.259, accuracy=0.834/0.828]
Epoch 60: loss=5.056, accuracy=0.837/0.838]
Epoch 80: loss=4.950, accuracy=0.838/0.840]
Epoch 100: loss=4.910, accuracy=0.840/0.826]

Final Test: final accuracy = 0.826
