# 다층 퍼셉트론 기본 구조

## 천체 펄서 판정 문제 신경망

In [1]:
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00372/HTRU2.zip

--2021-02-05 04:59:41--  https://archive.ics.uci.edu/ml/machine-learning-databases/00372/HTRU2.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1563015 (1.5M) [application/x-httpd-php]
Saving to: ‘HTRU2.zip’


2021-02-05 04:59:42 (1.90 MB/s) - ‘HTRU2.zip’ saved [1563015/1563015]



In [2]:
!unzip HTRU2.zip

Archive:  HTRU2.zip
  inflating: HTRU_2.csv              
  inflating: HTRU_2.arff             
  inflating: Readme.txt              


### 파이썬 모듈 불러들이기

In [3]:
import numpy as np
import csv
import time

# Seed NumPy's global random generator with a fixed value; the weight
# initialisation and data shuffling in this file both draw from np.random.
np.random.seed(1234)

def randomize():
    """Re-seed NumPy's global random generator from the wall-clock time.

    ``np.random.seed`` only accepts integer seeds in ``[0, 2**32 - 1]``.
    ``time.time()`` returns a float, so it is truncated to whole seconds and
    wrapped into that range before being used as the seed.
    """
    np.random.seed(int(time.time()) % (2 ** 32))

### 하이퍼파라미터값의 정의

In [4]:
# Mean and standard deviation of the normal distribution from which
# alloc_param_pair() draws the initial weight values.
RND_MEAN = 0
RND_STD = 0.0030

# Step size that scales every gradient in the backprop parameter updates.
LEARNING_RATE = 0.001

### 메인 함수 정의

In [5]:
def pulsar_exec(epoch_count=10, mb_size=10, report=1, adjust_ratio=False):
    """Run one complete pulsar experiment: load data, build model, train, test.

    Args:
        epoch_count: Number of training epochs.
        mb_size: Number of samples per mini-batch.
        report: Print intermediate results every ``report`` epochs; a value
            of 0 or less suppresses the per-epoch reports.
        adjust_ratio: Forwarded to ``load_pulsar_dataset``; when true the
            pulsar rows are repeated until they match the star count.
    """
    load_pulsar_dataset(adjust_ratio=adjust_ratio)
    init_model()
    train_and_test(epoch_count=epoch_count, mb_size=mb_size, report=report)

### 데이터 적재 함수 정의

In [6]:
def load_pulsar_dataset(adjust_ratio):
    """Read ``HTRU_2.csv`` and publish it through the global ``data`` array.

    Rows whose ninth column is the string ``'1'`` are treated as pulsars and
    all other rows as stars. The resulting array lists every star row first,
    followed by the pulsar rows. Values are parsed as float32 and stored in
    a float64 array.

    Args:
        adjust_ratio: When true, pulsar rows are repeated cyclically until
            there are exactly as many pulsar rows as star rows, giving a
            balanced array with ``2 * star_cnt`` rows. When false, each row
            of the file appears exactly once.

    Raises:
        ValueError: If ``adjust_ratio`` is true and the file contains star
            rows but no pulsar rows, so there is nothing to repeat.

    Side effects:
        Sets the globals ``data``, ``input_cnt`` (8) and ``output_cnt`` (1).
    """
    global data, input_cnt, output_cnt

    pulsars, stars = [], []
    # newline='' lets the csv module do its own line-ending handling.
    with open('HTRU_2.csv', newline='') as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            if row[8] == '1':
                pulsars.append(row)
            else:
                stars.append(row)

    input_cnt, output_cnt = 8, 1
    star_cnt, pulsar_cnt = len(stars), len(pulsars)

    # The explicit reshape keeps the (rows, 9) shape even for an empty class.
    star_rows = np.asarray(stars, dtype='float32').reshape(star_cnt, 9)
    pulsar_rows = np.asarray(pulsars, dtype='float32').reshape(pulsar_cnt, 9)

    if adjust_ratio:
        if star_cnt > 0 and pulsar_cnt == 0:
            raise ValueError(
                'adjust_ratio requires at least one pulsar row in HTRU_2.csv')
        # Cycle through the pulsar rows (0, 1, ..., pulsar_cnt-1, 0, 1, ...)
        # until one pulsar row has been picked per star row.
        picks = np.arange(star_cnt) % max(pulsar_cnt, 1)
        pulsar_rows = pulsar_rows[picks]

    data = np.concatenate([star_rows, pulsar_rows]).astype('float64')

### 은닉 계층 하나를 위한 파라미터 생성 함수 정의

In [7]:
def init_model_hidden1():
    """Create the parameters of a network with exactly one hidden layer.

    Reads the globals ``input_cnt``, ``hidden_cnt`` and ``output_cnt`` and
    stores the new parameter pairs in the globals ``pm_hidden`` and
    ``pm_output``. The hidden layer is allocated before the output layer.
    """
    global pm_output, pm_hidden, input_cnt, output_cnt, hidden_cnt

    hidden_shape = [input_cnt, hidden_cnt]
    output_shape = [hidden_cnt, output_cnt]

    pm_hidden = alloc_param_pair(hidden_shape)
    pm_output = alloc_param_pair(output_shape)
    
def alloc_param_pair(shape):
    """Allocate one layer's weight matrix and bias vector.

    Args:
        shape: Weight shape; its last entry is also the bias length.

    Returns:
        A dict with key ``'w'`` (weights drawn from a normal distribution
        with mean ``RND_MEAN`` and standard deviation ``RND_STD``) and key
        ``'b'`` (a zero vector).
    """
    return {
        'w': np.random.normal(RND_MEAN, RND_STD, shape),
        'b': np.zeros(shape[-1]),
    }

### 학습 및 평가 함수 정의

In [8]:
def train_and_test(epoch_count, mb_size, report):
    """Train on mini-batches for several epochs and print test metrics.

    Args:
        epoch_count: Number of passes over the training portion.
        mb_size: Number of samples per mini-batch.
        report: Every ``report`` epochs the mean training loss and the four
            test metrics are printed; values of 0 or less disable this.

    A final test result is always printed after the last epoch.
    """
    def describe(metrics):
        # Four metrics rendered as comma-separated fixed-point numbers.
        return ','.join(['%5.3f']*4) % tuple(metrics)

    step_count = arrange_data(mb_size)
    test_x, test_y = get_test_data()

    for epoch_no in range(1, epoch_count + 1):
        epoch_losses = []

        for step in range(step_count):
            batch_x, batch_y = get_train_data(mb_size, step)
            batch_loss, _ = run_train(batch_x, batch_y)
            epoch_losses.append(batch_loss)

        if report > 0 and epoch_no % report == 0:
            summary = describe(run_test(test_x, test_y))
            print('Epoch {}: loss={:5.3f}, result={}'.
                  format(epoch_no, np.mean(epoch_losses), summary))

    final_summary = describe(run_test(test_x, test_y))
    print('\nFinal Test: final result = {}'.format(final_summary))

### 학습 및 평가 데이터 획득 함수 정의

In [9]:
def arrange_data(mb_size):
    """Shuffle the row order and split it into training and test parts.

    Builds a random permutation of all row indices of the global ``data``
    and stores it in the global ``shuffle_map``. The training part is the
    largest whole number of mini-batches that fits into 80% of the rows;
    the global ``test_begin_idx`` marks where the test part starts.

    Args:
        mb_size: Number of samples per mini-batch.

    Returns:
        The number of mini-batch steps per epoch.
    """
    global data, shuffle_map, test_begin_idx

    total_rows = data.shape[0]
    shuffle_map = np.arange(total_rows)
    np.random.shuffle(shuffle_map)

    train_rows = int(total_rows * 0.8)
    steps_per_epoch = train_rows // mb_size
    test_begin_idx = steps_per_epoch * mb_size
    return steps_per_epoch

def get_test_data():
    """Return the test portion of the dataset as ``(inputs, targets)``.

    The test rows are those addressed by ``shuffle_map`` from
    ``test_begin_idx`` onward; the last ``output_cnt`` columns of each row
    are the targets and the remaining columns are the inputs.
    """
    global data, shuffle_map, test_begin_idx, output_cnt

    held_out = data[shuffle_map[test_begin_idx:]]
    split_at = -output_cnt
    inputs = held_out[:, :split_at]
    targets = held_out[:, split_at:]
    return inputs, targets

def get_train_data(mb_size, nth):
    """Return the ``nth`` training mini-batch as ``(inputs, targets)``.

    On the first batch of an epoch (``nth == 0``) the training part of
    ``shuffle_map`` is reshuffled in place; the test part is left untouched.

    Args:
        mb_size: Number of samples per mini-batch.
        nth: Zero-based index of the mini-batch within the epoch.
    """
    global data, shuffle_map, test_begin_idx, output_cnt

    if nth == 0:
        # The slice is a view, so this reorders shuffle_map itself.
        np.random.shuffle(shuffle_map[:test_begin_idx])

    begin = mb_size * nth
    batch = data[shuffle_map[begin:begin + mb_size]]
    split_at = -output_cnt
    return batch[:, :split_at], batch[:, split_at:]

### 학습 실행 함수와 평가 실행 함수 정의

In [10]:
def run_train(x, y):
    """Run one training step (forward pass, then backpropagation).

    Args:
        x: Mini-batch inputs.
        y: Mini-batch targets.

    Returns:
        ``(loss, accuracy)``, both computed from the forward pass before
        the parameters are updated.
    """
    output, nn_cache = forward_neuralnet(x)
    loss, pp_cache = forward_postproc(output, y)
    accuracy = eval_accuracy(output, y)

    # The gradient of the loss with respect to itself, 1.0, starts the chain.
    grad_output = backprop_postproc(1.0, pp_cache)
    backprop_neuralnet(grad_output, nn_cache)

    return loss, accuracy

def run_test(x, y):
    """Evaluate the current model on ``(x, y)`` without updating it.

    Returns:
        The metric list produced by ``eval_accuracy``.
    """
    logits, _ = forward_neuralnet(x)
    return eval_accuracy(logits, y)

### 은닉 계층 하나를 위한 순전파 함수 정의

In [11]:
def forward_neuralnet_hidden1(x):
    """Forward pass through the one-hidden-layer network.

    Args:
        x: Input mini-batch.

    Returns:
        ``(output, [x, hidden])``: the output-layer result, plus the input
        and the ReLU-activated hidden values needed by backpropagation.
    """
    global pm_output, pm_hidden

    hidden_pre = np.matmul(x, pm_hidden['w']) + pm_hidden['b']
    hidden = relu(hidden_pre)
    logits = np.matmul(hidden, pm_output['w']) + pm_output['b']

    return logits, [x, hidden]

def relu(x):
    """Return the element-wise maximum of ``x`` and 0 (the ReLU function)."""
    return np.maximum(x, 0)

### 후처리 과정에 대한 순전파와 역전파 함수

In [12]:
def forward_postproc(output, y):
    """Turn network outputs into a scalar loss.

    Args:
        output: Network outputs, treated as logits.
        y: Target values.

    Returns:
        ``(loss, [y, output, entropy])`` where ``entropy`` is the
        per-element sigmoid cross-entropy and ``loss`` is its mean.
    """
    per_element = sigmoid_cross_entropy_with_logits(y, output)
    mean_loss = np.mean(per_element)
    return mean_loss, [y, output, per_element]

def backprop_postproc(G_loss, aux):
    """Backpropagate the loss gradient to the network outputs.

    Args:
        G_loss: Gradient of the final loss with respect to itself.
        aux: The ``[y, output, entropy]`` list from ``forward_postproc``.

    Returns:
        Gradient of the loss with respect to ``output``.
    """
    y, output, entropy = aux

    # The mean over all elements contributes a factor of 1 / element-count.
    mean_scale = 1.0 / np.prod(entropy.shape)
    grad_entropy = mean_scale * G_loss

    entropy_slope = sigmoid_cross_entropy_with_logits_derv(y, output)
    return entropy_slope * grad_entropy

### 은닉 계층 하나를 위한 역전파 함수 정의

In [13]:
def backprop_neuralnet_hidden1(G_output, aux):
    """Backpropagate through the one-hidden-layer network and update it.

    Args:
        G_output: Gradient of the loss with respect to the network output.
        aux: The ``[x, hidden]`` list from ``forward_neuralnet_hidden1``.

    Both parameter pairs are updated in place by plain gradient descent
    with step size ``LEARNING_RATE``.
    """
    global pm_output, pm_hidden

    x, hidden = aux

    # --- output layer ---
    grad_w_out = np.matmul(hidden.T, G_output)
    grad_b_out = np.sum(G_output, axis=0)

    # Taken before the output weights change, so the hidden gradient is
    # based on the weights that produced this forward pass.
    grad_hidden = np.matmul(G_output, pm_output['w'].T)

    pm_output['w'] -= LEARNING_RATE * grad_w_out
    pm_output['b'] -= LEARNING_RATE * grad_b_out

    # --- hidden layer ---
    grad_hidden = grad_hidden * relu_derv(hidden)

    grad_w_hid = np.matmul(x.T, grad_hidden)
    grad_b_hid = np.sum(grad_hidden, axis=0)

    pm_hidden['w'] -= LEARNING_RATE * grad_w_hid
    pm_hidden['b'] -= LEARNING_RATE * grad_b_hid
    
def relu_derv(y):
    """Return the ReLU derivative expressed in terms of the ReLU output ``y``.

    The callers in this file pass values produced by ``relu``, which are
    non-negative, so ``np.sign`` yields 1 where a unit was active and 0
    where it was clipped.
    """
    return np.sign(y)

### 가변적 은닉 계층 구성을 위한 파라미터 생성 함수 정의

In [14]:
def init_model_hiddens():
    """Create parameters for a network with a configurable hidden stack.

    Reads the global ``hidden_config`` (one width per hidden layer) and
    stores one parameter pair per hidden layer in the global ``pm_hiddens``
    and the output-layer pair in ``pm_output``. Layers are allocated from
    input side to output side.
    """
    global pm_output, pm_hiddens, input_cnt, output_cnt, hidden_config

    # Widths of every layer feeding into a weight matrix, input first.
    widths = [input_cnt, *hidden_config]

    pm_hiddens = [
        alloc_param_pair([fan_in, fan_out])
        for fan_in, fan_out in zip(widths[:-1], widths[1:])
    ]
    pm_output = alloc_param_pair([widths[-1], output_cnt])

### 가변적 은닉 계층 구성을 위한 순전파 함수 정의

In [15]:
def forward_neuralnet_hiddens(x):
    """Forward pass through the configurable hidden-layer network.

    Args:
        x: Input mini-batch.

    Returns:
        ``(output, hiddens)`` where ``hiddens`` starts with ``x`` followed
        by the ReLU-activated values of every hidden layer, in order.
    """
    global pm_output, pm_hiddens

    activations = [x]

    for layer in pm_hiddens:
        pre_activation = np.matmul(activations[-1], layer['w']) + layer['b']
        activations.append(relu(pre_activation))

    logits = np.matmul(activations[-1], pm_output['w']) + pm_output['b']

    return logits, activations

### 가변적 은닉 계층 구성을 위한 역전파 함수 정의

In [16]:
def backprop_neuralnet_hiddens(G_output, aux):
    """Backpropagate through the configurable network and update it.

    Args:
        G_output: Gradient of the loss with respect to the network output.
        aux: The activation list from ``forward_neuralnet_hiddens`` (input
            first, then each hidden layer's activated values).

    Every parameter pair is updated in place by plain gradient descent with
    step size ``LEARNING_RATE``. For each layer the gradient passed on to
    the previous layer is computed before that layer's weights change.
    """
    global pm_output, pm_hiddens

    activations = aux

    # --- output layer ---
    grad_w = np.matmul(activations[-1].T, G_output)
    grad_b = np.sum(G_output, axis=0)
    grad_act = np.matmul(G_output, pm_output['w'].T)

    pm_output['w'] -= LEARNING_RATE * grad_w
    pm_output['b'] -= LEARNING_RATE * grad_b

    # --- hidden layers, last to first ---
    for idx in range(len(pm_hiddens) - 1, -1, -1):
        layer = pm_hiddens[idx]

        grad_pre = grad_act * relu_derv(activations[idx + 1])

        grad_w = np.matmul(activations[idx].T, grad_pre)
        grad_b = np.sum(grad_pre, axis=0)

        grad_act = np.matmul(grad_pre, layer['w'].T)

        layer['w'] -= LEARNING_RATE * grad_w
        layer['b'] -= LEARNING_RATE * grad_b

### 스위치 함수 정의

In [17]:
# NOTE: a `global` statement at module level has no effect and does not
# create the name; `hidden_config` only exists after set_hidden() has run.
global hidden_config

def init_model():
    """Announce and build the model selected by ``hidden_config``.

    When ``hidden_config`` is ``None`` the fixed one-hidden-layer model is
    created; otherwise the configurable model is created from the list.
    """
    if hidden_config is None:
        print('은닉 계층 하나를 갖는 다층 퍼셉트론이 작동되었습니다.')
        init_model_hidden1()
        return

    layer_total = len(hidden_config)
    print('은닉 계층 {}개를 갖는 다층 퍼셉트론이 작동되었습니다.'.format(layer_total))
    init_model_hiddens()
    
def forward_neuralnet(x):
    """Dispatch the forward pass to the model selected by ``hidden_config``.

    Returns:
        Whatever the selected forward function returns: the output and the
        auxiliary values needed for backpropagation.
    """
    if hidden_config is None:
        forward = forward_neuralnet_hidden1
    else:
        forward = forward_neuralnet_hiddens
    return forward(x)
    
def backprop_neuralnet(G_output, hiddens):
    """Dispatch backpropagation to the model selected by ``hidden_config``.

    Args:
        G_output: Gradient of the loss with respect to the network output.
        hiddens: Auxiliary values returned by the matching forward pass.
    """
    if hidden_config is None:
        backward = backprop_neuralnet_hidden1
    else:
        backward = backprop_neuralnet_hiddens
    backward(G_output, hiddens)

### 은닉 계층 구조 지정 함수 정의

In [18]:
def set_hidden(info):
    """Select the hidden-layer structure used by the model functions.

    Args:
        info: Either an integer (Python ``int`` or NumPy integer scalar)
            giving the width of a single hidden layer, or a sequence of
            widths, one per hidden layer (an empty sequence means no
            hidden layer).

    Side effects:
        An integer sets the global ``hidden_cnt`` and resets
        ``hidden_config`` to ``None``, which selects the fixed
        one-hidden-layer code path. A sequence is stored in
        ``hidden_config``; ``hidden_cnt`` is left as it was.
    """
    global hidden_cnt, hidden_config
    # NumPy integer scalars are not instances of int, so they are checked
    # explicitly; otherwise they would be stored as a layer list and fail
    # later when len() is taken.
    if isinstance(info, (int, np.integer)):
        hidden_cnt = int(info)
        hidden_config = None
    else:
        hidden_config = info

### 정확도 계산 함수

In [19]:
def eval_accuracy(output, y):
    """Compute binary-classification metrics from logits and targets.

    A prediction counts as positive when its logit is greater than 0, and
    a target counts as positive when it is greater than 0.5.

    Returns:
        ``[accuracy, precision, recall, f1]``.
    """
    def count_both(left, right):
        # Number of positions where both conditions hold.
        return np.sum(np.logical_and(left, right))

    predicted_pos = np.greater(output, 0)
    actual_pos = np.greater(y, 0.5)
    predicted_neg = np.logical_not(predicted_pos)
    actual_neg = np.logical_not(actual_pos)

    tp = count_both(predicted_pos, actual_pos)
    fp = count_both(predicted_pos, actual_neg)
    fn = count_both(predicted_neg, actual_pos)
    tn = count_both(predicted_neg, actual_neg)

    accuracy = safe_div(tp+tn, tp+tn+fp+fn)
    precision = safe_div(tp, tp+fp)
    recall = safe_div(tp, tp+fn)
    f1 = 2 * safe_div(recall*precision, recall+precision)

    return [accuracy, precision, recall, f1]

def safe_div(p, q):
    """Divide ``p`` by ``q`` without failing on a (near-)zero divisor.

    Returns:
        ``p / q`` as a float; when ``|q|`` is below 1e-20 the sign of ``p``
        (-1.0, 0.0 or 1.0) is returned instead.
    """
    numerator = float(p)
    denominator = float(q)
    return (np.sign(numerator) if np.abs(denominator) < 1.0e-20
            else numerator / denominator)

### 시그모이드 관련 함수 정의

In [20]:
# NOTE: redefines relu() with the same body as the earlier definition in this file.
def relu(x):
    """Return the element-wise maximum of ``x`` and 0 (the ReLU function)."""
    return np.maximum(x, 0)

def sigmoid(x):
    """Return the logistic sigmoid of ``x``.

    Written as ``exp(-relu(-x)) / (1 + exp(-|x|))`` so that ``exp`` is only
    ever called with non-positive arguments.
    """
    numerator = np.exp(-relu(-x))
    denominator = 1.0 + np.exp(-np.abs(x))
    return numerator / denominator

def sigmoid_derv(x, y):
    """Return the sigmoid derivative computed from the sigmoid output ``y``.

    Args:
        x: Unused; kept for the existing call signature.
        y: Sigmoid output value(s).
    """
    complement = 1 - y
    return y * complement

def sigmoid_cross_entropy_with_logits(z, x):
    """Return the element-wise sigmoid cross-entropy of logits ``x`` vs ``z``.

    Args:
        z: Target values.
        x: Logits.

    Computed as ``relu(x) - x*z + log(1 + exp(-|x|))`` so that ``exp`` is
    only ever called with non-positive arguments.
    """
    linear_part = relu(x) - x * z
    log_part = np.log(1 + np.exp(-np.abs(x)))
    return linear_part + log_part

def sigmoid_cross_entropy_with_logits_derv(z, x):
    """Return the derivative of the sigmoid cross-entropy w.r.t. logits ``x``.

    Args:
        z: Target values.
        x: Logits.

    Returns:
        ``sigmoid(x)`` minus the targets, element-wise.
    """
    probability = sigmoid(x)
    return -z + probability

## 실행하기

### 단층 퍼셉트론 처리하기

In [21]:
# An empty layer list selects the configurable model with zero hidden
# layers, so the input feeds the output layer directly.
set_hidden([])
pulsar_exec(adjust_ratio=True)

은닉 계층 0개를 갖는 다층 퍼셉트론이 작동되었습니다.
Epoch 1: loss=0.413, result=0.924,0.932,0.916,0.924
Epoch 2: loss=0.380, result=0.920,0.976,0.862,0.915
Epoch 3: loss=0.374, result=0.899,0.872,0.937,0.903
Epoch 4: loss=0.382, result=0.870,0.825,0.942,0.879
Epoch 5: loss=0.376, result=0.910,0.895,0.930,0.912
Epoch 6: loss=0.367, result=0.828,0.757,0.969,0.850
Epoch 7: loss=0.367, result=0.900,0.875,0.935,0.904
Epoch 8: loss=0.367, result=0.890,0.994,0.786,0.878
Epoch 9: loss=0.371, result=0.925,0.934,0.916,0.925
Epoch 10: loss=0.367, result=0.598,0.556,0.997,0.714

Final Test: final result = 0.598,0.556,0.997,0.714


### 은닉 계층 하나로 풀어보는 펄서 여부 판정 문제

In [22]:
# An integer selects the fixed one-hidden-layer model with that many hidden units.
set_hidden(6)
pulsar_exec(epoch_count=50, report=10, adjust_ratio=True)

은닉 계층 하나를 갖는 다층 퍼셉트론이 작동되었습니다.
Epoch 10: loss=0.209, result=0.927,0.933,0.920,0.926
Epoch 20: loss=0.204, result=0.932,0.965,0.897,0.929
Epoch 30: loss=0.202, result=0.934,0.963,0.903,0.932
Epoch 40: loss=0.199, result=0.934,0.963,0.903,0.932
Epoch 50: loss=0.199, result=0.931,0.947,0.913,0.930

Final Test: final result = 0.931,0.947,0.913,0.930


### 은닉 계층 하나인 다층 퍼셉트론의 두 가지 표현

In [26]:
# NOTE(review): the heading above announces two ways of expressing a
# one-hidden-layer network, yet this cell repeats the integer form used in
# the previous cell. The list form `set_hidden([6])` was presumably
# intended; confirm before changing, since the recorded output below
# matches the integer form.
set_hidden(6)
pulsar_exec(epoch_count=50, report=10, adjust_ratio=True)

은닉 계층 하나를 갖는 다층 퍼셉트론이 작동되었습니다.
Epoch 10: loss=0.209, result=0.924,0.924,0.923,0.924
Epoch 20: loss=0.200, result=0.930,0.960,0.897,0.927
Epoch 30: loss=0.198, result=0.928,0.960,0.893,0.925
Epoch 40: loss=0.196, result=0.929,0.954,0.901,0.927
Epoch 50: loss=0.194, result=0.926,0.930,0.922,0.926

Final Test: final result = 0.926,0.930,0.922,0.926


### 은닉 계층 두 개로 풀어보는 펄서 여부 판정 문제

In [29]:
# A two-element list selects the configurable model with two hidden layers
# of widths 12 and 6.
set_hidden([12, 6])
pulsar_exec(epoch_count=200, report=40, adjust_ratio=True)

은닉 계층 2개를 갖는 다층 퍼셉트론이 작동되었습니다.
Epoch 40: loss=0.201, result=0.921,0.962,0.877,0.918
Epoch 80: loss=0.195, result=0.927,0.958,0.894,0.925
Epoch 120: loss=0.192, result=0.917,0.908,0.931,0.919
Epoch 160: loss=0.190, result=0.928,0.960,0.895,0.926
Epoch 200: loss=0.188, result=0.926,0.971,0.881,0.924

Final Test: final result = 0.926,0.971,0.881,0.924
