#데이터 증폭 버전 이진분류 학습 

In [11]:
%run /content/MathUtils.ipynb
%run /content/abalone_model.ipynb

In [12]:
def binary_classification_exec(epoch_count = 10, mb_size = 10, report = 1, train_ratio = 0.6, val_ratio = 0.2, adjust_ratio = False):
    """Run the whole experiment: load the dataset, reset the parameters, train and test.

    Args:
        epoch_count: number of passes over the training mini-batches.
        mb_size: number of rows per mini-batch.
        report: print a progress line every `report` epochs (no output if <= 0).
        train_ratio: fraction of the rows used for training.
        val_ratio: fraction of the rows used for validation.
        adjust_ratio: forwarded to `binary_load_dataset`; when true the
            pulsar rows are oversampled.

    Returns:
        The three values produced by `train_and_test`, unchanged and in the
        same order (training history, validation history, test result).
    """
    # Both calls work through module-level globals (data, weight, bias).
    binary_load_dataset(adjust_ratio)
    init_param()

    train_history, val_history, test_result = train_and_test(
        epoch_count, mb_size, report, train_ratio, val_ratio
    )
    return train_history, val_history, test_result

In [13]:
def binary_load_dataset(adjust_ratio, path='/content/pulsar_stars.csv'):
    """Load the pulsar CSV into the global `data` array (stars first, pulsars last).

    The file is expected to have one header line followed by rows of nine
    columns; the ninth column is the label and the string '1' marks a pulsar.
    Any other label value is treated as a star. Blank rows are skipped.

    Side effects:
        Sets the globals `data` (float64 array with 9 columns),
        `input_cnt` (8) and `output_cnt` (1).

    Args:
        adjust_ratio: when true, the pulsar rows are repeated cyclically
            until there are as many of them as star rows, so the resulting
            array has ``2 * star_cnt`` rows. When false, every row is stored
            exactly once.
        path: location of the CSV file. Defaults to the path the notebook
            originally hard-coded.

    Raises:
        ValueError: if `adjust_ratio` is true but the file contains star
            rows and no pulsar rows, so there is nothing to oversample.
    """
    global data, input_cnt, output_cnt

    pulsars, stars = [], []

    # newline='' is the mode the csv module documentation asks for.
    with open(path, newline='') as csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader)  # skip the header line

        for row in csvreader:
            if not row:
                # csv.reader yields [] for a blank line; row[8] would crash.
                continue
            if row[8] == '1':
                pulsars.append(row)
            else:
                stars.append(row)

    input_cnt, output_cnt = 8, 1
    star_cnt, pulsar_cnt = len(stars), len(pulsars)

    # Values are parsed as float32 (as before) and then stored in a float64
    # array. The reshape keeps an empty class as a (0, 9) array so the slice
    # assignments below stay valid.
    star_rows = np.asarray(stars, dtype='float32').reshape(star_cnt, 9)
    pulsar_rows = np.asarray(pulsars, dtype='float32').reshape(pulsar_cnt, 9)

    if adjust_ratio:
        # Oversample: cycle through the pulsars until they match the stars.
        if star_cnt > 0 and pulsar_cnt == 0:
            raise ValueError(
                "adjust_ratio=True requires at least one pulsar row to oversample"
            )
        data = np.zeros([star_cnt * 2, 9])
        data[:star_cnt] = star_rows
        if star_cnt > 0:
            cycle_idx = np.arange(star_cnt) % pulsar_cnt
            data[star_cnt:] = pulsar_rows[cycle_idx]
    else:
        # No oversampling: every row appears once.
        data = np.zeros([star_cnt + pulsar_cnt, 9])
        data[:star_cnt] = star_rows
        data[star_cnt:] = pulsar_rows

In [14]:
def init_param():
    """(Re)initialise the global model parameters and print them.

    `weight` is drawn from a normal distribution with mean RND_MEAN and
    standard deviation RND_STD and has shape (input_cnt, output_cnt);
    `bias` is a zero vector of length output_cnt.

    Returning the initial values to the caller was deliberately removed
    from this reduced version, so the function returns nothing.
    """
    global weight, bias

    weight_shape = (input_cnt, output_cnt)  # (8, 1) for the pulsar dataset
    weight = np.random.normal(RND_MEAN, RND_STD, size = weight_shape)
    bias   = np.zeros((output_cnt,))

    for template, value in (("Initial Weight Value : \n{}", weight),
                            ("Initial Bias Value : \n{}", bias)):
        print(template.format(value))

In [15]:
def safe_div(p, q):
    """Divide p by q, falling back to sign(p) when q is (almost) zero.

    Both arguments are converted to float first. If |q| is below 1e-20 the
    result is -1.0, 0.0 or 1.0 according to the sign of p, rather than an
    error or an infinity.
    """
    numerator = float(p)
    denominator = float(q)
    denominator_is_zero = np.abs(denominator) < 1.0e-20
    return np.sign(numerator) if denominator_is_zero else numerator / denominator

In [16]:
def eval_accuracy_numpy(output,y):
    """Compute accuracy, precision, recall and F1 from logits and labels.

    Args:
        output: raw network outputs (logits). A value above 0 counts as a
            positive prediction, because the sigmoid of a positive logit is
            above 0.5 and the sigmoid of a negative logit is below 0.5.
        y: ground-truth labels; a value above 0.5 counts as positive.

    Returns:
        A list ``[accuracy, precision, recall, f1]``.
    """
    predicted_pos = np.greater(output, 0)
    actual_pos    = np.greater(y, 0.5)
    predicted_neg = np.logical_not(predicted_pos)
    actual_neg    = np.logical_not(actual_pos)

    def count_both(mask_a, mask_b):
        # Number of positions where both boolean masks are set.
        return np.sum(np.logical_and(mask_a, mask_b))

    # Confusion-matrix cells.
    tp = count_both(predicted_pos, actual_pos)
    tn = count_both(predicted_neg, actual_neg)
    fp = count_both(actual_neg, predicted_pos)
    fn = count_both(actual_pos, predicted_neg)

    # Any denominator below may be zero (e.g. no positive predictions),
    # so every ratio goes through safe_div.
    accuracy  = safe_div(tp + tn, tp + fp + fn + tn)
    precision = safe_div(tp, tp + fp)
    recall    = safe_div(tp, tp + fn)
    f1        = 2 * safe_div(recall * precision, recall + precision)

    return [accuracy, precision, recall, f1]

In [17]:
def train_and_test(epoch_count, mb_size, report, train_ratio, val_ratio):
    """Train with mini-batches, validate after every epoch, then evaluate on the test split.

    Args:
        epoch_count: number of epochs to run.
        mb_size: rows per mini-batch.
        report: print a progress line every `report` epochs; nothing is
            printed per epoch when `report` is not positive.
        train_ratio: fraction of the rows used for training.
        val_ratio: fraction of the rows used for validation.

    Returns:
        A 3-tuple of lists:
        ``[per-epoch mean training losses]``,
        ``[per-epoch validation losses]`` and
        ``[test_loss, test_metrics]`` where `test_metrics` is whatever
        `run_test` returns as its second value.
    """
    steps_per_epoch = arrange_data(mb_size, train_ratio, val_ratio)

    test_x, test_y = get_test_data()
    val_x,  val_y  = get_val_data()

    train_loss_history = []
    val_loss_history   = []

    for epoch_idx in range(epoch_count):
        epoch_no = epoch_idx + 1

        # One update per mini-batch; only the loss is kept here.
        batch_losses = []
        for step in range(steps_per_epoch):
            batch_x, batch_y = get_train_data(mb_size, step)
            batch_loss, _ = run_train(batch_x, batch_y)
            batch_losses.append(batch_loss)

        val_loss, val_metrics = run_test(val_x, val_y)
        val_loss_history.append(val_loss)

        epoch_loss = np.mean(batch_losses)

        if report > 0 and epoch_no % report == 0:
            progress_line = "Epoch {} : Train - Loss = {:.3f} / Val - Loss = {:.3f}, Acc = {:.3f}, Precision = {:.3f}, Recall = {:.3f}, F1 = {:.3F}".format(
                epoch_no, epoch_loss, val_loss,
                val_metrics[0], val_metrics[1], val_metrics[2], val_metrics[3])
            print(progress_line)

        train_loss_history.append(epoch_loss)

    # Final evaluation on the held-out test rows.
    test_loss, test_metrics = run_test(test_x, test_y)

    print("\n","=" * 50, 'Final Test', '=' * 50)
    print('\nTest Acc = {:.3f}, Precision = {:.3f}, Recall = {:.3f}, F1 = {:.3F}'.format(
        test_metrics[0], test_metrics[1], test_metrics[2], test_metrics[3]))
    print('\nLoss = {:.3f}'.format(test_loss))

    return [train_loss_history], [val_loss_history], [test_loss , test_metrics]

In [18]:
def run_train(x, y):
    """Run one training step on a mini-batch and return ``(loss, metrics)``.

    The forward pass produces logits and the loss, the metrics are computed
    from those logits, and the backward pass then updates the global
    parameters.
    """
    logits, nn_cache = forward_neuralnet(x)
    loss, postproc_cache = forward_postproc(logits, y)

    # Metrics are computed from the logits of this forward pass, before the
    # parameters are updated below.
    metrics = eval_accuracy_numpy(logits, y)

    grad_logits = backprop_postproc(postproc_cache)
    backprop_neuralnet(grad_logits, nn_cache)

    return loss, metrics

In [19]:
def run_test(x, y):
    """Evaluate the current parameters on (x, y) without updating them.

    Returns:
        ``(loss, metrics)`` where `metrics` is the list produced by
        `eval_accuracy_numpy`.
    """
    logits = forward_neuralnet(x)[0]
    loss = forward_postproc(logits, y)[0]
    metrics = eval_accuracy_numpy(logits, y)
    return loss, metrics

In [20]:
def arrange_data(mb_size, train_ratio, val_ratio):
    """Shuffle the row order and fix the train / validation / test boundaries.

    Side effects:
        Sets the globals `shuffle_map` (a random permutation of the row
        indices of `data`), `val_begin_index` and `test_begin_index`.

    Returns:
        The number of full mini-batches of size `mb_size` that fit into the
        training share of the data.
    """
    global shuffle_map, test_begin_index, val_begin_index

    row_total = data.shape[0]

    shuffle_map = np.arange(row_total)
    np.random.shuffle(shuffle_map)

    # Only whole mini-batches are used for training, so the validation split
    # starts right after the last complete batch.
    steps = int(row_total * train_ratio) // mb_size
    val_begin_index = steps * mb_size
    test_begin_index = int(val_begin_index + val_ratio * row_total)

    return steps

In [21]:
def get_train_data(mb_size, n):
    """Return the features and labels of the n-th training mini-batch.

    Args:
        mb_size: rows per mini-batch.
        n: zero-based index of the mini-batch.

    Returns:
        ``(x, y)``: x holds every column except the last `output_cnt`, y
        holds the last `output_cnt` columns.

    The per-epoch reshuffle of the training rows is disabled in this
    reduced version, so each epoch sees the batches in the same order.
    """
    start = mb_size * n
    stop = start + mb_size

    batch = data[shuffle_map[start:stop]]
    label_start = -output_cnt

    features = batch[:, :label_start]
    labels = batch[:, label_start:]
    return features, labels

def get_test_data():
    """Return ``(x, y)`` for the test split: the shuffled rows from `test_begin_index` to the end."""
    held_out = data[shuffle_map[test_begin_index:]]
    label_start = -output_cnt
    return held_out[:, :label_start], held_out[:, label_start:]

def get_val_data():
    """Return ``(x, y)`` for the validation split: shuffled rows in ``[val_begin_index, test_begin_index)``."""
    val_rows = shuffle_map[val_begin_index:test_begin_index]
    subset = data[val_rows]
    label_start = -output_cnt
    return subset[:, :label_start], subset[:, label_start:]

In [22]:
def forward_neuralnet(x):
    """Apply the single linear layer: ``x @ weight + bias``.

    Returns:
        ``(logits, x)``; the input is handed back unchanged so the backward
        pass can reuse it.
    """
    logits = np.matmul(x, weight)
    logits = logits + bias
    return logits, x

In [23]:
def relu(x):
    """Element-wise rectifier: negative entries become 0, the rest pass through."""
    lower_bound = 0
    return np.maximum(x, lower_bound)

In [24]:
# z holds the ground-truth labels, x holds the network outputs (logits).
def sigmoid_cross_entropy_with_logits(z, x):
    """Element-wise sigmoid cross-entropy computed directly from logits.

    Uses the overflow-safe form ``max(x, 0) - x*z + log(1 + exp(-|x|))``,
    in which the exponential is only ever taken of a non-positive number.

    The logarithm is evaluated with ``np.log1p``: for large |x| the term
    ``exp(-|x|)`` is smaller than machine epsilon, so ``1 + exp(-|x|)``
    rounds to exactly 1 and a plain ``np.log`` would drop the tail of the
    loss to 0.

    Args:
        z: ground-truth labels (0 or 1), broadcastable against `x`.
        x: logits.

    Returns:
        The per-element loss, with the broadcast shape of `z` and `x`.
    """
    # np.maximum(x, 0) is relu(x), written inline so the formula is complete here.
    return np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))

In [25]:
def forward_postproc(output, y):
    """Turn logits and labels into a scalar loss.

    Returns:
        ``(loss, [y, output, CEE])``: the mean of the per-element
        cross-entropy, plus the values the backward pass needs.
    """
    per_element_loss = sigmoid_cross_entropy_with_logits(y, output)
    backprop_cache = [y, output, per_element_loss]
    return np.mean(per_element_loss), backprop_cache

In [26]:
def backprop_neuralnet(G_output, x):
    """Apply one gradient-descent step to the global `weight` and `bias`.

    Args:
        G_output: gradient of the loss with respect to the layer output.
        x: the input that produced that output.

    The parameters are modified in place; nothing is returned.
    """
    global weight, bias

    # dL/dW = x^T . G_output ; dL/db = G_output summed over the batch axis.
    grad_weight = np.matmul(x.T, G_output)
    grad_bias = G_output.sum(axis = 0)

    weight -= LEARNING_RATE * grad_weight
    bias   -= LEARNING_RATE * grad_bias

In [27]:
def sigmoid_cross_entropy_with_logits_derv(z, x):
    """Derivative of the sigmoid cross-entropy with respect to the logits: ``sigmoid(x) - z``."""
    predicted_prob = sigmoid(x)
    return predicted_prob - z

In [28]:
def sigmoid(x):
    """Logistic function in a form that never exponentiates a positive number.

    For x >= 0 this is ``1 / (1 + exp(-x))``; for x < 0 it is
    ``exp(x) / (1 + exp(x))``. Both cases are covered by the single
    expression below.
    """
    numerator = np.exp(-relu(-x))
    denominator = 1.0 + np.exp(-np.abs(x))
    return numerator / denominator

In [29]:
def backprop_postproc(aux_pp_y_output_CEE):
    """Back-propagate through the loss and return the gradient w.r.t. the logits.

    Args:
        aux_pp_y_output_CEE: the ``[y, output, CEE]`` list produced by
            `forward_postproc`.

    Returns:
        The gradient of the mean loss with respect to `output`.
    """
    labels, logits, per_element_loss = aux_pp_y_output_CEE

    # The loss is the mean over all elements, so each element contributes 1/N.
    mean_scale = 1.0 / np.prod(per_element_loss.shape)
    grad_per_element = sigmoid_cross_entropy_with_logits_derv(labels, logits)

    return grad_per_element * mean_scale


In [30]:
# Run the experiment: 100 epochs, mini-batches of 32 rows, a progress line
# every epoch, 60 % of the rows for training and 20 % for validation.
# adjust_ratio keeps its default (False), so no oversampling is applied here.
train_metrics_mean_row, val_metrics_row, test_metrics = binary_classification_exec(
    epoch_count = 100,
    mb_size     = 32,
    report      = 1,
    train_ratio = 0.6,
    val_ratio   = 0.2,
)

Initial Weight Value : 
[[-0.04428074]
 [-0.0130573 ]
 [-0.00272253]
 [ 0.01814162]
 [ 0.0417707 ]
 [-0.00399044]
 [ 0.00591716]
 [-0.01894233]]
Initial Bias Value : 
[0.]
Epoch 1 : Train - Loss = 8.248 / Val - Loss = 34.593, Acc = 0.927, Precision = 1.000, Recall = 0.237, F1 = 0.383
Epoch 2 : Train - Loss = 5.935 / Val - Loss = 2.285, Acc = 0.974, Precision = 0.946, Recall = 0.775, F1 = 0.852
Epoch 3 : Train - Loss = 5.737 / Val - Loss = 16.499, Acc = 0.947, Precision = 0.994, Recall = 0.447, F1 = 0.617
Epoch 4 : Train - Loss = 4.695 / Val - Loss = 16.783, Acc = 0.944, Precision = 1.000, Recall = 0.418, F1 = 0.590
Epoch 5 : Train - Loss = 4.671 / Val - Loss = 4.693, Acc = 0.968, Precision = 0.868, Recall = 0.789, F1 = 0.827
Epoch 6 : Train - Loss = 5.395 / Val - Loss = 22.159, Acc = 0.941, Precision = 1.000, Recall = 0.386, F1 = 0.557
Epoch 7 : Train - Loss = 5.070 / Val - Loss = 28.312, Acc = 0.938, Precision = 1.000, Recall = 0.354, F1 = 0.523
Epoch 8 : Train - Loss = 4.968 / Val - 