<a href="https://colab.research.google.com/github/hbhb0311/LikelionStudy/blob/main/3.HardCoding/1.HardCoding_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# abalone - 신경망 구현

## 파이썬 모듈 불러오기

In [1]:
import numpy as np
import csv

np.random.seed(106)
# Fix the seed of NumPy's global random generator so the experiment is reproducible.
# Without it, every run would print different results.

## 하이퍼파라미터값 정의

In [2]:
# Mean and standard deviation of the normal distribution from which the
# initial weights are drawn (see init_model).
RND_MEAN = 0
RND_STD = 0.003

# Step size applied to the gradients when weight and bias are updated.
# NOTE: the name is misspelled ("LEARNIG"); it is kept as-is because other
# cells in this notebook refer to it by this exact name.
LEARNIG_RATE = 0.001

## 메인함수 정의(main_exec)

In [3]:
def main_exec(epoch_count = 10, mb_size = 10, report = 1, train_rate = 0.8):
  """Run one full experiment: load the data, reset the model, train and test.

  Args:
    epoch_count: number of passes over the training portion of the data.
    mb_size: number of samples in each mini-batch.
    report: progress is printed every `report` epochs; a value of 0 or less
      disables the per-epoch report.
    train_rate: fraction of the rows used for training; the rest is the test set.
  """
  load_dataset()  # fills the global data matrix
  init_model()    # fresh weight and bias
  train_and_test(
    epoch_count=epoch_count,
    mb_size=mb_size,
    report=report,
    train_rate=train_rate,
  )

### 데이터 불러오는 함수 정의(load_dataset)

In [4]:
def load_dataset():
  """Read ./dataset/abalone.csv into the global ``data`` matrix.

  The first CSV row is treated as a header and skipped. Column 0 of every
  remaining row (the sex code 'I', 'M' or 'F') is one-hot encoded into the
  first three columns of ``data``; the other CSV columns are copied, in
  order, into the columns after that. A row with any other sex code keeps
  all three one-hot columns at zero.

  Side effects:
    Sets the globals ``data`` (shape: rows x (input_cnt + output_cnt)),
    ``input_cnt`` (10) and ``output_cnt`` (1).
  """
  global data, input_cnt, output_cnt

  # newline='' is how the csv module expects its input file to be opened.
  with open('./dataset/abalone.csv', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    next(csvreader, None)  # skip the header row; tolerate a completely empty file
    # csv.reader yields [] for blank lines (e.g. a trailing empty line);
    # indexing such a row would raise IndexError, so drop them here.
    rows = [row for row in csvreader if row]

  # 10 inputs = 7 numeric features + 3 one-hot sex columns; 1 output column.
  input_cnt, output_cnt = 10, 1
  # Start from an all-zero matrix so only the matching one-hot column has to be set.
  data = np.zeros([len(rows), input_cnt + output_cnt])

  sex_column = {'I': 0, 'M': 1, 'F': 2}
  for n, row in enumerate(rows):
    column = sex_column.get(row[0])
    if column is not None:
      data[n, column] = 1
    # The remaining CSV fields are strings; NumPy converts them to floats on assignment.
    data[n, 3:] = row[1:]
    

### 파라미터 초기화 함수 정의(init_model)

In [5]:
def init_model():
  """Create fresh model parameters in the globals ``weight`` and ``bias``.

  ``weight`` has shape (input_cnt, output_cnt) and is drawn from a normal
  distribution with mean RND_MEAN and standard deviation RND_STD;
  ``bias`` has shape (output_cnt,) and is all zeros.
  """
  # weight and bias are assigned here, so they must be declared global;
  # input_cnt and output_cnt are only read.
  global weight, bias, input_cnt, output_cnt

  weight_shape = (input_cnt, output_cnt)
  weight = np.random.normal(loc=RND_MEAN, scale=RND_STD, size=weight_shape)

  # The bias starts at zero so that it cannot have an outsized, harmful
  # influence at the beginning of training.
  bias = np.zeros((output_cnt,))

## 학습과 테스트 진행 함수 정의(train_and_test)

In [6]:
def train_and_test(epoch_count, mb_size, report, train_rate):
  """Train for `epoch_count` epochs and print train/test metrics.

  Args:
    epoch_count: number of epochs to run.
    mb_size: mini-batch size.
    report: print a summary every `report` epochs (no per-epoch summary when
      it is 0 or less).
    train_rate: fraction of the data used for training.

  The final test accuracy is always printed once training has finished.
  """
  steps_per_epoch = arrange_data(mb_size, train_rate)
  test_x, test_y = get_test_data()

  for epoch_no in range(1, epoch_count + 1):
    # One (loss, accuracy) pair per mini-batch update, in step order.
    step_results = [
      run_train(*get_train_data(mb_size, step))
      for step in range(steps_per_epoch)
    ]
    losses = [loss for loss, _ in step_results]
    accs = [acc for _, acc in step_results]

    # Not a reporting epoch: go straight to the next one.
    if report <= 0 or epoch_no % report != 0:
      continue

    test_acc = run_test(test_x, test_y)
    summary = 'Epoch{}: Train - loss = {:5.3f}, accuracy = {:5.3f}, Test - {:5.3f}'
    print(summary.format(epoch_no, np.mean(losses), np.mean(accs), test_acc))

  final_acc = run_test(test_x, test_y)
  print('\n 최종 테스트 : final accuracy - {:5.3f}'.format(final_acc))


### 데이터 섞고 훈련, 테스트 데이터 비율 나누기(arrange_data)

In [7]:
def arrange_data(mb_size, train_rate):
  """Shuffle the row order and split it into training and test parts.

  Args:
    mb_size: mini-batch size.
    train_rate: fraction of the rows intended for training.

  Returns:
    The number of whole mini-batches per epoch.

  Side effects:
    Sets the globals ``shuffle_map`` (a random permutation of the row indices
    of ``data``) and ``test_begin_index`` (position in ``shuffle_map`` where
    the test rows start).
  """
  global data, shuffle_map, test_begin_index

  sample_count = data.shape[0]

  # Random visiting order over all row indices of data.
  shuffle_map = np.arange(sample_count)
  np.random.shuffle(shuffle_map)

  # Floor division keeps only whole mini-batches.
  step_count = int(sample_count * train_rate) // mb_size

  # Everything after the last full training batch belongs to the test split.
  test_begin_index = mb_size * step_count
  return step_count

### 테스트 데이터 가져오기(get_test_data)

In [8]:
def get_test_data():
  """Return the test split as an (inputs, targets) pair.

  The test rows are those listed in ``shuffle_map`` from position
  ``test_begin_index`` onward. The last ``output_cnt`` columns of each row
  are the targets; all preceding columns are the inputs.
  """
  global shuffle_map, test_begin_index, output_cnt, data

  test_rows = shuffle_map[test_begin_index:]
  held_out = data[test_rows]

  inputs = held_out[:, :-output_cnt]
  targets = held_out[:, -output_cnt:]
  return inputs, targets

### 훈련 데이터 가져오기 (get_train_data)

In [9]:
def get_train_data(mb_size, n):
  """Return the n-th training mini-batch as an (inputs, targets) pair.

  Args:
    mb_size: number of rows per mini-batch.
    n: zero-based index of the mini-batch within the current epoch.

  When n is 0 (the start of an epoch) the training part of ``shuffle_map``
  is reshuffled in place; the test part is left untouched.
  """
  global shuffle_map, test_begin_index, data, output_cnt

  if n == 0:
    # Slicing gives a view, so shuffling it reorders shuffle_map itself.
    train_part = shuffle_map[:test_begin_index]
    np.random.shuffle(train_part)

  begin = mb_size * n
  end = mb_size * (n + 1)
  batch = data[shuffle_map[begin:end]]

  inputs = batch[:, :-output_cnt]
  targets = batch[:, -output_cnt:]
  return inputs, targets

### 학습 진행하기(run_train)

In [10]:
def run_train(x, y):
  """Perform one training step on a mini-batch and report its metrics.

  Args:
    x: mini-batch inputs.
    y: mini-batch targets.

  Returns:
    (loss, accuracy), both computed from the prediction made before the
    parameters are updated.
  """
  # Forward pass: prediction, then the loss on top of it.
  prediction, cached_x = forward_neuralnet(x)
  # forward_postproc hands back diff = prediction - y for the backward pass.
  loss, diff = forward_postproc(prediction, y)
  accuracy = eval_accuracy(prediction, y)

  # Backward pass: loss gradient w.r.t. the prediction, then the parameter update.
  backprop_neuralnet(backprop_postproc(diff), cached_x)

  return loss, accuracy

#### mse에 대한 역전파(backprop_postproc)

In [11]:
# Backward pass through the MSE loss
def backprop_postproc(diff):
  """Return the gradient of the mean-squared-error loss w.r.t. the output.

  Args:
    diff: output - y, as returned by forward_postproc.

  Returns:
    An array shaped like `diff` holding 2 * diff / N, where N is the number
    of elements in `diff`.
  """
  # d(mean)/d(square): each of the N squared terms contributes 1/N.
  mean_grad = np.ones(diff.shape) / np.prod(diff.shape)

  # d(square)/d(diff) = 2 * diff. The two remaining chain-rule factors
  # (d(loss)/d(loss) and d(diff)/d(output)) are both 1 and are left out.
  return mean_grad * (2 * diff)

#### 파라미터 갱신(backprop_neuralnet)

In [12]:
# Parameter update
def backprop_neuralnet(G_output, x):
  """Apply one gradient-descent step to the global ``weight`` and ``bias``.

  Args:
    G_output: gradient of the loss with respect to the layer output.
    x: the input that produced that output.

  Both parameters are updated in place, scaled by LEARNIG_RATE.
  """
  global weight, bias

  # output = x @ weight + bias, hence
  # dL/dweight = x^T @ G_output and dL/dbias = column sums of G_output.
  weight_grad = np.matmul(x.transpose(), G_output)
  bias_grad = np.sum(G_output, axis=0)

  weight -= weight_grad * LEARNIG_RATE
  bias -= bias_grad * LEARNIG_RATE

#### 신경망 연산 수행(forward_neuralnet)

In [13]:
# Forward pass of the network
def forward_neuralnet(x):
  """Compute the single-layer output x @ weight + bias.

  Args:
    x: input sample(s).

  Returns:
    (output, x): the prediction, plus the unchanged input so the backward
    pass can reuse it.
  """
  global weight, bias

  weighted_sum = np.matmul(x, weight)
  return np.add(weighted_sum, bias), x

#### 손실 구하기(forward_postproc)

In [14]:
# Loss computation
def forward_postproc(output, y):
  """Compute the mean-squared-error loss.

  Args:
    output: predictions.
    y: targets.

  Returns:
    (loss, diff): the mean of the squared differences, and
    diff = output - y for use in the backward pass.
  """
  residual = output - y
  return np.square(residual).mean(), residual

#### 정확도 측정(eval_accuracy)

In [15]:
def eval_accuracy(output, y):
  """Return 1 minus the mean relative error of `output` against `y`.

  Args:
    output: predictions.
    y: targets.
  """
  # NOTE: the error is divided by y, so a target equal to 0 produces
  # inf/nan in the result.
  relative_error = np.abs((output - y) / y)
  return 1 - np.mean(relative_error)

### 테스트 진행하기(run_test)

In [16]:
def run_test(x, y):
  """Evaluate the current model on (x, y) without updating it.

  Args:
    x: inputs.
    y: targets.

  Returns:
    The accuracy reported by eval_accuracy for the model's prediction.
  """
  # forward_neuralnet also returns its input (needed only for backprop);
  # just the prediction is used here.
  prediction = forward_neuralnet(x)[0]
  return eval_accuracy(prediction, y)

## 메인함수 실행 -> 학습

In [17]:
# Train and evaluate with the default settings of main_exec:
# 10 epochs, mini-batch size 10, a report every epoch, 80% of the data for training.
main_exec()

Epoch1: Train - loss = 33.431, accuracy = 0.558, Test - 0.805
Epoch2: Train - loss = 8.177, accuracy = 0.820, Test - 0.816
Epoch3: Train - loss = 7.548, accuracy = 0.812, Test - 0.814
Epoch4: Train - loss = 7.436, accuracy = 0.811, Test - 0.812
Epoch5: Train - loss = 7.352, accuracy = 0.810, Test - 0.812
Epoch6: Train - loss = 7.280, accuracy = 0.810, Test - 0.812
Epoch7: Train - loss = 7.220, accuracy = 0.810, Test - 0.813
Epoch8: Train - loss = 7.165, accuracy = 0.812, Test - 0.809
Epoch9: Train - loss = 7.123, accuracy = 0.809, Test - 0.811
Epoch10: Train - loss = 7.082, accuracy = 0.810, Test - 0.812

 최종 테스트 : final accuracy - 0.812


In [18]:
# Show the parameters left in the globals by the most recent training run.
print(weight)
print(bias)

[[1.02660021]
 [1.48595404]
 [1.65734692]
 [2.03045336]
 [1.6027778 ]
 [0.59024699]
 [2.33957642]
 [0.5342599 ]
 [0.46536287]
 [1.0033168 ]]
[4.16502456]


## 하이퍼파라미터 수정

In [19]:
# Rebind the module-level learning rate (ten times the initial 0.001) and retrain:
# 20 epochs, mini-batch size 20, a report every 2 epochs, 85% of the data for training.
# main_exec reloads the dataset and re-initializes weight and bias, so this
# run starts from scratch rather than continuing the previous one.
LEARNIG_RATE = 0.01
main_exec(20, 20, 2, 0.85)

Epoch2: Train - loss = 7.147, accuracy = 0.809, Test - 0.809
Epoch4: Train - loss = 6.836, accuracy = 0.812, Test - 0.802
Epoch6: Train - loss = 6.680, accuracy = 0.812, Test - 0.798
Epoch8: Train - loss = 6.578, accuracy = 0.812, Test - 0.809
Epoch10: Train - loss = 6.490, accuracy = 0.814, Test - 0.807
Epoch12: Train - loss = 6.408, accuracy = 0.814, Test - 0.811
Epoch14: Train - loss = 6.331, accuracy = 0.816, Test - 0.809
Epoch16: Train - loss = 6.253, accuracy = 0.816, Test - 0.815
Epoch18: Train - loss = 6.176, accuracy = 0.819, Test - 0.804
Epoch20: Train - loss = 6.113, accuracy = 0.820, Test - 0.811

 최종 테스트 : final accuracy - 0.811


## 새로운 x로 전복 나이 구해보기

In [20]:
# One input sample in the same layout load_dataset builds: the first three
# entries are the one-hot sex columns in I, M, F order (so [0, 1, 0] is 'M'),
# followed by the seven numeric features in CSV column order.
new_x = [0, 1, 0, 0.685,0.545, 0.18,1.42,0.674,0.392,0.5]
# forward_neuralnet returns an (output, input) tuple, so `output` is that tuple.
output = forward_neuralnet(new_x)

# output[0] is the model's prediction. The added 1.5 presumably converts the
# predicted ring count into an age in years — TODO confirm against the dataset description.
print(output[0] + 1.5)

[13.86191416]
