In [7]:
%load_ext autoreload
%autoreload 2
from network import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [28]:
# 필요한 라이브러리 불러오기
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

def one_hot_encode(y):
    encoder = OneHotEncoder(sparse=False)
    y_reshaped = y.reshape(-1, 1)
    return encoder.fit_transform(y_reshaped)

# Iris 데이터셋 불러오기
iris = load_iris()
X = iris.data
y = iris.target
y = one_hot_encode(y)
# 데이터셋을 train과 test로 분할 (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 결과 출력
print(f'Training set size: {X_train.shape[0]}')
print(f'Test set size: {X_test.shape[0]}')


Training set size: 120
Test set size: 30




## 5.7.3 오차역전파법으로 구한 기울기 검증하기

In [42]:
network = TwoLayerNetwork(X_train[0].size, 50, y_train[0].size)

In [43]:
X_batch = X_train[:3]
y_batch = y_train[:3]

grad_numerical = network.numerical_gradient(X_batch, y_batch)
grad_backprop = network.gradient(X_batch, y_batch)

[[1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]]


In [44]:
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))

W1:0.006253265149381395
b1:0.002062003236371583
W2:0.009672106105516158
b2:0.2222222223450869


## 5.7.4 오차역전파법을 사용한 학습 구현하기

In [51]:
num_iter = 1000
train_size = X_train.shape[0]
batch_size = 50
learning_rate = 0.1
train_loss_log = []
train_acc_log = []
test_acc_log = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(num_iter):
    batch_mask = np.random.choice(train_size, batch_size)
    X_batch = X_train[batch_mask]
    y_batch = y_train[batch_mask]
    
    grad = network.gradient(X_batch, y_batch)
    for key in list(network.params.keys()):
        network.params[key] -= learning_rate * grad[key]
    
    loss = network.loss(X_batch, y_batch)
    train_loss_log.append(loss)
    
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(X_train, y_train)
        test_acc = network.accuracy(X_test, y_test)
        train_acc_log.append(train_acc)
        test_acc_log.append(test_acc)
        print(train_acc, test_acc)

0.3333333333333333 0.3333333333333333
