In [79]:
import numpy as np

class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        
        self.x = None
        self.original_x_shape = None
        # 가중치와 편향 매개변수의 미분
        self.dW = None
        self.db = None

    def forward(self, x):
        # 텐서 대응
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x

        out = np.dot(self.x, self.W) + self.b

        return out
    
    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        
        dx = dx.reshape(*self.original_x_shape)  # 입력 데이터 모양 변경(텐서 대응)
        return dx


class MeanSquaredError:
    def __init__(self):
        self.loss = None  # 손실 값
        self.y = None     # 예측값
        self.t = None     # 정답값

    def forward(self, y, t):
        self.y = y
        self.t = t
        # 오차 계산
        self.loss = 0.5 * np.mean((self.y - self.t) ** 2)
        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        dx = (self.y - self.t) * dout / batch_size
        return dx


In [80]:
import numpy as np
z = np.array([1,1])
z = z.reshape(1, 2)
print(z)
print(z.shape)

[[1 1]]
(1, 2)


In [81]:
np.random.seed(42)

W = np.random.randn(2,1)
print(np.shape(W))
print(W)
b = np.zeros(1, )
print(np.shape(b))
print(b)
affine = Affine(W, b)

(2, 1)
[[ 0.49671415]
 [-0.1382643 ]]
(1,)
[0.]


In [82]:
out = affine.forward(z)
print(out)
print(np.shape(out))

[[0.35844985]]
(1, 1)


In [83]:
dout = np.ones(1, )
print(dout)
print(np.shape(dout))

[1.]
(1,)


In [84]:
dout = affine.backward(dout)
print(dout)

[[ 0.49671415 -0.1382643 ]]


In [85]:
print(affine.dW)

[1. 1.]


In [86]:
t = 0

t = np.zeros(1)
print(t)

[0.]


In [87]:
mse = MeanSquaredError()

In [88]:
out = mse.forward(out, t)
print(out)

0.06424314814207618


In [89]:
dout = np.ones(1)
dout = mse.backward(dout)
print(dout)

[[0.35844985]]


In [90]:
dout = affine.backward(dout)
print("손실함수에 대한 입력 데이터 z의 미분 : ",dout)

손실함수에 대한 입력 데이터 z의 미분 :  [[ 0.17804711 -0.04956082]]


In [91]:
dW = affine.dW
print(dW)

[[0.35844985]
 [0.35844985]]


In [92]:
db = affine.db
print(db)

[0.35844985]


In [93]:
lr = 0.01
affine.W = affine.W - lr * affine.dW
affine.b = affine.b - lr * affine.db
print(affine.W)
print(affine.b)

[[ 0.49312965]
 [-0.1418488 ]]
[-0.0035845]


In [94]:
out = affine.forward(z)
print(out)

[[0.34769636]]


In [95]:
out = mse.forward(out, t)
print("한번 경사하강법으로 이동한 후 예측값 : ",out)

한번 경사하강법으로 이동한 후 예측값 :  0.060446378086879485


In [96]:
print("0번 경사하강법으로 이동한 후 손실값 : ", 0.06424314814207618)
print("1번 경사하강법으로 이동한 후 손실값 : ", out)


0번 경사하강법으로 이동한 후 손실값 :  0.06424314814207618
1번 경사하강법으로 이동한 후 손실값 :  0.060446378086879485


### 최종 코드 : XOR 1번째 데이터 [1,1]에 대한 정답값 [0]을 맞추는 모델 학습

In [100]:
z = np.array([1,1])
z = np.reshape(z, (1, -1))
t = np.zeros(1)

print(np.shape(z))
print(np.shape(t))

lr = 0.01

# 모델, 손실함수 설계
W = np.random.randn(2, 1)
b = np.zeros(1)
affine = Affine(W, b)
mse = MeanSquaredError()

# 경사하강법 학습
for i in range(400):
    # forward
    out = affine.forward(z)
    print("{}th 예측값 : {}".format(i, np.round(out, 3)))
    out = mse.forward(out, t)
    print("{}th 손실값 : {}".format(i, np.round(out, 3)))

    # backward
    dout = np.ones(1)
    dout = mse.backward(dout)
    dout = affine.backward(dout)

    # update
    affine.W -= lr * affine.dW
    affine.b -= lr * affine.db

(1, 2)
(1,)
0th 예측값 : [[0.073]]
0th 손실값 : 0.003
1th 예측값 : [[0.071]]
1th 손실값 : 0.003
2th 예측값 : [[0.069]]
2th 손실값 : 0.002
3th 예측값 : [[0.067]]
3th 손실값 : 0.002
4th 예측값 : [[0.065]]
4th 손실값 : 0.002
5th 예측값 : [[0.063]]
5th 손실값 : 0.002
6th 예측값 : [[0.061]]
6th 손실값 : 0.002
7th 예측값 : [[0.059]]
7th 손실값 : 0.002
8th 예측값 : [[0.057]]
8th 손실값 : 0.002
9th 예측값 : [[0.056]]
9th 손실값 : 0.002
10th 예측값 : [[0.054]]
10th 손실값 : 0.001
11th 예측값 : [[0.052]]
11th 손실값 : 0.001
12th 예측값 : [[0.051]]
12th 손실값 : 0.001
13th 예측값 : [[0.049]]
13th 손실값 : 0.001
14th 예측값 : [[0.048]]
14th 손실값 : 0.001
15th 예측값 : [[0.046]]
15th 손실값 : 0.001
16th 예측값 : [[0.045]]
16th 손실값 : 0.001
17th 예측값 : [[0.044]]
17th 손실값 : 0.001
18th 예측값 : [[0.042]]
18th 손실값 : 0.001
19th 예측값 : [[0.041]]
19th 손실값 : 0.001
20th 예측값 : [[0.04]]
20th 손실값 : 0.001
21th 예측값 : [[0.039]]
21th 손실값 : 0.001
22th 예측값 : [[0.037]]
22th 손실값 : 0.001
23th 예측값 : [[0.036]]
23th 손실값 : 0.001
24th 예측값 : [[0.035]]
24th 손실값 : 0.001
25th 예측값 : [[0.034]]
25th 손실값 : 0.001
26th 예측값 : [[0.033]]
