PYTORCH로 딥러닝하기 60분만에 끝장내기

In [None]:
import numpy as np
import math

x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

In [None]:
learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)

    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x ).sum()
    grad_c = (grad_y_pred * x ** 2 ).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')    

99 1091.1156299186953
199 755.1918059758764
299 524.0857723164888
399 364.9147135982636
499 255.16769302454276
599 179.41645722921936
699 127.0748907133964
799 90.87105587912048
899 65.80399490916949
999 48.4306794748704
1099 36.37806350683603
1199 28.00879232693523
1299 22.191918714842416
1399 18.145462013006345
1499 15.328176908062694
1599 13.365067576661247
1699 11.996069006594155
1799 11.040651279289344
1899 10.373380097867267
1999 9.907024433582409
Result: y = -0.032233709870843744 + 0.8443490929222791 x + 0.0055608513239680055 x^2 + -0.09156777048985693 x^3


파이토치(PyTorch): 텐서(Tensor)

In [None]:
import torch 
import math

dtype = torch.float
device = torch.device('cpu')

x = torch.linspace(-math.pi, math.pi, 2000, device = device, dtype = dtype)
y = torch.sin(x)

a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # 순전파 단계: 예측값 y를 계산합니다
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # 손실(loss)을 계산하고 출력합니다
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # 손실에 따른 a, b, c, d의 변화도(gradient)를 계산하고 역전파합니다.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # 가중치를 갱신합니다.
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 4015.510986328125
199 2679.175537109375
299 1789.4046630859375
399 1196.713623046875
499 801.7317504882812
599 538.3814086914062
699 362.7063903808594
799 245.45521545410156
899 167.15428161621094
999 114.83396911621094
1099 79.85237121582031
1199 56.448448181152344
1299 40.779869079589844
1399 30.28263282775879
1499 23.244840621948242
1599 18.522741317749023
1699 15.351943969726562
1799 13.221080780029297
1899 11.787829399108887
1999 10.822983741760254
Result: y = 0.026613879948854446 + 0.8207128047943115 x + -0.004591335076838732 x^2 + -0.0882057175040245 x^3


PyTorch: 텐서(Tensor)와 autograd

In [None]:
import torch
import math

dtype = torch.float
device = torch.device('cpu')

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())
    
    loss.backward()
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # 가중치 갱신 후에는 변화도를 직접 0으로 만듭니다.
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 3625.2734375
199 2426.57763671875
299 1626.228515625
399 1091.5406494140625
499 734.1162719726562
599 495.03643798828125
699 335.0109558105469
799 227.82546997070312
899 155.98062133789062
999 107.7879409790039
1099 75.43544006347656
1199 53.69914627075195
1299 39.083213806152344
1399 29.24660873413086
1499 22.620521545410156
1599 18.152965545654297
1699 15.137909889221191
1799 13.101092338562012
1899 11.723705291748047
1999 10.791341781616211
Result: y = 0.029876399785280228 + 0.8233840465545654 x + -0.005154176615178585 x^2 + -0.08858566731214523 x^3


PyTorch: 새 autograd Function 정의하기

In [None]:
import torch
import math 

class LegendrePolynomial3(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return 0.5 * (5 * input ** 3 - 3 * input)

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return grad_output * 1.5 * (5 * input ** 2 - 1)

dtype = torch.float
device = torch.device("cpu")

x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6
for t in range(2000):
    P3 = LegendrePolynomial3.apply
    y_pred = a + b * P3(c + d * x)

    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    loss.backward()

    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')       

99 209.95834350585938
199 144.66018676757812
299 100.70249938964844
399 71.03519439697266
499 50.97850799560547
599 37.403133392333984
699 28.206867218017578
799 21.97318458557129
899 17.7457275390625
999 14.877889633178711
1099 12.93176555633545
1199 11.610918998718262
1299 10.71425724029541
1399 10.10548210144043
1499 9.692106246948242
1599 9.411375045776367
1699 9.220745086669922
1799 9.091285705566406
1899 9.003360748291016
1999 8.943639755249023
Result: y = -5.394172664097141e-09 + -2.208526849746704 * P3(1.367587154632588e-09 + 0.2554861009120941 x)


nn 모듈

PyTorch: nn

In [1]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for t in range(2000):
    y_pred = model(xx)

    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
    model.zero_grad()

    loss.backward()

    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 497.10687255859375
199 331.89813232421875
299 222.59548950195312
399 150.27806091308594
499 102.42875671386719
599 70.76786041259766
699 49.817420959472656
799 35.953643798828125
899 26.77890968322754
999 20.70686912536621
1099 16.688098907470703
1199 14.028114318847656
1299 12.267328262329102
1399 11.101727485656738
1499 10.330058097839355
1599 9.819159507751465
1699 9.480855941772461
1799 9.256818771362305
1899 9.108449935913086
1999 9.010183334350586
Result: y = -0.0025961073115468025 + 0.8434478044509888 x + 0.0004478720366023481 x^2 + -0.09143956750631332 x^3


PyTorch: optim

In [2]:
import torch
import math

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)

loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(2000):
    y_pred = model(xx)

    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
    model.zero_grad()

    loss.backward()
    optimizer.step()

    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad

linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 674.759521484375
199 452.4388427734375
299 304.4631652832031
399 205.93035888671875
499 140.29251098632812
599 96.54932403564453
699 67.38446807861328
799 47.93082809448242
899 34.94902038574219
999 26.282032012939453
1099 20.493247985839844
1199 16.625064849853516
1299 14.0392427444458
1399 12.310041427612305
1499 11.153270721435547
1599 10.379240036010742
1699 9.861223220825195
1799 9.514505386352539
1899 9.282461166381836
1999 9.12718391418457
Result: y = 0.01023090910166502 + 0.8424351215362549 x + -0.0017573571531102061 x^2 + -0.09129662811756134 x^3


PyTorch: 사용자 정의 nn.Module

In [3]:
import torch
import math

class Polynomial3(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
    def forward(self, x):
        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
    
    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = Polynomial3()
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
    y_pred = model(x)

    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

99 4289.7958984375
199 2949.095947265625
299 2030.5523681640625
399 1400.51513671875
499 967.8756713867188
599 670.4500122070312
699 465.7509765625
799 324.7137451171875
899 227.4333038330078
999 160.26220703125
1099 113.83246612548828
1199 81.70651245117188
1299 59.45521926879883
1399 44.028160095214844
1499 33.322296142578125
1599 25.885812759399414
1699 20.715686798095703
1799 17.118074417114258
1899 14.612548828125
1999 12.866193771362305
Result: y = 0.06086656451225281 + 0.8302542567253113 x + -0.010500495322048664 x^2 + -0.08956290036439896 x^3


PyTorch: 제어 흐름(Control Flow) + 가중치 공유(Weight Sharing)

In [6]:
import torch
import math
import random

class DynamicNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        self.e = torch.nn.Parameter(torch.randn(()))
    def forward(self, x):
        y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
        for exp in range(4, random.randint(4, 6)):
            y = y + self.e * x ** exp
        return y
    
    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = DynamicNet()

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(30000):
    y_pred = model(x)

    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

99 1460.166015625
199 2113.19189453125
299 1260.23486328125
399 1059.268798828125
499 3443.470703125
599 935.2164306640625
699 892.4635620117188
799 863.6214599609375
899 7107.908203125
999 61923.484375
1099 10532.5107421875
1199 1412.1810302734375
1299 698.7078857421875
1399 704.0103759765625
1499 680.237060546875
1599 656.8225708007812
1699 822.0445556640625
1799 614.1790161132812
1899 1245.191650390625
1999 659.97216796875
2099 562.4525756835938
2199 608.2973022460938
2299 517.9310302734375
2399 529.1103515625
2499 472.28741455078125
2599 458.1561279296875
2699 486.37603759765625
2799 439.2335205078125
2899 410.04388427734375
2999 398.82171630859375
3099 395.3590087890625
3199 378.1028747558594
3299 357.55950927734375
3399 360.342041015625
3499 355.5804748535156
3599 330.61627197265625
3699 328.99847412109375
3799 313.289306640625
3899 610.9669799804688
3999 292.3359375
4099 328.7821960449219
4199 258.6946716308594
4299 265.078369140625
4399 256.62200927734375
4499 248.1965179443359