# a_single_neuron.py

In [15]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [16]:
device = torch.device("cpu")

In [17]:
class SimpleDataset(Dataset):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        X = [[0.5, 0.9], [14.0, 12.0], [15.0, 13.6],
             [28.0, 22.8], [11.0, 8.1], [8.0, 7.1],
             [3.0, 2.9], [4.0, 0.1], [6.0, 5.3],
             [13.0, 12.0], [21.0, 19.9], [-1.0, 1.5]]

        y = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4, 29.1]

        self.X = torch.tensor(X, dtype=torch.float, device=device)
        self.y = torch.tensor(y, dtype=torch.float, device=device)
        self.y = self.y * 0.01

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return {'input': self.X[idx], 'target': self.y[idx]}

    def __str__(self):
        str = "Data Size: {0}, Input Shape: {1}, Target Shape: {2}".format(
            len(self.X), self.X.shape, self.y.shape
        )
        return str

- 2개의 센서에 대한 데이터 셋이라 가정
- self.X : 센서에 대한 데이터를 tensor로 변환 해당 크기는 ([12,2])
- self.y : 센서에 대한 데이터 변환 결과를 tensor로 변환 해당 크기는 ([12])
- self.y * 0.01 : 해당 결과에 대한 정규화


In [18]:
def model(X, W, b):
    u = torch.sum(X * W, dim=1) + b
    z = activate(u)
    return z

- 가중치 W를 곱한후 편향 b를 더해줌
- 가중치(W)는 각 입력값에 대하여 주어지므로 해당 크기는 ([2])가 나옴
- 편향(b)는 전체에 대한 합산이므로 1개의 값으로 주어짐

In [19]:
def activate(u):
    return F.sigmoid(u)

- 활성화 함수로 Sigmoid, Tonh, ReLU, .... 많은 종류가 있음
- 활성화 함수는 인공 신경망에서 각 뉴런의 출력을 결정하는 역할
- 뉴런의 입력값을 받아서 어떤 임계값을 기준으로 출력을 생성하거나 제어하는 역할
- 비선형성 : 활성화 함수는 비선형 함수로 이는 신경망이 복잡한 관계를 학습할 수 있도록 함 만약 활성화 함수가 선형이라면 여러 층을 쌓더라도 결과는 하나의 선형 변환으로 나타나게 되어 효과적으로 층을 여러 번 쌓는 것이 의미가 없음
- 신경망의 표현력 증가 : 선형 활성화 함수를 사용하면 신경망이 더 복잡한 함수를 근사할 수 있음

In [20]:
def loss_fn(y_pred, y):
    loss = torch.square(y_pred - y).mean()
    assert loss.shape == () or loss.shape == (1,)
    return loss

- 손실 함수 : 학습 및 딥 러닝에서 모델의 예측값과 실제 타겟 값 사이의 차이를 측정하는 함수
- 손실 함수의 값이 작을수록 모델의 예측이 타겟 값에 더 가깝다고 할 수 있음
- 손실 함수는 모델의 학습 과정에서 사용되어 최적화 알고리즘이 모델 파라미터를 조정
- 손실 함수의 종류는 MSE, MAE, .... 등이 존재

In [21]:
def gradient(W, b, X, y):
    y_pred = model(X, W, b)
    dl_dy = 2 * (y_pred - y)
    dl_dy = dl_dy.unsqueeze(dim=-1)  # dl_dy_pred.shape: [12, 1]

    dy_df = 1.0

    z = torch.sum(X * W, dim=-1) + b
    ds_dz = activate(z) * (1.0 - activate(z))
    ds_dz = ds_dz.unsqueeze(dim=-1)  # ds_dz_pred.shape: [12, 1]

    W_grad = torch.mean(dl_dy * dy_df * ds_dz * X, dim=0)
    b_grad = torch.mean(dl_dy * dy_df * ds_dz * 1.0, dim=0)

    return W_grad, b_grad

- y_pred : 예측값  
- 
- dl_dy : 손실함수의 도함수, 식은 아래와 같음
- MSE = $\frac{1}{n}\sum _{n}^{i=1}(y_{predic} - y)^{^{2}}$ 해당 식의 미분은 $2(y_{predic} - y)$
- 
- dy_df : 예측값에 대한 도함수 값은 1
- 
- ds_dz = 시그모이드 도함수, 식은 아래와 같은
- sigmoid' = sigmoid(z)(1-sigmoid(z))

In [22]:
def learn(W, b, train_data_loader):
    MAX_EPOCHS = 20_000
    LEARNING_RATE = 0.01

    for epoch in range(0, MAX_EPOCHS):
        batch = next(iter(train_data_loader))
        y_pred = model(batch["input"], W, b)
        loss = loss_fn(y_pred, batch["target"])

        W_grad, b_grad = gradient(W, b, batch["input"], batch["target"])

        if epoch % 100 == 0:
            print("[Epoch:{0:6,}] loss:{1:8.5f}, w0:{2:6.3f}, w1:{3:6.3f}, b:{4:6.3f}".format(
                epoch, loss.item(), W[0].item(), W[1].item(), b.item()), end=", ")
        print("W.grad: {0}, b.grad:{1}".format(W_grad, b_grad))

        W = W - LEARNING_RATE * W_grad
        b = b - LEARNING_RATE * b_grad

- epoch : 데이터 전체를 보는 횟수
- learing_rate : 학습률로 경사하강의 정도를 의미
- batch : 현제의 모델에서는 len(simple_dataset)
- y_pred : 현제의 W,b에 대한 예측 값
- loss : 레이블 값에 대한 예측 값의 손실률(작을 수록 학습이 잘됨)

In [23]:
def main():
    W = torch.ones((2,))
    b = torch.zeros((1,))

    simple_dataset = SimpleDataset()
    train_data_loader = DataLoader(dataset=simple_dataset, batch_size=len(simple_dataset))
    batch = next(iter(train_data_loader))

    y_pred = model(batch["input"], W, b)
    print(y_pred.shape)
    print(y_pred)

    loss = loss_fn(y_pred, batch["target"])
    print(loss)

    learn(W, b, train_data_loader)

In [24]:
if __name__ == "__main__":
    main()

torch.Size([12])
tensor([0.8022, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9973, 0.9837, 1.0000,
        1.0000, 1.0000, 0.6225])
tensor(0.2254)
[Epoch:     0] loss: 0.22539, w0: 1.000, w1: 1.000, b: 0.000, W.grad: tensor([0.0020, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0020, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0020, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0020, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0020, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0020, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0020, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0021, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0021, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0021, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0021, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0021, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0021, 0.0311]), b.grad:tensor([0.0271])
W.grad: tensor([0.0021, 0.0311]), b.gra

W.grad: tensor([0.0117, 0.0270]), b.grad:tensor([0.0249])
W.grad: tensor([0.0117, 0.0270]), b.grad:tensor([0.0249])
W.grad: tensor([0.0117, 0.0270]), b.grad:tensor([0.0249])
W.grad: tensor([0.0117, 0.0270]), b.grad:tensor([0.0249])
W.grad: tensor([0.0118, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0118, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0118, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0118, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0118, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0118, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0119, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0119, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0119, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0119, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0119, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0119, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor([0.0120, 0.0269]), b.grad:tensor([0.0249])
W.grad: tensor

W.grad: tensor([0.0361, 0.0264]), b.grad:tensor([0.0252])
W.grad: tensor([0.0362, 0.0264]), b.grad:tensor([0.0253])
W.grad: tensor([0.0362, 0.0264]), b.grad:tensor([0.0253])
W.grad: tensor([0.0363, 0.0264]), b.grad:tensor([0.0253])
W.grad: tensor([0.0363, 0.0264]), b.grad:tensor([0.0253])
W.grad: tensor([0.0364, 0.0264]), b.grad:tensor([0.0253])
W.grad: tensor([0.0365, 0.0264]), b.grad:tensor([0.0253])
W.grad: tensor([0.0365, 0.0265]), b.grad:tensor([0.0253])
W.grad: tensor([0.0366, 0.0265]), b.grad:tensor([0.0253])
W.grad: tensor([0.0366, 0.0265]), b.grad:tensor([0.0253])
[Epoch: 1,600] loss: 0.19844, w0: 0.770, w1: 0.562, b:-0.404, W.grad: tensor([0.0367, 0.0265]), b.grad:tensor([0.0253])
W.grad: tensor([0.0367, 0.0265]), b.grad:tensor([0.0253])
W.grad: tensor([0.0368, 0.0265]), b.grad:tensor([0.0253])
W.grad: tensor([0.0369, 0.0265]), b.grad:tensor([0.0253])
W.grad: tensor([0.0369, 0.0265]), b.grad:tensor([0.0254])
W.grad: tensor([0.0370, 0.0265]), b.grad:tensor([0.0254])
W.grad: te

W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0069])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0069])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0069])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.grad:tensor([0.0068])
W.grad: tensor([-0.0003, -0.0002]), b.gr

W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.grad:tensor([0.0048])
W.grad: tensor([-0.0002, -0.0002]), b.gr

W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.grad:tensor([0.0034])
W.grad: tensor([-0.0001, -0.0001]), b.gr

W.grad: tensor([-9.1906e-05, -8.5021e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1945e-05, -8.5036e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1882e-05, -8.4985e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1817e-05, -8.4914e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1750e-05, -8.4858e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1737e-05, -8.4834e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1754e-05, -8.4832e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1643e-05, -8.4745e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1656e-05, -8.4738e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1645e-05, -8.4728e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1522e-05, -8.4606e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1480e-05, -8.4562e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1476e-05, -8.4549e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1525e-05, -8.4581e-05]), b.grad:tensor([0.0024])
W.grad: tensor([-9.1331e-05, -8.4413e-05]), b.gr

W.grad: tensor([-6.7772e-05, -5.9411e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7782e-05, -5.9415e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7656e-05, -5.9308e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7605e-05, -5.9250e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7635e-05, -5.9277e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7567e-05, -5.9221e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7591e-05, -5.9222e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7693e-05, -5.9314e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7578e-05, -5.9212e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7606e-05, -5.9237e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7518e-05, -5.9154e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7423e-05, -5.9073e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7505e-05, -5.9134e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7579e-05, -5.9195e-05]), b.grad:tensor([0.0018])
W.grad: tensor([-6.7672e-05, -5.9274e-05]), b.gr

W.grad: tensor([-5.0381e-05, -4.2036e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0347e-05, -4.2019e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0315e-05, -4.1989e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0352e-05, -4.2015e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0244e-05, -4.1922e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0255e-05, -4.1918e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0241e-05, -4.1907e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0134e-05, -4.1815e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0093e-05, -4.1775e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0114e-05, -4.1799e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0039e-05, -4.1732e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0095e-05, -4.1773e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-4.9975e-05, -4.1671e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-4.9927e-05, -4.1633e-05]), b.grad:tensor([0.0013])
W.grad: tensor([-5.0060e-05, -4.1752e-05]), b.gr

W.grad: tensor([-3.6941e-05, -2.9549e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.7000e-05, -2.9576e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6884e-05, -2.9487e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6976e-05, -2.9569e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6867e-05, -2.9472e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6930e-05, -2.9520e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6804e-05, -2.9409e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6853e-05, -2.9450e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6922e-05, -2.9511e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6806e-05, -2.9406e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6905e-05, -2.9504e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6765e-05, -2.9381e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6828e-05, -2.9426e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6962e-05, -2.9554e-05]), b.grad:tensor([0.0009])
W.grad: tensor([-3.6755e-05, -2.9375e-05]), b.gr

W.grad: tensor([-2.7321e-05, -2.1119e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7312e-05, -2.1117e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7364e-05, -2.1157e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7290e-05, -2.1088e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7272e-05, -2.1063e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7302e-05, -2.1110e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7278e-05, -2.1088e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7324e-05, -2.1121e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7325e-05, -2.1135e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7301e-05, -2.1116e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7282e-05, -2.1098e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7300e-05, -2.1119e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7150e-05, -2.0975e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7140e-05, -2.0966e-05]), b.grad:tensor([0.0007])
W.grad: tensor([-2.7133e-05, -2.0960e-05]), b.gr

W.grad: tensor([-1.9866e-05, -1.4926e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9981e-05, -1.5025e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9968e-05, -1.5028e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9949e-05, -1.5003e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9860e-05, -1.4932e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9992e-05, -1.5041e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-2.0007e-05, -1.5056e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9911e-05, -1.4977e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9876e-05, -1.4938e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9953e-05, -1.5010e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9906e-05, -1.4968e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9977e-05, -1.5028e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9933e-05, -1.4991e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-2.0004e-05, -1.5060e-05]), b.grad:tensor([0.0005])
W.grad: tensor([-1.9935e-05, -1.4993e-05]), b.gr

W.grad: tensor([-1.4536e-05, -1.0669e-05]), b.grad:tensor([0.0004])
W.grad: tensor([-1.4637e-05, -1.0769e-05]), b.grad:tensor([0.0004])
W.grad: tensor([-1.4702e-05, -1.0823e-05]), b.grad:tensor([0.0004])
W.grad: tensor([-1.4620e-05, -1.0764e-05]), b.grad:tensor([0.0004])
W.grad: tensor([-1.4709e-05, -1.0823e-05]), b.grad:tensor([0.0004])
W.grad: tensor([-1.4605e-05, -1.0741e-05]), b.grad:tensor([0.0003])
W.grad: tensor([-1.4627e-05, -1.0771e-05]), b.grad:tensor([0.0003])
W.grad: tensor([-1.4661e-05, -1.0792e-05]), b.grad:tensor([0.0003])
W.grad: tensor([-1.4746e-05, -1.0874e-05]), b.grad:tensor([0.0003])
W.grad: tensor([-1.4722e-05, -1.0855e-05]), b.grad:tensor([0.0003])
W.grad: tensor([-1.4603e-05, -1.0751e-05]), b.grad:tensor([0.0003])
W.grad: tensor([-1.4553e-05, -1.0704e-05]), b.grad:tensor([0.0003])
W.grad: tensor([-1.4737e-05, -1.0869e-05]), b.grad:tensor([0.0003])
W.grad: tensor([-1.4593e-05, -1.0728e-05]), b.grad:tensor([0.0003])
W.grad: tensor([-1.4673e-05, -1.0803e-05]), b.gr

W.grad: tensor([-1.0664e-05, -7.7138e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0703e-05, -7.7505e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0773e-05, -7.8088e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0722e-05, -7.7672e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0824e-05, -7.8579e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0826e-05, -7.8628e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0694e-05, -7.7455e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0745e-05, -7.7945e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0741e-05, -7.7821e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0844e-05, -7.8790e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0655e-05, -7.7126e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0628e-05, -7.7002e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0756e-05, -7.8045e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0618e-05, -7.6940e-06]), b.grad:tensor([0.0003])
W.grad: tensor([-1.0673e-05, -7.7287e-06]), b.gr

W.grad: tensor([-7.7660e-06, -5.5594e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.8526e-06, -5.6314e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.8039e-06, -5.5768e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.7703e-06, -5.5457e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.9144e-06, -5.6854e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.8355e-06, -5.6066e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.7225e-06, -5.5103e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.7027e-06, -5.4923e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.7914e-06, -5.5737e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.7679e-06, -5.5581e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.8672e-06, -5.6500e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.8793e-06, -5.6463e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.8119e-06, -5.5842e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.7536e-06, -5.5339e-06]), b.grad:tensor([0.0002])
W.grad: tensor([-7.8840e-06, -5.6600e-06]), b.gr

W.grad: tensor([-5.7469e-06, -4.0817e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.7872e-06, -4.1102e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.6264e-06, -3.9712e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.7134e-06, -4.0506e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.7438e-06, -4.0804e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.5941e-06, -3.9463e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.6705e-06, -4.0171e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.7075e-06, -4.0376e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.6823e-06, -4.0246e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.7326e-06, -4.0686e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.7332e-06, -4.0630e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.6655e-06, -4.0177e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.6891e-06, -4.0277e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.7034e-06, -4.0419e-06]), b.grad:tensor([0.0001])
W.grad: tensor([-5.7413e-06, -4.0817e-06]), b.gr

W.grad: tensor([-4.0137e-06, -2.8393e-06]), b.grad:tensor([9.7864e-05])
W.grad: tensor([-3.9929e-06, -2.8145e-06]), b.grad:tensor([9.7827e-05])
W.grad: tensor([-3.9799e-06, -2.8101e-06]), b.grad:tensor([9.7791e-05])
W.grad: tensor([-3.9941e-06, -2.8126e-06]), b.grad:tensor([9.7754e-05])
W.grad: tensor([-4.0059e-06, -2.8225e-06]), b.grad:tensor([9.7718e-05])
W.grad: tensor([-3.9519e-06, -2.7828e-06]), b.grad:tensor([9.7685e-05])
W.grad: tensor([-3.9289e-06, -2.7729e-06]), b.grad:tensor([9.7649e-05])
W.grad: tensor([-3.9469e-06, -2.7791e-06]), b.grad:tensor([9.7610e-05])
W.grad: tensor([-3.8569e-06, -2.7083e-06]), b.grad:tensor([9.7580e-05])
W.grad: tensor([-3.8526e-06, -2.6915e-06]), b.grad:tensor([9.7545e-05])
W.grad: tensor([-3.8603e-06, -2.7052e-06]), b.grad:tensor([9.7508e-05])
W.grad: tensor([-3.9612e-06, -2.7952e-06]), b.grad:tensor([9.7466e-05])
W.grad: tensor([-3.9370e-06, -2.7741e-06]), b.grad:tensor([9.7430e-05])
W.grad: tensor([-3.9128e-06, -2.7493e-06]), b.grad:tensor([9.739

W.grad: tensor([-2.9408e-06, -2.0682e-06]), b.grad:tensor([7.1412e-05])
W.grad: tensor([-2.9175e-06, -2.0415e-06]), b.grad:tensor([7.1384e-05])
W.grad: tensor([-2.8734e-06, -2.0135e-06]), b.grad:tensor([7.1360e-05])
W.grad: tensor([-2.9424e-06, -2.0595e-06]), b.grad:tensor([7.1329e-05])
W.grad: tensor([-2.8703e-06, -2.0048e-06]), b.grad:tensor([7.1306e-05])
W.grad: tensor([-3.0336e-06, -2.1383e-06]), b.grad:tensor([7.1269e-05])
W.grad: tensor([-2.9535e-06, -2.0756e-06]), b.grad:tensor([7.1249e-05])
W.grad: tensor([-3.0445e-06, -2.1495e-06]), b.grad:tensor([7.1214e-05])
W.grad: tensor([-2.8647e-06, -1.9980e-06]), b.grad:tensor([7.1198e-05])
W.grad: tensor([-2.8952e-06, -2.0166e-06]), b.grad:tensor([7.1171e-05])
W.grad: tensor([-2.8384e-06, -1.9694e-06]), b.grad:tensor([7.1145e-05])
W.grad: tensor([-2.9517e-06, -2.0725e-06]), b.grad:tensor([7.1112e-05])
W.grad: tensor([-2.9734e-06, -2.0893e-06]), b.grad:tensor([7.1083e-05])
W.grad: tensor([-2.9253e-06, -2.0545e-06]), b.grad:tensor([7.105

W.grad: tensor([-2.3618e-06, -1.7304e-06]), b.grad:tensor([5.2034e-05])
W.grad: tensor([-2.3587e-06, -1.7354e-06]), b.grad:tensor([5.2013e-05])
W.grad: tensor([-2.2991e-06, -1.6813e-06]), b.grad:tensor([5.1994e-05])
W.grad: tensor([-2.3755e-06, -1.7459e-06]), b.grad:tensor([5.1973e-05])
W.grad: tensor([-2.3854e-06, -1.7534e-06]), b.grad:tensor([5.1948e-05])
W.grad: tensor([-2.4096e-06, -1.7813e-06]), b.grad:tensor([5.1928e-05])
W.grad: tensor([-2.3153e-06, -1.7018e-06]), b.grad:tensor([5.1911e-05])
W.grad: tensor([-2.4065e-06, -1.7683e-06]), b.grad:tensor([5.1886e-05])
W.grad: tensor([-2.3060e-06, -1.6851e-06]), b.grad:tensor([5.1873e-05])
W.grad: tensor([-2.3240e-06, -1.7068e-06]), b.grad:tensor([5.1852e-05])
W.grad: tensor([-2.3550e-06, -1.7267e-06]), b.grad:tensor([5.1828e-05])
W.grad: tensor([-2.3519e-06, -1.7335e-06]), b.grad:tensor([5.1807e-05])
W.grad: tensor([-2.3320e-06, -1.7099e-06]), b.grad:tensor([5.1788e-05])
W.grad: tensor([-2.3749e-06, -1.7527e-06]), b.grad:tensor([5.176

W.grad: tensor([-1.4504e-06, -9.5367e-07]), b.grad:tensor([3.8057e-05])
W.grad: tensor([-1.4963e-06, -9.7665e-07]), b.grad:tensor([3.8041e-05])
W.grad: tensor([-1.5659e-06, -1.0480e-06]), b.grad:tensor([3.8018e-05])
W.grad: tensor([-1.5814e-06, -1.0642e-06]), b.grad:tensor([3.8008e-05])
W.grad: tensor([-1.6373e-06, -1.1083e-06]), b.grad:tensor([3.7990e-05])
W.grad: tensor([-1.7158e-06, -1.1853e-06]), b.grad:tensor([3.7970e-05])
W.grad: tensor([-1.5895e-06, -1.0642e-06]), b.grad:tensor([3.7967e-05])
W.grad: tensor([-1.5361e-06, -1.0257e-06]), b.grad:tensor([3.7953e-05])
W.grad: tensor([-1.6304e-06, -1.1101e-06]), b.grad:tensor([3.7935e-05])
W.grad: tensor([-1.6708e-06, -1.1505e-06]), b.grad:tensor([3.7918e-05])
W.grad: tensor([-1.6252e-06, -1.0934e-06]), b.grad:tensor([3.7905e-05])
W.grad: tensor([-1.6103e-06, -1.0822e-06]), b.grad:tensor([3.7896e-05])
W.grad: tensor([-1.6515e-06, -1.1269e-06]), b.grad:tensor([3.7880e-05])
W.grad: tensor([-1.5472e-06, -1.0431e-06]), b.grad:tensor([3.787

W.grad: tensor([-1.4255e-06, -1.1524e-06]), b.grad:tensor([2.7720e-05])
W.grad: tensor([-1.2234e-06, -9.6982e-07]), b.grad:tensor([2.7719e-05])
W.grad: tensor([-1.3206e-06, -1.0580e-06]), b.grad:tensor([2.7700e-05])
W.grad: tensor([-1.3293e-06, -1.0617e-06]), b.grad:tensor([2.7689e-05])
W.grad: tensor([-1.3579e-06, -1.0865e-06]), b.grad:tensor([2.7679e-05])
W.grad: tensor([-1.3358e-06, -1.0636e-06]), b.grad:tensor([2.7666e-05])
W.grad: tensor([-1.3473e-06, -1.0747e-06]), b.grad:tensor([2.7655e-05])
W.grad: tensor([-1.3051e-06, -1.0356e-06]), b.grad:tensor([2.7645e-05])
W.grad: tensor([-1.3625e-06, -1.0791e-06]), b.grad:tensor([2.7630e-05])
W.grad: tensor([-1.3057e-06, -1.0350e-06]), b.grad:tensor([2.7621e-05])
W.grad: tensor([-1.4063e-06, -1.1393e-06]), b.grad:tensor([2.7603e-05])
W.grad: tensor([-1.2427e-06, -9.8844e-07]), b.grad:tensor([2.7607e-05])
W.grad: tensor([-1.2613e-06, -9.9838e-07]), b.grad:tensor([2.7594e-05])
W.grad: tensor([-1.3175e-06, -1.0431e-06]), b.grad:tensor([2.758

W.grad: tensor([-6.1560e-07, -4.1537e-07]), b.grad:tensor([2.0290e-05])
W.grad: tensor([-5.7338e-07, -3.8991e-07]), b.grad:tensor([2.0284e-05])
W.grad: tensor([-6.1685e-07, -4.1661e-07]), b.grad:tensor([2.0274e-05])
W.grad: tensor([-5.9977e-07, -3.9985e-07]), b.grad:tensor([2.0268e-05])
W.grad: tensor([-6.1591e-07, -4.1785e-07]), b.grad:tensor([2.0262e-05])
W.grad: tensor([-5.9977e-07, -4.1599e-07]), b.grad:tensor([2.0255e-05])
W.grad: tensor([-5.9356e-07, -3.9116e-07]), b.grad:tensor([2.0252e-05])
W.grad: tensor([-5.3179e-07, -3.5514e-07]), b.grad:tensor([2.0247e-05])
W.grad: tensor([-6.5596e-07, -4.5138e-07]), b.grad:tensor([2.0231e-05])
W.grad: tensor([-6.4261e-07, -4.4455e-07]), b.grad:tensor([2.0225e-05])
W.grad: tensor([-7.1960e-07, -5.0788e-07]), b.grad:tensor([2.0215e-05])
W.grad: tensor([-6.0536e-07, -4.1227e-07]), b.grad:tensor([2.0214e-05])
W.grad: tensor([-6.3889e-07, -4.3896e-07]), b.grad:tensor([2.0205e-05])
W.grad: tensor([-6.4665e-07, -4.5262e-07]), b.grad:tensor([2.019

[Epoch:18,300] loss: 0.00239, w0: 0.007, w1: 0.089, b:-0.839, W.grad: tensor([-6.1095e-07, -3.1603e-07]), b.grad:tensor([1.4729e-05])
W.grad: tensor([-6.4448e-07, -3.4273e-07]), b.grad:tensor([1.4724e-05])
W.grad: tensor([-6.6807e-07, -3.6942e-07]), b.grad:tensor([1.4717e-05])
W.grad: tensor([-7.2146e-07, -4.2592e-07]), b.grad:tensor([1.4707e-05])
W.grad: tensor([-6.3827e-07, -3.4645e-07]), b.grad:tensor([1.4710e-05])
W.grad: tensor([-7.1929e-07, -4.2034e-07]), b.grad:tensor([1.4701e-05])
W.grad: tensor([-6.4106e-07, -3.5080e-07]), b.grad:tensor([1.4702e-05])
W.grad: tensor([-6.6962e-07, -3.7253e-07]), b.grad:tensor([1.4695e-05])
W.grad: tensor([-5.3582e-07, -2.6822e-07]), b.grad:tensor([1.4698e-05])
W.grad: tensor([-5.4948e-07, -2.7940e-07]), b.grad:tensor([1.4693e-05])
W.grad: tensor([-5.9915e-07, -3.1913e-07]), b.grad:tensor([1.4684e-05])
W.grad: tensor([-6.1778e-07, -3.3652e-07]), b.grad:tensor([1.4679e-05])
W.grad: tensor([-6.6124e-07, -3.7750e-07]), b.grad:tensor([1.4672e-05])
W.

W.grad: tensor([-5.2961e-07, -3.7129e-07]), b.grad:tensor([1.0998e-05])
W.grad: tensor([-6.2895e-07, -4.5821e-07]), b.grad:tensor([1.0984e-05])
W.grad: tensor([-6.3392e-07, -4.6814e-07]), b.grad:tensor([1.0981e-05])
W.grad: tensor([-4.1661e-07, -2.8188e-07]), b.grad:tensor([1.0991e-05])
W.grad: tensor([-5.2216e-07, -3.6694e-07]), b.grad:tensor([1.0981e-05])
W.grad: tensor([-5.5227e-07, -3.9985e-07]), b.grad:tensor([1.0971e-05])
W.grad: tensor([-4.3679e-07, -2.8747e-07]), b.grad:tensor([1.0976e-05])
W.grad: tensor([-6.1095e-07, -4.4952e-07]), b.grad:tensor([1.0959e-05])
W.grad: tensor([-4.5324e-07, -3.1168e-07]), b.grad:tensor([1.0964e-05])
W.grad: tensor([-4.0978e-07, -2.7195e-07]), b.grad:tensor([1.0963e-05])
W.grad: tensor([-5.6624e-07, -4.0295e-07]), b.grad:tensor([1.0946e-05])
W.grad: tensor([-4.9050e-07, -3.4521e-07]), b.grad:tensor([1.0945e-05])
W.grad: tensor([-5.7742e-07, -4.1599e-07]), b.grad:tensor([1.0939e-05])
W.grad: tensor([-5.4265e-07, -3.8867e-07]), b.grad:tensor([1.093

W.grad: tensor([-3.5452e-07, -3.4024e-07]), b.grad:tensor([8.1069e-06])
W.grad: tensor([-3.8929e-07, -3.7005e-07]), b.grad:tensor([8.1050e-06])
W.grad: tensor([-2.5767e-07, -2.6822e-07]), b.grad:tensor([8.1087e-06])
W.grad: tensor([-3.9302e-07, -3.8619e-07]), b.grad:tensor([8.1000e-06])
W.grad: tensor([-2.6760e-07, -2.8064e-07]), b.grad:tensor([8.1034e-06])
W.grad: tensor([-2.3780e-07, -2.5084e-07]), b.grad:tensor([8.1041e-06])
W.grad: tensor([-2.7629e-07, -2.8685e-07]), b.grad:tensor([8.0985e-06])
W.grad: tensor([-4.3027e-07, -4.0978e-07]), b.grad:tensor([8.0895e-06])
W.grad: tensor([-2.1948e-07, -2.2911e-07]), b.grad:tensor([8.0988e-06])
W.grad: tensor([-2.6915e-07, -2.7381e-07]), b.grad:tensor([8.0941e-06])
W.grad: tensor([-2.9026e-07, -3.0175e-07]), b.grad:tensor([8.0867e-06])
W.grad: tensor([-2.5549e-07, -2.6698e-07]), b.grad:tensor([8.0888e-06])
W.grad: tensor([-3.6632e-07, -3.6135e-07]), b.grad:tensor([8.0792e-06])
W.grad: tensor([-2.8436e-07, -2.9306e-07]), b.grad:tensor([8.082

# b_autograd_1.py

In [25]:
import torch

In [26]:
w = torch.ones(3, requires_grad=True)
print(w)
print(w.grad, w.grad_fn)

tensor([1., 1., 1.], requires_grad=True)
None None


- requires_grad : computational 그래프의 노드의 시작으로 만들겠다는 의미
- w의 grad와 grad_fn이 생성됨

In [27]:
c = torch.tensor([2])
x = w + c
print(x)
print(x.grad_fn)

tensor([3., 3., 3.], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x1743b8190>


- c는 requires_grad가 않되어 있지만 되어 있는 것과 더하면 true와 동일하게 적용이 됨
- 해당 c는 computational 그래프에 값이 적용 되지 않음
- 이때 grad는 동일하게 none이지만 grad_fn은 add를 통하여 만들어졌다는 것을 유지

In [28]:
y = x * 3
print(y)
print(y.grad_fn)

tensor([9., 9., 9.], grad_fn=<MulBackward0>)
<MulBackward0 object at 0x1743b83a0>


- 위와 동일하게 grad는 동일하게 none이지만 grad_fn은 mul을 통하여 만들어졌다는 것을 유지

In [29]:
z = y.mean()
print(z)
print(z.shape)
print(z.grad_fn)

tensor(9., grad_fn=<MeanBackward0>)
torch.Size([])
<MeanBackward0 object at 0x1743bf8b0>


- 위와 동일하게 grad는 동일하게 none이지만 grad_fn은 mean을 통하여 만들어졌다는 것을 유지

In [30]:
z.backward()

# print(z.grad)
# print(y.grad)
# print(x.grad)
print(w.grad)

tensor([1., 1., 1.])


- z.backward() : 통합 자동미분 이전의 연산들에 대하여 유지 하고 있음으로 가능
- grad의 접근에 대하여는 말단 노드에 대하여만 접근이 가능하며 중간의 노드에 대한 접근은 y.retain_grad()과 같은 방식으로 접근 가능

# c_autograd_2.py

In [31]:
import torch

In [32]:
a = torch.randn(2, 2)
print(a.requires_grad)

b = ((a * 3) / (a - 1))
print(b.grad_fn)

# b.backward()

False
None


- requires_grad -> false
- b.backward()는 그래프가 그려져 있지 않아 사용 할 수 없음

In [33]:
a.requires_grad_(True)

print(a.requires_grad)

c = (a * a).sum()
print(c.grad_fn)

c.backward()

print(a.grad)

True
<SumBackward0 object at 0x1743b81f0>
tensor([[ 1.6537, -0.9392],
        [-1.4761, -1.3360]])


- requires_grad_(true)로 requires_grad를 현제 상태부터 computation 그래프를 그릴 수 있음
- 그러므로 c.backward()가 가능

In [34]:
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
b = a.detach()
print(b.requires_grad)
print(b is a)

True
False
False


In [35]:
from torchviz import make_dot

In [36]:
x = torch.ones(3, requires_grad=True)
y = 2 * x
z = 3 + x
r = (y + z).sum()
make_dot(r).render("./img/torchviz_1", format="png")

# Detach
x = torch.ones(3, requires_grad=True)
y = 2 * x
z = 3 + x.detach()
r = (y + z).sum()
make_dot(r).render("./img/torchviz_2", format="png")

'img/torchviz_2.png'

In [37]:
a = torch.randn(2, 2, requires_grad=True)
b = a * 2
make_dot(b).render("./img/torchviz_3", format="png")

a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
with torch.no_grad():
  print(a.requires_grad)
  b = a * 2

print(a.requires_grad)
print(b.requires_grad)
make_dot(b).render("./img/torchviz_4", format="png")

True
True
True
False


'img/torchviz_4.png'

# d_autograd_3.py

In [38]:
import torch

In [39]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    output = (weights * 3).sum()
    print("[output {0}]:".format(epoch), output)
    output.backward()

    print("weights.grad:", weights.grad)

    # optimize model, i.e. adjust weights...
    with torch.no_grad():
        weights -= 0.1 * weights.grad
        # 'empty gradients' is important!
        # It affects the final weights & output
        weights.grad.zero_()

    print("weights:", weights)
    print()

output = (weights * 3).sum()
print("\n[output final]:", output)

[output 0]: tensor(12., grad_fn=<SumBackward0>)
weights.grad: tensor([3., 3., 3., 3.])
weights: tensor([0.7000, 0.7000, 0.7000, 0.7000], requires_grad=True)

[output 1]: tensor(8.4000, grad_fn=<SumBackward0>)
weights.grad: tensor([3., 3., 3., 3.])
weights: tensor([0.4000, 0.4000, 0.4000, 0.4000], requires_grad=True)

[output 2]: tensor(4.8000, grad_fn=<SumBackward0>)
weights.grad: tensor([3., 3., 3., 3.])
weights: tensor([0.1000, 0.1000, 0.1000, 0.1000], requires_grad=True)


[output final]: tensor(1.2000, grad_fn=<SumBackward0>)


In [40]:
weights = torch.ones(4, requires_grad=True)
optimizer = torch.optim.SGD([weights], lr=0.1)

for epoch in range(3):
    # just a dummy example
    output = (weights * 3).sum()
    print("[output {0}]:".format(epoch), output)
    output.backward()

    print("weights.grad:", weights.grad)

    # optimize model, i.e. adjust weights & empty gradients
    optimizer.step()
    optimizer.zero_grad()

    print("weights:", weights)
    print()

output = (weights * 3).sum()
print("\n[output final]:", output)

[output 0]: tensor(12., grad_fn=<SumBackward0>)
weights.grad: tensor([3., 3., 3., 3.])
weights: tensor([0.7000, 0.7000, 0.7000, 0.7000], requires_grad=True)

[output 1]: tensor(8.4000, grad_fn=<SumBackward0>)
weights.grad: tensor([3., 3., 3., 3.])
weights: tensor([0.4000, 0.4000, 0.4000, 0.4000], requires_grad=True)

[output 2]: tensor(4.8000, grad_fn=<SumBackward0>)
weights.grad: tensor([3., 3., 3., 3.])
weights: tensor([0.1000, 0.1000, 0.1000, 0.1000], requires_grad=True)


[output final]: tensor(1.2000, grad_fn=<SumBackward0>)


# e_single_neuron_autograd_1.py

In [44]:
import torch
from torch.utils.data import DataLoader

In [46]:
def learn(W, b, train_data_loader):
    MAX_EPOCHS = 20_000
    LEARNING_RATE = 0.01

    for epoch in range(0, MAX_EPOCHS):
        batch = next(iter(train_data_loader))
        y_pred = model(batch["input"], W, b)
        loss = loss_fn(y_pred, batch["target"])

        loss.backward()

    if epoch % 100 == 0:
        print("[Epoch:{0:6,}] loss:{1:8.5f}, w0:{2:6.3f}, w1:{3:6.3f}, b:{4:6.3f}".format(
            epoch, loss.item(), W[0].item(), W[1].item(), b.item()
        ), end=", ")
        print("W.grad: {0}, b.grad:{1}".format(W.grad, b.grad))

    with torch.no_grad():
        W -= LEARNING_RATE * W.grad
        b -= LEARNING_RATE * b.grad
        W.grad = None
        b.grad = None

In [47]:
def main():
    W = torch.ones((2,), requires_grad=True)
    b = torch.zeros((1,), requires_grad=True)

    simple_dataset = SimpleDataset()
    train_data_loader = DataLoader(dataset=simple_dataset, batch_size=len(simple_dataset))
    learn(W, b, train_data_loader)

In [48]:
if __name__ == "__main__":
    main()

# f_single_neuron_autograd_2.py

In [49]:
import torch
from torch.utils.data import DataLoader

In [50]:
def learn(W, b, train_data_loader):
    MAX_EPOCHS = 20_000
    LEARNING_RATE = 0.01

    from torch import optim
    optimizer = optim.SGD([W, b], lr=LEARNING_RATE)

    for epoch in range(0, MAX_EPOCHS):
        batch = next(iter(train_data_loader))
        y_pred = model(batch["input"], W, b)
        loss = loss_fn(y_pred, batch["target"])

        loss.backward()

        if epoch % 100 == 0:
            print("[Epoch:{0:6,}] loss:{1:8.5f}, w0:{2:6.3f}, w1:{3:6.3f}, b:{4:6.3f}".format(
                epoch, loss.item(), W[0].item(), W[1].item(), b.item()
            ), end=", ")
            print("W.grad: {0}, b.grad:{1}".format(W.grad, b.grad))

        optimizer.step()
        optimizer.zero_grad()

In [51]:
def main():
    W = torch.ones((2,), requires_grad=True)
    b = torch.zeros((1,), requires_grad=True)

    simple_dataset = SimpleDataset()
    train_data_loader = DataLoader(dataset=simple_dataset, batch_size=len(simple_dataset))
    learn(W, b, train_data_loader)

In [52]:
if __name__ == "__main__":
    main()

[Epoch:     0] loss: 0.22539, w0: 1.000, w1: 1.000, b: 0.000, W.grad: tensor([0.0020, 0.0311]), b.grad:tensor([0.0271])
[Epoch:   100] loss: 0.22369, w0: 0.998, w1: 0.969, b:-0.027, W.grad: tensor([0.0030, 0.0307]), b.grad:tensor([0.0269])
[Epoch:   200] loss: 0.22203, w0: 0.994, w1: 0.939, b:-0.054, W.grad: tensor([0.0040, 0.0302]), b.grad:tensor([0.0267])
[Epoch:   300] loss: 0.22041, w0: 0.989, w1: 0.909, b:-0.080, W.grad: tensor([0.0052, 0.0297]), b.grad:tensor([0.0264])
[Epoch:   400] loss: 0.21882, w0: 0.984, w1: 0.879, b:-0.107, W.grad: tensor([0.0064, 0.0291]), b.grad:tensor([0.0261])
[Epoch:   500] loss: 0.21727, w0: 0.977, w1: 0.850, b:-0.133, W.grad: tensor([0.0077, 0.0285]), b.grad:tensor([0.0258])
[Epoch:   600] loss: 0.21575, w0: 0.968, w1: 0.822, b:-0.158, W.grad: tensor([0.0092, 0.0279]), b.grad:tensor([0.0254])
[Epoch:   700] loss: 0.21425, w0: 0.958, w1: 0.795, b:-0.183, W.grad: tensor([0.0107, 0.0273]), b.grad:tensor([0.0251])
[Epoch:   800] loss: 0.21276, w0: 0.947,

[Epoch: 7,900] loss: 0.00239, w0: 0.006, w1: 0.088, b:-0.819, W.grad: tensor([-3.1244e-05, -2.4486e-05]), b.grad:tensor([0.0008])
[Epoch: 8,000] loss: 0.00239, w0: 0.006, w1: 0.088, b:-0.820, W.grad: tensor([-3.0141e-05, -2.3540e-05]), b.grad:tensor([0.0007])
[Epoch: 8,100] loss: 0.00239, w0: 0.006, w1: 0.088, b:-0.820, W.grad: tensor([-2.9135e-05, -2.2669e-05]), b.grad:tensor([0.0007])
[Epoch: 8,200] loss: 0.00239, w0: 0.006, w1: 0.088, b:-0.821, W.grad: tensor([-2.8015e-05, -2.1707e-05]), b.grad:tensor([0.0007])
[Epoch: 8,300] loss: 0.00239, w0: 0.006, w1: 0.089, b:-0.822, W.grad: tensor([-2.6938e-05, -2.0794e-05]), b.grad:tensor([0.0007])
[Epoch: 8,400] loss: 0.00239, w0: 0.006, w1: 0.089, b:-0.822, W.grad: tensor([-2.5916e-05, -1.9921e-05]), b.grad:tensor([0.0006])
[Epoch: 8,500] loss: 0.00239, w0: 0.006, w1: 0.089, b:-0.823, W.grad: tensor([-2.5061e-05, -1.9201e-05]), b.grad:tensor([0.0006])
[Epoch: 8,600] loss: 0.00239, w0: 0.006, w1: 0.089, b:-0.824, W.grad: tensor([-2.3990e-05,

[Epoch:14,400] loss: 0.00239, w0: 0.007, w1: 0.089, b:-0.837, W.grad: tensor([-2.7409e-06, -1.8878e-06]), b.grad:tensor([6.4826e-05])
[Epoch:14,500] loss: 0.00239, w0: 0.007, w1: 0.089, b:-0.838, W.grad: tensor([-2.7444e-06, -1.8887e-06]), b.grad:tensor([6.2352e-05])
[Epoch:14,600] loss: 0.00239, w0: 0.007, w1: 0.089, b:-0.838, W.grad: tensor([-2.5844e-06, -1.8002e-06]), b.grad:tensor([6.0096e-05])
[Epoch:14,700] loss: 0.00239, w0: 0.007, w1: 0.089, b:-0.838, W.grad: tensor([-2.6561e-06, -1.8915e-06]), b.grad:tensor([5.7822e-05])
[Epoch:14,800] loss: 0.00239, w0: 0.007, w1: 0.089, b:-0.838, W.grad: tensor([-2.3474e-06, -1.6615e-06]), b.grad:tensor([5.5683e-05])
[Epoch:14,900] loss: 0.00239, w0: 0.007, w1: 0.089, b:-0.838, W.grad: tensor([-2.2943e-06, -1.6447e-06]), b.grad:tensor([5.3646e-05])
[Epoch:15,000] loss: 0.00239, w0: 0.007, w1: 0.089, b:-0.838, W.grad: tensor([-2.4354e-06, -1.8049e-06]), b.grad:tensor([5.1596e-05])
[Epoch:15,100] loss: 0.00239, w0: 0.007, w1: 0.089, b:-0.838, 