In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.nn import functional as F

# Lab2-1. PyTorch Tensor

### Tensor: 다차원 배열

In [None]:
# Standard-normal samples arranged as tensors of rank 1, 2 and 3.
x = torch.randn(2)        # rank 1: a vector with 2 entries
y = torch.randn(3, 4)     # rank 2: a 3x4 matrix
z = torch.randn(4, 5, 6)  # rank 3: a 4x5x6 tensor

# One shape per output line, in creation order.
print(x.shape, y.shape, z.shape, sep="\n")

### tensor indexing (index가 0부터 시작한다는 점에 유의할 것)

In [None]:
# A 3x4 float matrix holding 0..11 in row-major order:
#   [[0., 1.,  2.,  3.],
#    [4., 5.,  6.,  7.],
#    [8., 9., 10., 11.]]
x = torch.arange(12, dtype=torch.float32).reshape(3, 4)  # x.shape: [3, 4]

# Indices are zero-based: row 1 is the second row, column 3 is the fourth column.
print(f"x: {x}")
print(f"x[1, 3]: {x[1, 3]}")   # a single element
print(f"x[1]: {x[1]}")         # the whole second row
print(f"x[1, :]: {x[1, :]}")   # the same row, with an explicit column slice
print(f"x[:, 3]: {x[:, 3]}")   # the whole fourth column

### unsqueeze

In [None]:
# Start from a flat vector of three values.
x = torch.tensor([1.0, 2.0, 3.0])
print(x.shape)                 # [3]

# Insert a length-1 axis at position 1: the vector becomes a column.
x = torch.unsqueeze(x, dim=1)
print(x.shape)                 # [3, 1]

# Insert another length-1 axis, this time at the front.
x = torch.unsqueeze(x, dim=0)
print(x.shape)                 # [1, 3, 1]

### sampling from a normal distribution

In [None]:
# Draw three independent samples from the standard normal distribution N(0, 1).
x = torch.randn((3,))
print(x)

In [None]:
# With many samples the histogram approximates the bell shape of N(0, 1).
x = torch.randn(10000)
fig, ax = plt.subplots()
ax.hist(x, bins=25)  # draw a histogram of x using 25 bins
plt.show()

### Element-wise addition, multiplication

In [None]:
# Both operands have shape [3]; each operation pairs entries position by position.
x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([2.0, 3.0, 4.0])
print("add:", torch.add(x, y))  # same as x + y -> [3., 5., 7.]
print("mul:", torch.mul(x, y))  # same as x * y -> [2., 6., 12.]

### vector dot-product (https://pytorch.org/docs/main/generated/torch.matmul.html)

In [None]:
# Two ways to compute the dot product 1*2 + 2*3 + 3*4 = 20.
x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([2.0, 3.0, 4.0])
print(torch.matmul(x, y))  # `x @ y`: matmul of two 1-D tensors is their dot product
print(torch.sum(x * y))    # element-wise product, then sum of the entries

### matrix multiplication

In [None]:
# Row r of x is filled with the value r + 1.
x = torch.tensor([[value] * 4 for value in (1.0, 2.0, 3.0)])  # x.shape: [3, 4]
# Every row of y is [1, 2].
y = torch.tensor([[1.0, 2.0]] * 4)                            # y.shape: [4, 2]

# [3, 4] @ [4, 2] -> [3, 2]: the inner dimensions (4) must match.
z = torch.matmul(x, y)

print("x.shape:", x.shape)
print("y.shape:", y.shape)
print("z.shape:", z.shape)
print(z)

# Lab2-2. Perceptron (AND Gate)

In [None]:
# AND truth table: the target is 1 only when both inputs are 1.
dataset = [
    {'x': torch.tensor(inputs), 'y': torch.tensor([target])}
    for inputs, target in (
        ([0.0, 0.0], 0.0),
        ([0.0, 1.0], 0.0),
        ([1.0, 0.0], 0.0),
        ([1.0, 1.0], 1.0),
    )
]

# Hand-picked parameters: 5*x1 + 5*x2 - 7.5 is positive only for the input (1, 1).
weight = torch.tensor([5.0, 5.0])  # weight.shape = [2]
bias = torch.tensor([-7.5])        # bias.shape = [1]

print(f"weight: {weight.tolist()}, bias: {bias.tolist()}")
for data in dataset:
    x, y = data['x'], data['y']    # shapes [2] and [1]
    # Weighted sum of the inputs plus bias, squashed into (0, 1) by the sigmoid.
    y_hat = torch.sigmoid(torch.dot(weight, x) + bias)
    print(f"입력: {x.tolist()}, 출력: {y_hat.item():.4f}, 정답: {y.item()}")

# Lab2-3. Training a Perceptron (AND Gate)

In [None]:
# Run on the GPU when one is available; otherwise fall back to the CPU so this
# cell does not crash on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Prepare training data: the four input rows of the AND truth table and their targets
x = torch.tensor([
    [0.0, 0.0],
    [0.0, 1.0],
    [1.0, 0.0],
    [1.0, 1.0]
]).to(device)                                        # x.shape: [4, 2]
y = torch.tensor([0.0, 0.0, 0.0, 1.0]).to(device)    # y.shape: [4]

# Initialize parameters with standard-normal random values
weight = torch.randn(2).to(device)  # weight.shape = [2]
bias = torch.randn(1).to(device)    # bias.shape = [1]

# Show what the untrained perceptron predicts for every sample.
print("===== Right after initialization =====")
print(f"weight: {weight.tolist()}, bias: {bias.tolist()}")
for i in range(len(x)):
    x_i = x[i]    # x_i.shape = [2]
    y_i = y[i]    # scalar target (0-dim tensor)
    y_hat = torch.sigmoid(weight @ x_i + bias)    # @: dot product; y_hat.shape = [1]
    print(f"입력: {x_i.tolist()}, 출력: {y_hat.item():.4f}, 정답: {y_i.item()}")

In [None]:
# Train the perceptron with hand-derived gradients and plain gradient descent.
TOTAL_EPOCH = 1000   # number of passes over the whole training set
LR = 0.3             # learning rate (gradient-descent step size)

# Number of samples averaged by the loss; derived from the data instead of
# being hard-coded so the gradient stays correct if the dataset changes.
n_samples = y.numel()

losses = []
for epoch in range(TOTAL_EPOCH):
    # forward propagation
    h = (x * weight).sum(dim=1) + bias  # pre-activation, one value per sample
    y_hat = torch.sigmoid(h)            # prediction, same shape as h
    loss = (y_hat - y).square().mean()  # mean squared error, a scalar (0-dim tensor)
    losses.append(loss.item())

    # backpropagation (using chain rule)
    dL_dyhat = 2 * (y_hat - y) / n_samples   # derivative of the mean of squares
    dyhat_dh = y_hat * (1 - y_hat)           # derivative of the sigmoid
    dL_dh = dL_dyhat * dyhat_dh              # one value per sample
    dL_dbias = dL_dh.sum()                   # scalar: dh/dbias is 1 for every sample
    dL_dweight = (dL_dh.unsqueeze(1) * x).sum(dim=0)  # shape: [2], since dh/dweight = x

    # gradient descent: step against the gradient
    weight = weight - LR * dL_dweight
    bias = bias - LR * dL_dbias
# Loss recorded at every epoch of the training loop.
fig, ax = plt.subplots()
ax.plot(losses)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
plt.show()

# Validate: compare the trained perceptron's output with the target of every sample
print("===== After training =====")
print(f"weight: {weight.tolist()}, bias: {bias.tolist()}")
for x_i, y_i in zip(x, y):    # x_i.shape = [2]; y_i is a scalar target
    y_hat = torch.sigmoid(torch.dot(weight, x_i) + bias)
    print(f"입력: {x_i.tolist()}, 출력: {y_hat.item():.4f}, 정답: {y_i.item()}")

# Lab2-4. Training a Perceptron (AND Gate) (Without manual backpropagation)

In [None]:
# Run on the GPU when one is available; otherwise fall back to the CPU so this
# cell does not crash on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Prepare training data (AND truth table)
x = torch.tensor([
    [0.0, 0.0],
    [0.0, 1.0],
    [1.0, 0.0],
    [1.0, 1.0]
]).to(device)                                        # x.shape: [4, 2]
y = torch.tensor([0.0, 0.0, 0.0, 1.0]).to(device)    # y.shape: [4]

# Initialize parameters. Each tensor is moved to the device *before* it is
# wrapped, so the resulting nn.Parameter is a leaf tensor and receives .grad.
weight = nn.Parameter(torch.randn(2).to(device))
bias = nn.Parameter(torch.randn(1).to(device))

# Train
TOTAL_EPOCH = 1000   # number of passes over the whole training set
LR = 0.3             # learning rate (gradient-descent step size)
losses = []
for _ in range(TOTAL_EPOCH):
    # Drop the gradients of the previous step; backward() would otherwise
    # accumulate into them.
    weight.grad = None
    bias.grad = None

    # forward propagation
    y_hat = torch.sigmoid((x * weight).sum(dim=1) + bias)
    loss = F.mse_loss(y_hat, y)
    losses.append(loss.item())

    # PyTorch's automatic backpropagation: fills weight.grad and bias.grad
    loss.backward()

    # gradient descent: update the parameters in place; no_grad keeps the
    # update itself out of the autograd graph
    with torch.no_grad():
        weight.sub_(LR * weight.grad)
        bias.sub_(LR * bias.grad)
# Loss recorded at every epoch of the training loop.
fig, ax = plt.subplots()
ax.plot(losses)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
plt.show()

# Validate: compare the trained perceptron's output with the target of every sample
print("===== After training =====")
print(f"weight: {weight.tolist()}, bias: {bias.tolist()}")
for x_i, y_i in zip(x, y):    # x_i.shape = [2]; y_i is a scalar target
    y_hat = torch.sigmoid(torch.dot(weight, x_i) + bias)
    print(f"입력: {x_i.tolist()}, 출력: {y_hat.item():.4f}, 정답: {y_i.item()}")

# Lab2-5. Training a Perceptron (AND Gate) (Without manual gradient descent)

In [None]:
# Run on the GPU when one is available; otherwise fall back to the CPU so this
# cell does not crash on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Prepare training data (AND truth table)
x = torch.tensor([
    [0.0, 0.0],
    [0.0, 1.0],
    [1.0, 0.0],
    [1.0, 1.0]
]).to(device)                                        # x.shape: [4, 2]
y = torch.tensor([0.0, 0.0, 0.0, 1.0]).to(device)    # y.shape: [4]

# Initialize parameters. Each tensor is moved to the device *before* it is
# wrapped, so the resulting nn.Parameter is a leaf tensor the optimizer can update.
weight = nn.Parameter(torch.randn(2).to(device))
bias = nn.Parameter(torch.randn(1).to(device))

# Hyperparameters. They are defined before the optimizer is built so that the
# optimizer really uses this cell's LR instead of a value left over from an
# earlier cell.
TOTAL_EPOCH = 1000   # number of passes over the whole training set
LR = 0.3             # learning rate (gradient-descent step size)

# Initialize an optimizer (we use a stochastic gradient descent optimizer)
parameters = [weight, bias]
optimizer = torch.optim.SGD(parameters, lr=LR)

# Train
losses = []
for _ in range(TOTAL_EPOCH):
    # Reset the gradients of the previous step; backward() would otherwise
    # accumulate into them.
    optimizer.zero_grad()

    # forward propagation
    y_hat = torch.sigmoid((x * weight).sum(dim=1) + bias)
    loss = F.mse_loss(y_hat, y)
    losses.append(loss.item())

    # PyTorch's automatic backpropagation: fills weight.grad and bias.grad
    loss.backward()

    # gradient descent: the optimizer applies the update to every parameter
    optimizer.step()
# Loss recorded at every epoch of the training loop.
fig, ax = plt.subplots()
ax.plot(losses)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
plt.show()

# Validate: compare the trained perceptron's output with the target of every sample
print("===== After training =====")
print(f"weight: {weight.tolist()}, bias: {bias.tolist()}")
for x_i, y_i in zip(x, y):    # x_i.shape = [2]; y_i is a scalar target
    y_hat = torch.sigmoid(torch.dot(weight, x_i) + bias)
    print(f"입력: {x_i.tolist()}, 출력: {y_hat.item():.4f}, 정답: {y_i.item()}")

# Lab2-6. Deep Neural Network (Regression)

In [None]:
from IPython.display import clear_output

# Prepare data: a noisy parabola. Run on the GPU when one is available and fall
# back to the CPU otherwise, so this cell does not crash on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1).to(device)  # x data (tensor), shape=(100, 1)
y = x.pow(2) + 0.2*torch.rand(x.size()).to(device)                 # noisy y data (tensor), shape=(100, 1); uniform noise in [0, 0.2)

# Define a network class
class DNN(nn.Module):
    """Fully connected network with a single ReLU hidden layer.

    Args:
        n_feature: size of each input sample.
        n_hidden: number of units in the hidden layer.
        n_output: size of each output sample.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        self.hidden = nn.Linear(n_feature, n_hidden)    # input -> hidden
        self.predict = nn.Linear(n_hidden, n_output)    # hidden -> output

    def forward(self, x):
        """Map a batch ``x`` (last dimension ``n_feature``) to the network output."""
        hidden_activation = F.relu(self.hidden(x))      # non-linearity on the hidden layer
        return self.predict(hidden_activation)          # output layer stays linear

# Create a network and place it on the same device as the training data, so the
# model and its inputs always live together (GPU or CPU).
net = DNN(n_feature=1, n_hidden=10, n_output=1)   # 1 input -> 10 hidden units -> 1 output
net.to(x.device)

# Define an optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.2)   # plain SGD with learning rate 0.2
loss_func = torch.nn.MSELoss()  # this is for regression mean squared loss

# The data never changes during training, so convert it to NumPy once and
# reuse the arrays for every plot.
x_np = x.cpu().numpy()
y_np = y.cpu().numpy()

for epoch in range(1, 201):
    optimizer.zero_grad()              # reset gradients of the previous step
    prediction = net(x)                # forward propagation
    loss = loss_func(prediction, y)    # mean squared error loss
    loss.backward()                    # backpropagation
    optimizer.step()                   # apply gradients

    if epoch % 5 == 0:
        # plot and show learning process (every 5th epoch, replacing the previous figure)
        clear_output(wait=True)
        plt.cla()
        plt.title(f"Epoch {epoch}")
        plt.scatter(x_np, y_np)
        # detach() gives a tensor outside the autograd graph that is safe to convert
        plt.plot(x_np, prediction.detach().cpu().numpy(), 'r-', lw=5)
        plt.text(0.5, 0, f'Loss={loss.item():.4f}', fontdict={'size': 20, 'color':  'red'})
        plt.xlabel("x")
        plt.ylabel("y")
        plt.show()