### ✨활성화 함수
- 주요 활성화 함수
    - 시그모이드 함수 : nn.Sigmoid()
    - ReLU 함수 : nn.ReLU()
    - Leaky ReLU 함수 : nn.LeakyReLU()

In [1]:
import torch
import torch.nn as nn

class LinearRegressionModel(nn.Module):
    """Single linear layer followed by an element-wise activation.

    Args:
        input_dim: Number of input features (size of the last input dim).
        output_dim: Number of output features.
        activation: Module applied to the output of the linear layer.
            When ``None`` (the default) ``nn.Sigmoid()`` is used, so
            callers that pass only two arguments keep the original
            behaviour. Pass e.g. ``nn.ReLU()`` or ``nn.LeakyReLU(0.1)``
            to experiment with another activation function.
    """

    def __init__(self, input_dim, output_dim, activation=None):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        # Default to the sigmoid function unless the caller supplies one.
        self.activation = nn.Sigmoid() if activation is None else activation

    def forward(self, x):
        """Return ``activation(linear(x))``."""
        return self.activation(self.linear(x))

In [2]:
# Model (4 inputs -> 3 outputs) and mean-squared-error loss.
model = LinearRegressionModel(4, 3)
loss_function = nn.MSELoss()

# A single training example: all-ones input, all-zeros target.
x = torch.ones(4)
y = torch.zeros(3)

In [3]:
# Hyperparameters for gradient descent on the single example.
nb_epochs = 1000
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# Note: range(nb_epochs + 1) performs nb_epochs + 1 update steps.
for epoch in range(nb_epochs + 1):
    # Clear gradients from the previous step before the new pass.
    optimizer.zero_grad()

    y_pred = model(x)
    loss = loss_function(y_pred, y)

    loss.backward()
    optimizer.step()

In [4]:
# Print the last computed loss, then every model parameter, one per line.
print(loss, *model.parameters(), sep="\n")

tensor(0.0254, grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[-0.2139, -0.7097,  0.0569, -0.8059],
        [-0.4430, -0.6965, -0.3392,  0.1508],
        [-0.8683, -0.0714, -0.4342, -0.3089]], requires_grad=True)
Parameter containing:
tensor([ 0.0367, -0.3654,  0.0231], requires_grad=True)


### ✨다층 레이어 구현
- input layer -> hidden layer -> output layer 순으로 작성
- activation function은 hidden layer에만 적용하고, output layer에는 적용하지 않는 것이 일반적

In [5]:
import torch
import torch.nn as nn

class LinearRegressionModel(nn.Module):
    """Four linear layers with LeakyReLU(0.1) between them.

    The activation is applied after the first three layers only; the
    final (output) layer returns its raw linear output.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output features.
        hidden_dim: Width of the three hidden layers. Defaults to 10,
            the value that was previously hard-coded.
    """

    def __init__(self, input_dim, output_dim, hidden_dim=10):
        super().__init__()
        self.linear1 = nn.Linear(input_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear3 = nn.Linear(hidden_dim, hidden_dim)
        self.linear4 = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Run ``x`` through the hidden layers and the output layer."""
        hidden = x
        for layer in (self.linear1, self.linear2, self.linear3):
            hidden = self.activation(layer(hidden))
        # No activation on the output layer.
        return self.linear4(hidden)

In [6]:
# Multi-layer model (4 inputs -> 3 outputs) and mean-squared-error loss.
model = LinearRegressionModel(4, 3)
loss_function = nn.MSELoss()

# A single training example: all-ones input, all-zeros target.
x = torch.ones(4)
y = torch.zeros(3)

In [7]:
# Hyperparameters for gradient descent on the single example.
nb_epochs = 1000
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# Note: range(nb_epochs + 1) performs nb_epochs + 1 update steps.
for epoch in range(nb_epochs + 1):
    # Reset gradients, run the forward pass, then back-propagate and update.
    optimizer.zero_grad()

    y_pred = model(x)
    loss = loss_function(y_pred, y)

    loss.backward()
    optimizer.step()

In [8]:
# Print the last computed loss, then every model parameter, one per line.
print(loss, *model.parameters(), sep="\n")

tensor(4.6739e-11, grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[ 0.1389,  0.1026, -0.4651, -0.3142],
        [-0.2240,  0.4407,  0.0532,  0.3024],
        [ 0.1379, -0.4305, -0.0043, -0.1019],
        [-0.1317, -0.1706,  0.2142,  0.3331],
        [-0.1119,  0.3032, -0.3819,  0.2917],
        [ 0.1023, -0.0334, -0.4315, -0.4222],
        [ 0.2257, -0.0095,  0.1118, -0.4338],
        [ 0.4957, -0.2891, -0.0560, -0.3820],
        [ 0.2727, -0.4982,  0.3680,  0.3386],
        [ 0.3361, -0.3046, -0.4299,  0.4957]], requires_grad=True)
Parameter containing:
tensor([-0.4683,  0.3656,  0.1940, -0.2059, -0.0510,  0.4191, -0.0126, -0.1036,
        -0.1953, -0.2730], requires_grad=True)
Parameter containing:
tensor([[ 0.1302,  0.2372, -0.1786,  0.2355,  0.0726, -0.2157, -0.0991,  0.0952,
         -0.2556,  0.0239],
        [-0.0883,  0.2224, -0.1213, -0.1631, -0.0672, -0.0076, -0.2234, -0.2663,
          0.1261,  0.2796],
        [-0.0761, -0.2767,  0.0496, -0.2582, -0.2831,  0.1996

### ✨nn.Sequential
- nn.Sequential은 순서를 갖는 모듈의 컨테이너를 의미

In [9]:
# One training example: four input features mapped to three targets.
x = torch.ones(4)
y = torch.zeros(3)

# Layer widths are read off the data tensors.
input_dim = x.shape[0]
output_dim = y.shape[0]

# Two hidden layers of width 10; the output layer has no activation.
model = nn.Sequential(
    nn.Linear(input_dim, 10),
    nn.LeakyReLU(0.1),
    nn.Linear(10, 10),
    nn.LeakyReLU(0.1),
    nn.Linear(10, output_dim),
)

In [10]:
# Training configuration.
nb_epochs = 1000
learning_rate = 0.01
loss_function = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# Note: range(nb_epochs + 1) performs nb_epochs + 1 update steps.
for epoch in range(nb_epochs + 1):
    optimizer.zero_grad()

    y_pred = model(x)
    loss = loss_function(y_pred, y)

    loss.backward()
    optimizer.step()

# Print the last computed loss followed by every parameter, one per line.
print(loss, *model.parameters(), sep="\n")

tensor(8.8085e-09, grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[-1.6592e-01, -4.0879e-01, -1.6137e-01, -5.8060e-02],
        [ 5.3031e-02,  6.3167e-02, -2.7856e-01, -2.3568e-01],
        [-9.9009e-02, -1.7502e-01,  2.5114e-01, -3.9851e-01],
        [-2.1113e-01, -2.9063e-01,  4.2643e-01,  1.6077e-01],
        [ 2.3279e-02,  1.6874e-01,  1.0100e-04, -3.3323e-01],
        [ 3.8658e-01, -3.7051e-01, -4.8802e-03, -1.3409e-01],
        [ 3.3883e-01,  3.5222e-01, -3.3682e-01, -3.2206e-01],
        [ 3.6824e-01,  4.2356e-01, -2.8808e-01,  2.9695e-01],
        [ 4.2302e-01,  2.8722e-01,  4.9477e-01, -3.2096e-01],
        [ 1.9488e-02, -2.3716e-02, -2.9596e-02, -3.5286e-03]],
       requires_grad=True)
Parameter containing:
tensor([-0.3591,  0.0102, -0.2327, -0.4964, -0.2917, -0.1369, -0.0362, -0.2401,
        -0.2351,  0.3620], requires_grad=True)
Parameter containing:
tensor([[ 0.0423, -0.0899, -0.0994, -0.1143, -0.1039, -0.3007,  0.2995,  0.2768,
         -0.0430, -0.1954],
   

### ✨SGD 방식 구현
- torch.randperm(n) : 0부터 n-1까지의 정수를 무작위로 섞은 순열을 1차원 텐서로 반환
- torch.index_select(텐서객체, 차원번호, 인덱스텐서) : 지정한 차원에서 인덱스텐서에 담긴 위치의 항목만 골라 새 텐서로 반환

In [11]:
# Random 3x4 tensor to select from.
data1 = torch.randn(3, 4)
print(data1)

# Positions to pick along the chosen dimension.
indices = torch.tensor([1, 2])
print(indices)

# dim=0 picks rows 1 and 2; dim=1 picks columns 1 and 2.
print(data1.index_select(dim=0, index=indices))
print(data1.index_select(dim=1, index=indices))

tensor([[-1.0967, -0.5852, -0.5123,  1.1497],
        [-1.7728, -0.3127,  0.7205,  0.3598],
        [-0.2598, -1.0256, -1.7208,  0.4393]])
tensor([1, 2])
tensor([[-1.7728, -0.3127,  0.7205,  0.3598],
        [-0.2598, -1.0256, -1.7208,  0.4393]])
tensor([[-0.5852, -0.5123],
        [-0.3127,  0.7205],
        [-1.0256, -1.7208]])


### ✨테스트 구현

In [13]:
# Training hyperparameters.
minibatch_size = 256
nb_epochs = 1000
learning_rate = 0.01

# Synthetic dataset: 5000 samples with 10 features each and 1 output each.
x = torch.ones((5000, 10))
y = torch.zeros((5000, 1))

In [15]:
# Layer sizes follow the last dimension of the data tensors.
input_dim = x.shape[-1]
output_dim = y.shape[-1]

# Hidden widths 10 -> 8 -> 6; the output layer has no activation.
model = nn.Sequential(
    nn.Linear(input_dim, 10),
    nn.LeakyReLU(0.1),
    nn.Linear(10, 8),
    nn.LeakyReLU(0.1),
    nn.Linear(8, 6),
    nn.LeakyReLU(0.1),
    nn.Linear(6, output_dim),
)

optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
loss_function = nn.MSELoss()

In [21]:
# Random permutation with one index per row of x.
indices = torch.randperm(x.shape[0])
print(indices)

# Reorder features and targets with the same permutation.
x_batch_list = x.index_select(0, indices)
y_batch_list = y.index_select(0, indices)

tensor([1089, 4926,  931,  ..., 1944,   46,  806])


In [22]:
# Display the shape of the shuffled feature tensor.
x_batch_list.shape

torch.Size([5000, 10])

In [23]:
# Cut the shuffled tensors into chunks of minibatch_size along dim 0.
x_batch_list = torch.split(x_batch_list, minibatch_size, dim=0)
y_batch_list = torch.split(y_batch_list, minibatch_size, dim=0)

# Number of mini-batches for features and targets.
print(len(x_batch_list), len(y_batch_list))

20 20


In [24]:
# Display the container type of x_batch_list.
type(x_batch_list)

tuple

In [25]:
# Display the type of the first element of x_batch_list.
type(x_batch_list[0])

torch.Tensor

In [26]:
# Display the first element of x_batch_list.
x_batch_list[0]

tensor([[1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        ...,
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.]])

In [29]:
for index in range(nb_epochs):
    # Draw a fresh random ordering of the row indices every epoch.
    indices = torch.randperm(x.size(0))

    # Shuffle features and targets with the same permutation, then cut
    # them into mini-batches along dim 0.
    x_batch_list = torch.index_select(x, 0, indices).split(minibatch_size, 0)
    y_batch_list = torch.index_select(y, 0, indices).split(minibatch_size, 0)

    # One parameter update per mini-batch.
    for x_minibatch, y_minibatch in zip(x_batch_list, y_batch_list):
        optimizer.zero_grad()

        y_minibatch_pred = model(x_minibatch)
        loss = loss_function(y_minibatch_pred, y_minibatch)

        loss.backward()
        optimizer.step()

# loss is the value from the last mini-batch processed; print it and
# then every model parameter, one per line.
print(loss, *model.parameters(), sep="\n")

tensor(1.6043e-14, grad_fn=<MseLossBackward0>)
Parameter containing:
tensor([[ 0.1558,  0.1666,  0.3064, -0.3107, -0.1156,  0.0339,  0.1937,  0.1341,
         -0.1442,  0.0917],
        [ 0.1964,  0.0721,  0.0859, -0.0116, -0.0390, -0.0367,  0.0913, -0.1025,
          0.0800,  0.2576],
        [-0.0839, -0.2230, -0.1228, -0.0858,  0.0704,  0.0710, -0.1071,  0.0589,
         -0.1921, -0.2351],
        [-0.2625,  0.1542, -0.1768,  0.3035,  0.1953,  0.1973,  0.1517, -0.0912,
          0.2466,  0.0516],
        [-0.3125,  0.1076,  0.2082, -0.3138, -0.0640, -0.2924,  0.1090, -0.2874,
          0.1780, -0.2514],
        [ 0.2394,  0.0782, -0.1326, -0.0098,  0.0624,  0.2265, -0.0611,  0.0950,
         -0.0236, -0.1109],
        [ 0.0040, -0.2577,  0.0679,  0.1062,  0.0894, -0.2055, -0.2988,  0.1236,
         -0.0570, -0.1466],
        [ 0.3149,  0.2359,  0.1590,  0.0936,  0.2787,  0.2053, -0.0488, -0.1447,
          0.2216,  0.1051],
        [-0.0189, -0.2028,  0.2359, -0.1947, -0.0017,  0.27