In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [11]:
class Net(nn.Module):
    
    # 네트워크를 만듬 
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1,6,5) ## kernel 5X5 를 6개 만들겠다
        self.conv2 = nn.Conv2d(6,16,5) ## 앞의 커널에서 받은 6개가 그대로 input이니깐 6입력이고 16개로 뱉어냄

        #full connect Layer = 행렬곱하기 위해 행렬을 펼친다.
        self.fc1 = nn.Linear( 16 * 5 * 5, 120) ## (input, output)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear( 84,10) # 종단이 10개면 10개의 값이 떨어진다 = 확률값 
        
        # convolation -> relu 태우는게 칸셉 
    def forward(self, x): 
        x = self.conv1(x)
        x = F.relu(x) # max (0, sum(x)) = Activation 함수 = 임계치를 떄리는 것 
        x = F.max_pool2d(x , (2,2)) # 이미지면 반토막 -> 8:8 -> 4:4
        # -- 하나의 layer ( 고차원 정보가 압축되어 온다)
        
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x , 2)
      
        x = x.view(-1, self.num_flat_features(x))
        #  5x5 16개를 펼치는 것 -> view  
        x = F.relu(self.fc1(x)) # 
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self,x):
        size = x.size()[1:] # 배치로 들어오거등~~ 
        num_features = 1
        for s in size:
            num_features *= s 
        return num_features


In [12]:
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [14]:
params = list(net.parameters())
print(len(params))

10


In [17]:
params[1]

Parameter containing:
tensor([ 0.1158, -0.1764, -0.0002,  0.1903,  0.0690, -0.0289], requires_grad=True)

In [20]:
input = torch.randn(1, 1, 32, 32) #( a (batch_size),channel(rgb) , x,y,)

print(input)

tensor([[[[-0.4334,  2.5762,  0.3825,  ...,  0.3720, -1.0306, -0.8456],
          [ 0.4027, -0.3544,  0.9523,  ..., -0.8310,  1.3807, -1.9828],
          [-1.4653, -1.5742,  0.7675,  ..., -0.9692,  0.4233,  0.7785],
          ...,
          [-0.1355, -1.2120, -0.4455,  ..., -0.1715, -0.3538,  0.1652],
          [-0.6448,  0.7632, -0.0495,  ...,  1.3095, -0.7303, -1.1509],
          [ 1.4057,  1.3656, -0.9698,  ...,  1.8364, -0.6895,  0.5243]]]])


In [23]:
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out) # 확률값도 아닌 값... 그래서 softmax같은 것으로 정규화를 해야함 

tensor([[ 0.0539, -0.0532, -0.0059, -0.1140,  0.1550,  0.0069,  0.1842,  0.0498,
          0.0126, -0.1531]], grad_fn=<ThAddmmBackward>)


In [27]:
out.backward(torch.randn(1,10))

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [29]:
output = net(input)
target = torch.rand(10)
target = target.view(1,-1)
target

tensor([[0.5620, 0.8175, 0.8076, 0.7002, 0.4486, 0.7082, 0.2335, 0.9801, 0.2551,
         0.2967]])

In [33]:
criterion = nn.MSELoss()
loss = criterion(output,target)
print('output',output)
print('loss',loss)

output tensor([[ 0.0539, -0.0532, -0.0059, -0.1140,  0.1550,  0.0069,  0.1842,  0.0498,
          0.0126, -0.1531]], grad_fn=<ThAddmmBackward>)
loss tensor(0.4048, grad_fn=<MseLossBackward>)


In [41]:
loss.grad_fn.next_functions.count

<function tuple.count>

In [34]:
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU
print(loss.grad_fn.next_functions[0][0].next_functions[0][0].next_functions[0][0])  # ReLU

<MseLossBackward object at 0x0000029F9D237A20>
<ThAddmmBackward object at 0x0000029F9D237F60>
<ExpandBackward object at 0x0000029F9D237A20>
<AccumulateGrad object at 0x0000029F9D237E80>


In [42]:
net.zero_grad()
print(net.conv1.bias.grad)
loss.backward()
print(net.conv1.bias.grad)

tensor([0., 0., 0., 0., 0., 0.])
tensor([ 0.0009,  0.0009,  0.0007, -0.0046,  0.0033, -0.0008])


In [44]:
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)
    

In [45]:
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr =0.01)
x.zero_grad()
output = net(input)

loss = criterion(output,target)
loss.backward()
optimizer.step

<bound method SGD.step of SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0
    nesterov: False
    weight_decay: 0
)>