In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import numpy as np
from collections import OrderedDict

torch.manual_seed(1)

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## 1. Create Tensor

In [4]:
# 파이썬 리스트로부터 생성

V_data = [1., 2., 3.] # 벡터
V = torch.Tensor(V_data)
print(V)

M_data = [[1., 2., 3.,], [4., 5., 6.]] # 행렬
M = torch.Tensor(M_data)
print(M)

T_data = [[[1.,2.], [3.,4.]], # 3차원 텐서
        [[5.,6.], [7.,8.]]]

T = torch.Tensor(T_data)
print(T)

tensor([1., 2., 3.])
tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor([[[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]]])


In [6]:
T.tolist() # 반대로 파이토치 텐서 -> 파이썬 리스트

[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]

In [8]:
# numpy array 객체로부터 생성
V_data = np.array([1., 2., 3.]) # 벡터
V = torch.Tensor(V_data) # 바로 랩핑할 수 있음
print(V)

M_data = np.array([[1., 2., 3.], [4., 5., 6]]) # 행렬
M = torch.Tensor(M_data)
print(M)

T_data = np.array([[[1.,2.], [3.,4.]], # 3차원 텐서
                [[5.,6.], [7.,8.]]])
T = torch.Tensor(T_data)
print(T)

tensor([1., 2., 3.])
tensor([[1., 2., 3.],
        [4., 5., 6.]])
tensor([[[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]]])


In [9]:
T.numpy() # 반대로 파이토치 텐서 -> 넘파이

array([[[1., 2.],
        [3., 4.]],

       [[5., 6.],
        [7., 8.]]], dtype=float32)

In [10]:
# 기타 생성 방법들
x = torch.zeros(2,3)
print(x)

x = torch.ones(2, 3)
print(x)

x = torch.rand(3, 4)
print(x)

x = torch.randn(3, 4) # 표준정규분포에서 샘플링
print(x)  # FloatTensor가 디폴트

x = torch.randperm(5) # permutation of integers from 0 to n - 1
print(x)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.7576, 0.2793, 0.4031, 0.7347],
        [0.0293, 0.7999, 0.3971, 0.7544],
        [0.5695, 0.4388, 0.6387, 0.5247]])
tensor([[-1.5228,  0.3817, -1.0276, -0.5631],
        [-0.8923, -0.0583, -0.1955, -0.9656],
        [ 0.4224,  0.2673, -0.4212, -0.5107]])
tensor([3, 0, 4, 1, 2])


## 2. Indexing, Slicing, Joining, Mutating Ops

http://pytorch.org/docs/0.3.0/torch.html?#indexing-slicing-joining-mutating-ops

In [11]:
x = torch.rand(3, 4, 5)
print(x)

tensor([[[0.9906, 0.2885, 0.8750, 0.5059, 0.2366],
         [0.7570, 0.2346, 0.6471, 0.3556, 0.4452],
         [0.0193, 0.2616, 0.7713, 0.3785, 0.9980],
         [0.9008, 0.4766, 0.1663, 0.8045, 0.6552]],

        [[0.1768, 0.8248, 0.8036, 0.9434, 0.2197],
         [0.4177, 0.4903, 0.5730, 0.1205, 0.1452],
         [0.7720, 0.3828, 0.7442, 0.5285, 0.6642],
         [0.6099, 0.6818, 0.7479, 0.0369, 0.7517]],

        [[0.1484, 0.1227, 0.5304, 0.4148, 0.7937],
         [0.2104, 0.0555, 0.8639, 0.4259, 0.7812],
         [0.6607, 0.1251, 0.6004, 0.6201, 0.1652],
         [0.2628, 0.6705, 0.5896, 0.2873, 0.3486]]])


In [12]:
x[0] # 직관적 인덱싱 가능

tensor([[0.9906, 0.2885, 0.8750, 0.5059, 0.2366],
        [0.7570, 0.2346, 0.6471, 0.3556, 0.4452],
        [0.0193, 0.2616, 0.7713, 0.3785, 0.9980],
        [0.9008, 0.4766, 0.1663, 0.8045, 0.6552]])

In [13]:
x[0][0]

tensor([0.9906, 0.2885, 0.8750, 0.5059, 0.2366])

In [14]:
x[0][0][0]

tensor(0.9906)

### torch.index_select

In [16]:
x = torch.randn(3, 4)
print(x)

indices = torch.LongTensor([0, 2])
print(indices) # 선택할 index (LongTensor)

print(torch.index_select(x, 0, indices)) # row 기준으로 index select
print(torch.index_select(x, 1, indices)) # column 기준으로 

tensor([[ 0.5009,  0.5438, -0.4057,  1.1341],
        [-1.1115,  0.3501, -0.7703, -0.1473],
        [ 0.6272,  1.0935,  0.0939,  1.2381]])
tensor([0, 2])
tensor([[ 0.5009,  0.5438, -0.4057,  1.1341],
        [ 0.6272,  1.0935,  0.0939,  1.2381]])
tensor([[ 0.5009, -0.4057],
        [-1.1115, -0.7703],
        [ 0.6272,  0.0939]])


### torch.masked_select

In [18]:
x = torch.randn(3, 4)
print(x)

mask = x.ge(0.5) # x 텐서에서 0.5보다 크거나 같은 값 마스킹(boolean) (ByteTensor)
print(mask)

print(torch.masked_select(x, mask)) # 마스킹 된 값 선택

tensor([[-3.7687e-01, -3.1020e+00, -9.9467e-02, -7.2126e-01],
        [ 1.2708e+00, -2.0225e-03, -1.0952e+00,  6.0165e-01],
        [ 6.9841e-01, -8.0052e-01,  1.5381e+00,  1.4673e+00]])
tensor([[False, False, False, False],
        [ True, False, False,  True],
        [ True, False,  True,  True]])
tensor([1.2708, 0.6016, 0.6984, 1.5381, 1.4673])


### torch.cat ➡️ concat : 두 텐서를 붙인다 (Concatentation)

In [19]:
# row-wise concat
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 = torch.cat([x_1, y_1]) # default: 첫 번째 차원 기준으로 (0)
print(z_1)

# column-wise concat
x_2 = torch.randn(2, 3)
y_2 = torch.randn(2, 5)
z_2 = torch.cat([x_2, y_2], 1) # 두 번째 인자로 기준이 될 차원 축 선택
print(z_2)

tensor([[ 1.5951, -1.5279,  1.0156, -0.2020, -1.2865],
        [ 0.8231, -0.6101, -1.2960, -0.9434,  0.6684],
        [ 1.1628, -0.3229,  1.8782, -0.5666,  0.4016],
        [-0.1153,  0.3170,  0.5629,  0.8662, -0.3528],
        [ 0.3482,  1.1371, -0.3339, -1.4724,  0.7296]])
tensor([[-0.1312, -0.6368,  1.0429,  1.6015, -1.0735, -1.2173,  0.6472, -0.0412],
        [ 0.4903,  1.0318, -0.5989, -0.1775, -0.5000,  0.8673, -0.2732, -0.4608]])


### torch.view ➡️ reshape

In [20]:
x = torch.randn(2, 3, 4)
print(x)
print(x.view(2, 12)) # 2x12로 reshape
print(x.view(2, -1)) # -1: 각 차원을 다 곱한 값에서 명시적인 차원수로 나눠서 추론

tensor([[[-0.8336, -1.1929, -2.3065,  0.6037],
         [ 0.1794,  0.1447, -0.3589,  0.4793],
         [ 1.0476, -0.3176,  0.1395,  2.3403]],

        [[-0.6116,  0.8160,  0.2477, -0.3867],
         [ 0.1995,  0.7993, -0.2619,  0.1513],
         [ 1.1982, -2.2833, -1.0130, -0.8879]]])
tensor([[-0.8336, -1.1929, -2.3065,  0.6037,  0.1794,  0.1447, -0.3589,  0.4793,
          1.0476, -0.3176,  0.1395,  2.3403],
        [-0.6116,  0.8160,  0.2477, -0.3867,  0.1995,  0.7993, -0.2619,  0.1513,
          1.1982, -2.2833, -1.0130, -0.8879]])
tensor([[-0.8336, -1.1929, -2.3065,  0.6037,  0.1794,  0.1447, -0.3589,  0.4793,
          1.0476, -0.3176,  0.1395,  2.3403],
        [-0.6116,  0.8160,  0.2477, -0.3867,  0.1995,  0.7993, -0.2619,  0.1513,
          1.1982, -2.2833, -1.0130, -0.8879]])


### torch.squeeze

In [21]:
x = torch.zeros(2, 1, 2, 1)
print(x)
print(x.squeeze()) # 차원 size = 1인 차원을 제거 => 2x2
print(x.squeeze(1)) # 차원 수 명시 가능 => 2x2x1

tensor([[[[0.],
          [0.]]],


        [[[0.],
          [0.]]]])
tensor([[0., 0.],
        [0., 0.]])
tensor([[[0.],
         [0.]],

        [[0.],
         [0.]]])


### torch.unsqueeze

In [46]:
x = torch.randn(5)
print(x)

print(x.unsqueeze(0)) # squeeze의 반대 => size=1인 차원을 추가
print(x.unsqueeze(1)) # range of [-2, 1]

tensor([-0.0766, -0.8562, -0.7870, -0.8161,  0.5470])
tensor([[-0.0766, -0.8562, -0.7870, -0.8161,  0.5470]])
tensor([[-0.0766],
        [-0.8562],
        [-0.7870],
        [-0.8161],
        [ 0.5470]])


## 3. Match opteration
http://pytorch.org/docs/0.3.0/torch.html?#math-operations

### add

In [47]:
x = torch.Tensor([1., 2., 3.]) 
y = torch.Tensor([4., 5., 6.])
z = x + y # torch.add(x,y)
print(z)
print(torch.add(x, y))

tensor([5., 7., 9.])
tensor([5., 7., 9.])


### sum

In [48]:
x = torch.Tensor([1, 2, 3])
print(x.sum())

tensor(6.)


### dot product

In [49]:
x = torch.Tensor([1, 2, 3])
y = torch.Tensor([4, 5, 6])
z = x.dot(y) # torch.dot(x, y)
print(z)
print(torch.dot(x, y))


tensor(32.)
tensor(32.)


### mul

In [51]:
x = torch.Tensor([1, 2, 3])
y = torch.Tensor([4, 5, 6])
z = x.mul(y)
print(z)

tensor([ 4., 10., 18.])


### mm: matrix multiplication

In [54]:
x = torch.randn(2, 2)
y = torch.randn(2, 3)
z = x.mm(y) # torch.mm(x, y)
print(z)
print(torch.mm(x, y))

tensor([[-1.6054, -1.3538,  2.5214],
        [ 1.3804,  2.5942, -2.6082]])
tensor([[-1.6054, -1.3538,  2.5214],
        [ 1.3804,  2.5942, -2.6082]])


### max

In [58]:
x = torch.Tensor([[1.,2.],[3.,4.]])
print(x.max())
print(x.max(1))

tensor(4.)
torch.return_types.max(
values=tensor([2., 4.]),
indices=tensor([1, 1]))


## 4. Computation Graphs and Automatic Differentiation
딥러닝 프레임워크의 가장 큰 장점인 **자동 미분 기능**을 사용하기 위해서는 `Variable`, `Parameter` 클래스로 래핑해야 함

In [62]:
from torch.autograd import Variable
# Variable: 텐서를 래핑한다 (autograd 기능에 의한 미분값의 필요 여부를 결정할 수 있음)
x = Variable(torch.Tensor([1, 2, 3]), requires_grad=True)
print(x)

# Variable로 감싸준 상태에서 .data로 원래 텐서에 접근
print(x.data)

# Variable로 래핑한 다른 텐서들과도 연산 가능
y = Variable(torch.Tensor([4, 5, 6]), requires_grad=True)
z = torch.add(x, y)
print(z.data)

# 하지만 연산의 결과인 z는 뭔가 추가적인 정보를 알고 있음!
print(z.grad_fn)

tensor([1., 2., 3.], requires_grad=True)
tensor([1., 2., 3.])
tensor([5., 7., 9.])
<AddBackward0 object at 0x127f06220>


In [63]:
s = z.sum()
print(s)
print(s.grad_fn)

tensor(21., grad_fn=<SumBackward0>)
<SumBackward0 object at 0x12656c580>


In [64]:
s.backward() # 이 값으로부터 backpropagation을 하고 싶다면 => .backward()를 call
print(x.grad)

tensor([1., 1., 1.])


<div align='center'>

$s =\overbrace{x_0 + y_0}^{z_0} +\overbrace{x_1 + y_1}^{z_1} + \overbrace{x_2 + y_2}^{z_2}$

$\frac{\partial s}{\partial x_0}$
</div>

### Variable vs Parameter

- Variable : 학습 및 모델 추론에 <u>필요한 변수</u>
- Parameter : 학습을 통해 <u>찾아야 할 값</u>

In [65]:
x = torch.Tensor([1, 2, 3])
vx = Variable(x)

In [67]:
vx.requires_grad

False

In [68]:
import torch.nn as nn

px = nn.Parameter(x) # 학습 가능한 파라미터 (가중치와 편향과 같은)

In [69]:
print(px.data)

tensor([1., 2., 3.])


In [70]:
px.requires_grad

True

## 5. torch.nn

### nn.Linear (Affine Maps)
$f(x) = Ax + b$

Linear를 취할 때 정수형은 float형으로 만들어줘야 함


In [71]:
lin = nn.Linear(5, 3) # (input size, output size), maps from R^5 to R^3, paramters A, b
data = Variable(torch.randn(2, 5))
print(lin(data)) # (2x3)

tensor([[-0.1898, -1.2431, -0.6487],
        [ 1.1364,  0.1638, -1.0311]], grad_fn=<AddmmBackward0>)


### 비선형 함수들 (Non-Linearities)

In [73]:
import torch.nn.functional as F

data = Variable(torch.randn(2, 2))
print(data)
print(F.relu(data))
print(torch.tanh(data))
print(torch.sigmoid(data))

tensor([[-0.7592,  1.0220],
        [-1.3939, -0.9680]])
tensor([[0.0000, 1.0220],
        [0.0000, 0.0000]])
tensor([[-0.6406,  0.7707],
        [-0.8840, -0.7478]])
tensor([[0.3188, 0.7354],
        [0.1988, 0.2753]])


### Softmax

In [76]:
# Softmax is also in torch.functional
data = Variable(torch.randn(5))
print(data)
print(F.softmax(data))
print(F.softmax(data).sum()) # Sums to 1 because it is a distribution
print(F.log_softmax(data))

tensor([-0.4116, -0.6755, -0.2887,  0.6900,  0.3158])
tensor([0.1254, 0.0963, 0.1417, 0.3772, 0.2594])
tensor(1.)
tensor([-2.0766, -2.3406, -1.9537, -0.9750, -1.3492])


  print(F.softmax(data))
  print(F.softmax(data).sum()) # Sums to 1 because it is a distribution
  print(F.log_softmax(data))


## 6. Containers

### nn.Module
파이토치는 모델을 클래스처럼 다룰 수 있는데, `torch.nn.Module`을 상속받아서 부모 클래스를 초기화하는 방법

선정의된 함수 `forward`에 Variable을 인자 값으로 보내면 `forward` 계산을 하면서 Parameter와의 backward 계산을 해서 가지고 있는다

In [84]:
class simpleNN(nn.Module):
    def __init__(self):
        super(simpleNN, self).__init__() # 부모 클래스까지 초기화(부모 클래스 생성자 호출)
        self.linear = nn.Linear(2, 2)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, inputs):
        return self.sigmoid(self.linear(inputs))

In [85]:
snn = simpleNN()

In [86]:
snn

simpleNN(
  (linear): Linear(in_features=2, out_features=2, bias=True)
  (sigmoid): Sigmoid()
)

### 모듈 파라미터 혹은 서브 모듈에 접근

In [88]:
for param in snn.parameters():
    print(param)

Parameter containing:
tensor([[ 0.2609, -0.2352],
        [ 0.5190,  0.1624]], requires_grad=True)
Parameter containing:
tensor([-0.2174,  0.6302], requires_grad=True)


In [89]:
for param in snn.named_parameters():
    print(param) # (name, parameters) tuple

('linear.weight', Parameter containing:
tensor([[ 0.2609, -0.2352],
        [ 0.5190,  0.1624]], requires_grad=True))
('linear.bias', Parameter containing:
tensor([-0.2174,  0.6302], requires_grad=True))


In [91]:
for param in snn.named_parameters():
    if "weight" in param[0]:
        print(param[1])

Parameter containing:
tensor([[ 0.2609, -0.2352],
        [ 0.5190,  0.1624]], requires_grad=True)


In [92]:
for child in snn.children():
    print(child) # module

Linear(in_features=2, out_features=2, bias=True)
Sigmoid()


### forward

In [93]:
inputs = Variable(torch.randn(1, 2))
inputs

tensor([[1.7090, 0.2287]])

In [94]:
outputs = snn(inputs)
print(outputs)

tensor([[0.5436, 0.8255]], grad_fn=<SigmoidBackward0>)


### nn.Sequential()

In [95]:
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid())

In [96]:
model

Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Sigmoid()
)

In [97]:
outputs = model(inputs)
print(outputs)

tensor([[0.7369, 0.5595]], grad_fn=<SigmoidBackward0>)


In [99]:
model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU())           
]))

### Loss function
http://pytorch.org/docs/0.3.0/nn.html#loss-functions

In [100]:
loss_function = nn.MSELoss()

In [104]:
nn.CrossEntropyLoss()

CrossEntropyLoss()

### Optimizer

In [102]:
optimizer = optim.SGD(snn.parameters(), lr=0.01)

In [103]:
optimizer.param_groups

[{'params': [Parameter containing:
   tensor([[ 0.2609, -0.2352],
           [ 0.5190,  0.1624]], requires_grad=True),
   Parameter containing:
   tensor([-0.2174,  0.6302], requires_grad=True)],
  'lr': 0.01,
  'momentum': 0,
  'dampening': 0,
  'weight_decay': 0,
  'nesterov': False,
  'maximize': False,
  'foreach': None,
  'differentiable': False}]