In [1]:
import torch

In [2]:
# Display the installed PyTorch version string so readers know which build these notes were run against.
torch.__version__

'1.0.0.dev20181014'

### Random

In [13]:
# Build a 5x3 tensor of random values (torch.rand samples uniformly from [0, 1)).
x = torch.rand((5, 3))
print(x)


 0.1762  0.9099  0.2967
 0.1739  0.5473  0.4969
 0.7902  0.6606  0.9394
 0.9505  0.5490  0.3846
 0.0921  0.8280  0.6790
[torch.FloatTensor of size 5x3]



### fill

In [28]:
# Allocate an uninitialised 3x4 tensor, then overwrite every element in place with 3.0.
torch.empty(3, 4).fill_(3.0)

tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])

### Tensor

In [8]:
# torch.tensor infers the dtype from the data: Python ints give an integer tensor.
a = torch.tensor((1, 2))

In [10]:
# Element-wise square of `a` (defined in the previous cell).
a.pow(2)

tensor([1, 4])

### Convert to scalar

In [13]:
# .item() unwraps a one-element tensor into a plain Python number.
x = torch.randn(1)
print(x, x.item(), sep="\n")

tensor([0.4682])
0.46823084354400635


### Matrix multiplication (dot product)

In [29]:
# Matrix product of two 2x2 integer matrices; B holds the same entries as A.
A = torch.tensor([[1, 2], [3, 4]])
B = A.clone()
torch.mm(A, B)

tensor([[ 7, 10],
        [15, 22]])

### Back prop

#### requires_grad

In [None]:
# Option 1: ask for gradient tracking when the tensor is created.
x = torch.ones(2, 2, requires_grad=True)

# Option 2: switch it on later, in place, on an existing tensor.
# Only floating-point tensors can require gradients, so this uses a fresh float
# tensor rather than the integer `a` from the "Tensor" section, which would
# raise a RuntimeError here.
a = torch.ones(2, 2)
a.requires_grad_(True)

#### grad & backward
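`backward()` without arguments can only be called on a scalar (single-element) tensor, so reduce the output first (e.g. with `.sum()`) or pass an explicit `gradient` argument.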

In [13]:
# Leaf tensor filled with 4.0 that records operations for autograd.
x = torch.full((2, 2), 4.0, requires_grad=True)
y = x * 1234
# Reduce to a scalar before calling backward(); d(sum(1234 * x))/dx is 1234 everywhere.
torch.sum(y).backward()
x.grad

tensor([[1234., 1234.],
        [1234., 1234.]])

In [117]:
# A tensor produced by an operation on a tracked tensor remembers that operation in .grad_fn.
z = torch.pow(x, 2)
z.grad_fn

<PowBackward0 at 0x111bb4278>

#### torch.no_grad()
Temporarily disables gradient tracking: any tensor computed inside the block has `requires_grad=False`. Existing tensors and the already-built graph are not affected.

In [154]:
# y is computed from x while autograd is recording, so both report requires_grad=True.
x = torch.randn(3, requires_grad=True)
y = x.pow(2)
print(x.requires_grad, y.requires_grad, sep="\n")

# Inside no_grad() the flags of existing tensors are unchanged; only results
# computed inside the block come out with requires_grad=False.
with torch.no_grad():
    print(x.requires_grad, y.requires_grad, sep="\n")
    print(x.pow(2).requires_grad)

True
True
True
True
False


#### [example2] torch.no_grad() 

In [248]:
a = torch.tensor([[10., 20.],
                  [30., 40.]], requires_grad=True)

# To see the difference, comment out the no_grad block below and uncomment
# the `b = a**2` line that follows it.
d = a.pow(3)
with torch.no_grad():
    # Computed without recording, so `b` is a constant as far as autograd is concerned.
    b = d.pow(2)
    print('---')
# b = a**2
print(b.requires_grad)
# -----------------------------------

# Only the `a` term of the sum is connected to `a`, so the gradient is all ones.
c = (b + a).sum()

c.backward()
a.grad

---
False


tensor([[1., 1.],
        [1., 1.]])

##### grad.zero_()
Every time a variable is backpropagated through, its gradient is accumulated instead of being replaced. (This makes RNNs easier to implement, because the same module is backpropagated through several times.) The gradients therefore have to be reset manually before the next backward pass.

In [None]:
# Manual gradient-descent step; w1, w2 and learning_rate come from the training code.
# no_grad() keeps the in-place weight updates from being recorded by autograd.
with torch.no_grad():
    w1.sub_(learning_rate * w1.grad)
    w2.sub_(learning_rate * w2.grad)

    # Gradients accumulate across backward() calls, so reset them by hand
    # once the weights have been updated.
    w1.grad.zero_()
    w2.grad.zero_()

### Type & Shape

In [118]:
# Inspect the element type of `x` (a tensor defined in an earlier cell).
x.dtype

torch.float32

#### convert from numpy

In [162]:
# NumPy is used here but was never imported above, so a fresh kernel would
# fail with NameError without this import.
import numpy as np

# 21 evenly spaced float32 values from 0 to 20, shaped as a (21, 1) column.
input_np = np.linspace(0, 20, 21, dtype=np.float32).reshape(-1, 1)
# Wrap the array as a tensor without rebinding one name from ndarray to tensor.
# NOTE: `input` shadows the Python builtin; the name is kept because later cells use it.
input = torch.from_numpy(input_np)

#### convert to numpy

In [181]:
# A tensor that does not require grad converts directly:
input.numpy()
# A tensor that requires grad must be detached from the graph first
# (detach() replaces the legacy `.data` attribute, which bypasses autograd's safety checks):
input.detach().numpy()

array([[ 0.],
       [ 1.],
       [ 2.],
       [ 3.],
       [ 4.],
       [ 5.],
       [ 6.],
       [ 7.],
       [ 8.],
       [ 9.],
       [10.],
       [11.],
       [12.],
       [13.],
       [14.],
       [15.],
       [16.],
       [17.],
       [18.],
       [19.],
       [20.]], dtype=float32)

##### reshape & change type

In [164]:
# Integers 0..21, cast to float32 and reshaped into a single column (-1 infers the row count).
x = torch.arange(22).float().view(-1, 1)

### Max

In [160]:
# With two tensor arguments torch.max is element-wise; the one-element `zero`
# is broadcast against `x`.
x = torch.tensor([1., 2., 3.])
zero = torch.zeros(1)
torch.max(zero, x)

tensor([1., 2., 3.])

### Detach
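- `tensor.detach()` returns a tensor that shares the same data but is cut off from the computational graph (it does not require grad)
- `tensor.clone()` copies the data and stays in the computational graph (gradients flow back to the original tensor)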

### Functional API (F)
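With the functional API (e.g. `F.dropout`) you have to pass the `training` flag yourself; unlike the `nn.Dropout` module, it does not follow `model.train()` / `model.eval()`.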

### Device
The device has to be set in two places:
1. model
```
simpleNet = Net().to(device)
```
2. data 
```
for i, (x, y) in enumerate(data_loader_train):
        x, y = x.to(device), y.to(device)
```

In [2]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')