## Tensors in pytorch

In [3]:
from __future__ import print_function
import torch

In [5]:
x = torch.empty(5,3)
print(x)

tensor([[-1.5555e+03,  3.0718e-41,  5.0447e-44],
        [ 0.0000e+00,         nan,  1.3567e-19],
        [ 1.3788e-14,  3.6423e-06,  2.0699e-19],
        [ 3.3738e-12,  7.4086e+28,  6.9397e+22],
        [ 1.7260e+25,  2.2856e+20,  5.0948e-14]])


In [6]:
x = torch.rand(5,3)
print(x)

tensor([[ 0.7056,  0.2027,  0.8255],
        [ 0.9718,  0.0176,  0.0086],
        [ 0.5865,  0.7960,  0.3420],
        [ 0.0570,  0.9665,  0.0342],
        [ 0.0725,  0.9505,  0.4820]])


In [8]:
x = torch.zeros(5,3,dtype=torch.long)
print(x)

tensor([[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]])


In [12]:
result = torch.empty(5,3)
x = torch.rand(5,3,dtype=torch.float)
y = torch.rand(5,3,dtype=torch.float)
torch.add(x,y,out=result)
print(result)

tensor([[ 1.2319,  0.4312,  0.9586],
        [ 0.6441,  1.4629,  0.7452],
        [ 1.1250,  0.6566,  0.9320],
        [ 0.5526,  0.7018,  0.2179],
        [ 0.8541,  0.9487,  1.4652]])


In [13]:
#check if gpu is available
torch.cuda.is_available()

True

## PyTorch NN example
With single hidden layer. Following exmaple from https://github.com/jcjohnson/pytorch-examples

In [24]:
import torch

#enter dimentions. N is batch size, H is neurons in hidden unit
N, D_in, H, D_out = 32, 1000, 100, 10

x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = torch.nn.Sequential(
            torch.nn.Linear(D_in,H),
            torch.nn.ReLU(),
            torch.nn.Linear(H,D_out)
        )

loss_fn = torch.nn.MSELoss(size_average=False)
learning_rate = 1e-4

optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)
for t in range(500):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    if t%100 == 0:
        print(t, loss.item())
    
    #set the gradients to zero
    loss.backward()
    
    optimizer.step()

0 351.7732849121094
100 60.04761505126953
200 50.15278625488281
300 68.70606231689453
400 67.22235870361328


## PyTorch example with autograd
We will create a network with one hidden layer, using autograd

In [25]:
import torch

#set the device
device = torch.device('cpu')
#device = torch.device('gpu')

#dimensions of inputs and outputs. N is batch size
N, D_in, H, D_out = 64, 1000, 100, 10

#Create input and output
x = torch.randn(N,D_in,device=device)
y = torch.randn(N,D_out,device=device)

#initialize the weights
w1 = torch.randn(D_in, H,device=device,requires_grad=True)
w2 = torch.randn(H, D_out, device=device, requires_grad=True)

learning_rate = 1e-6

for t in range(500):
    #predict y
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    
    #get the loss
    loss = (y_pred-y).pow(2).sum()
    print(t, loss.item())
    
    #compute gradients
    loss.backward()
    
    with torch.no_grad():
        w1 -= w1.grad * learning_rate
        w2 -= w2.grad * learning_rate
        
        #set the gradients to zero
        w1.grad.zero_()
        w2.grad.zero_()


0 36486284.0
1 35232400.0
2 35184760.0
3 30626186.0
4 21789094.0
5 12603797.0
6 6590522.0
7 3505887.25
8 2091113.375
9 1419107.5
10 1064328.375
11 848205.8125
12 699167.625
13 587410.375
14 499453.21875
15 428094.78125
16 369262.875
17 320198.125
18 278939.34375
19 243886.390625
20 214081.578125
21 188600.0
22 166712.96875
23 147807.625
24 131425.28125
25 117162.6171875
26 104709.015625
27 93792.6484375
28 84184.7734375
29 75707.421875
30 68218.2109375
31 61581.3984375
32 55688.94140625
33 50442.3359375
34 45756.3671875
35 41561.265625
36 37800.36328125
37 34425.81640625
38 31389.767578125
39 28656.662109375
40 26188.078125
41 23955.828125
42 21935.046875
43 20102.482421875
44 18437.87890625
45 16927.97265625
46 15555.525390625
47 14305.275390625
48 13165.34375
49 12125.1513671875
50 11174.5537109375
51 10304.916015625
52 9508.9296875
53 8779.173828125
54 8109.8427734375
55 7495.47509765625
56 6931.111328125
57 6412.6083984375
58 5936.15576171875
59 5497.61767578125
60 5093.91748046875