In [1]:
from __future__ import print_function
import torch

In [2]:
x = torch.Tensor(5,3)
print(x)


1.00000e-36 *
  0.0000  0.0000  0.0000
  0.0000  5.6886  0.0000
  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000
  5.7186  0.0000  0.0000
[torch.FloatTensor of size 5x3]



Construct a randomly initialized matrix

In [6]:
x = torch.rand(5,3)
print(x)


 0.6540  0.3439  0.7411
 0.3482  0.0477  0.3684
 0.4323  0.5606  0.6292
 0.5270  0.3122  0.9339
 0.9407  0.8461  0.6374
[torch.FloatTensor of size 5x3]



In [7]:
#get size
print(x.size())

torch.Size([5, 3])


#### Operations

In [8]:
#addition

#syntax 1
y = torch.rand(5,3)
print(x + y)


 1.0177  0.8002  1.7019
 0.5623  0.8811  0.8897
 0.5863  0.8955  0.7202
 0.7640  0.5124  1.0128
 1.9275  1.3658  0.8027
[torch.FloatTensor of size 5x3]



In [9]:
#syntax 2
print(torch.add(x,y))


 1.0177  0.8002  1.7019
 0.5623  0.8811  0.8897
 0.5863  0.8955  0.7202
 0.7640  0.5124  1.0128
 1.9275  1.3658  0.8027
[torch.FloatTensor of size 5x3]



In [11]:
#providing tensor as argument
result = torch.Tensor(5,3)
torch.add(x,y,out=result)
print(result)


 1.0177  0.8002  1.7019
 0.5623  0.8811  0.8897
 0.5863  0.8955  0.7202
 0.7640  0.5124  1.0128
 1.9275  1.3658  0.8027
[torch.FloatTensor of size 5x3]



In [12]:
#in place
y.add_(x)
print(y)


 1.0177  0.8002  1.7019
 0.5623  0.8811  0.8897
 0.5863  0.8955  0.7202
 0.7640  0.5124  1.0128
 1.9275  1.3658  0.8027
[torch.FloatTensor of size 5x3]



....

In [14]:
print(x[:,1])


 0.3439
 0.0477
 0.5606
 0.3122
 0.8461
[torch.FloatTensor of size 5]



#### Resizing: if you want to resize/reshape tensor, you can use `torch.view`

In [15]:
x = torch.randn(4,4)

y = x.view(16)
z = x.view(-1,8) # -1 is inferred from other dimensions (8)
a = x.view(-1,2)

print(x.size(), y.size(), z.size(), a.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8]) torch.Size([8, 2])


### Convert torch tensor to numpy

In [16]:
a = torch.ones(5)
print(a)


 1
 1
 1
 1
 1
[torch.FloatTensor of size 5]



In [17]:
b = a.numpy()
print(b)

[1. 1. 1. 1. 1.]


In [19]:
a.add_(1)
print(a)
print(b)


 2
 2
 2
 2
 2
[torch.FloatTensor of size 5]

[2. 2. 2. 2. 2.]


In [20]:
# numpy to tensor

import numpy as np
a = np.ones(5)
b =torch.from_numpy(a)
np.add(a,1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]

 2
 2
 2
 2
 2
[torch.DoubleTensor of size 5]



In [21]:
# let us run this cell only if CUDA is available
if torch.cuda.is_available():
    x = x.cuda()
    y = y.cuda()
    x + y

  return self.add(other)


### Deep Learning for NLP 

The concept of a computation graph is essential to efficient deep learning programming, because it allows you to not have to write back propagation gradients yourself.

A computation graph is simply a specification of how your data is combined to give you the output.

Since the graph totally specifies what parameters were involved with which operations, it contains enough information to compute derivatives. This probably sounds vague, so lets see what is going on using the fundamental class of Pytorch: autograd.Variable

Think from a programmers perspective. What is stored in the torch.Tensor objects we were creating above? Obviously the data and the shape, and maybe a few other things. But when we added two tensors together, we got an output tensor. All this output tensor knows is its data and shape. It has no idea that it was the sum of two other tensors (it could have been read in from a file, it could be the result of some other operation, etc...)

The Variable class keeps track of how it was created. Lets see it in action.

In [27]:
# variables wrap tensor objects
x = autograd.Variable(torch.Tensor([1.,2.,3.]), requires_grad=True)

# you can access the data with the .data attribute
print(x.data)

# you can also do all the same operations you did with tensors with Variables
y = autograd.Variable(torch.Tensor([4.,5.,6.]), requires_grad=True)
z = x+y
print(z.data)

# BUT z knows something extra
print(z.grad_fn)
print(x.grad_fn)








 1
 2
 3
[torch.FloatTensor of size 3]


 5
 7
 9
[torch.FloatTensor of size 3]

<AddBackward1 object at 0x000001A207396EF0>
None


So Variables know what created them. z knows that it wasn't read in from a file, it wasn't the result of a multiplication or exponential operation or whatever. And if you keep following z.grad_fn, you will find yourself at x and y

In [33]:
print(y)
print(z)

Variable containing:
 4
 5
 6
[torch.FloatTensor of size 3]

Variable containing:
 5
 7
 9
[torch.FloatTensor of size 3]



How does that help us compute gradient?

In [31]:
s = z.sum()
print(s)
print(s.grad_fn)

Variable containing:
 21
[torch.FloatTensor of size 1]

<SumBackward0 object at 0x000001A2073FE0B8>


In [34]:
s.backward() # calling .backward() on any variable will run backprop, starting from it.


In [37]:
print(x.grad)

Variable containing:
 1
 1
 1
[torch.FloatTensor of size 3]



In [38]:
print(x.grad_fn)

None


In [39]:
x = torch.randn((2, 2))
y = torch.randn((2, 2))
z = x + y  # These are Tensor types, and backprop would not be possible

var_x = autograd.Variable(x, requires_grad=True)
var_y = autograd.Variable(y, requires_grad=True)
# var_z contains enough information to compute gradients, as we saw above
var_z = var_x + var_y
print(var_z.grad_fn)

<AddBackward1 object at 0x000001A20741ADA0>


In [40]:
x = torch.randn((2, 2))
y = torch.randn((2, 2))
z = x + y  # These are Tensor types, and backprop would not be possible

var_x = autograd.Variable(x, requires_grad=True)
var_y = autograd.Variable(y, requires_grad=True)
# var_z contains enough information to compute gradients, as we saw above
var_z = var_x + var_y
print(var_z.grad_fn)

var_z_data = var_z.data  # Get the wrapped Tensor object out of var_z...
# Re-wrap the tensor in a new variable
new_var_z = autograd.Variable(var_z_data)

# ... does new_var_z have information to backprop to x and y?
# NO!
print(new_var_z.grad_fn)

<AddBackward1 object at 0x000001A207429320>
None


In the example above, we see that at first var_z contains the gradient information. In new_var_z however, this information is no longer there, since we've redefined 

In [41]:
var_z_data


-1.2231 -1.5287
-0.4699  0.2091
[torch.FloatTensor of size 2x2]

In [42]:
var_z.data


-1.2231 -1.5287
-0.4699  0.2091
[torch.FloatTensor of size 2x2]

In [43]:
print(var_z.grad_fn)

<AddBackward1 object at 0x000001A20741AAC8>


In [44]:
print(new_var_z)

Variable containing:
-1.2231 -1.5287
-0.4699  0.2091
[torch.FloatTensor of size 2x2]



In [45]:
print(new_var_z.grad)

None
