In [1]:
import torch

In [2]:
# Build a rank-3 float tensor (3 matrices of shape 2x2) from nested lists.
# torch.tensor() is the documented factory for existing data; the dtype is
# given explicitly because the integer literals would otherwise produce an
# int64 tensor, which .mean(), .std() and randn_like() below cannot handle.
tensor = torch.tensor(
    [
        [[1, 2], [3, 4]],
        [[5, 6], [7, 8]],
        [[9, 10], [11, 12]],
    ],
    dtype=torch.float32,
)

In [3]:
# Display the tensor's contents
tensor

tensor([[[ 1.,  2.],
         [ 3.,  4.]],

        [[ 5.,  6.],
         [ 7.,  8.]],

        [[ 9., 10.],
         [11., 12.]]])

In [6]:
# Size of every dimension, returned as a torch.Size
tensor.shape

torch.Size([3, 2, 2])

In [7]:
# Device on which the tensor lives
tensor.device

device(type='cpu')

In [8]:
# Size of dimension 0, obtained by indexing the shape
tensor.shape[0]

3

In [9]:
# Size of dimension 1, obtained by passing the dimension index to size()
tensor.size(1)

2

In [11]:
# Size of dimension 2 (the last one)
tensor.size(2)

2

In [13]:
# Rank is the number of dimensions; numel() is the total element count.
print("Rank =", tensor.dim())
print("Number of elements =", tensor.numel())

Rank = 3
Number of elements = 12


Accessing tensor

In [17]:
# A single index selects along dimension 0: here, the matrix at position 1
tensor[1]

tensor([[5., 6.],
        [7., 8.]])

Accessing specific element

In [18]:
# One index per dimension selects a single element; the result is still a tensor
tensor[2, 0, 1]

tensor(10.)

In [20]:
# .item() unwraps the single-element tensor into a plain Python number
tensor[2, 0, 1].item() # scalar value

10.0

To get the element at position [0, 0] of each matrix along the first dimension of the tensor

In [23]:
# ":" keeps every index along dimension 0; the two zeros pick element [0, 0] of each matrix
tensor[:, 0, 0]

tensor([1., 5., 9.])

Initializing Tensors

In [24]:
# Tensor of ones with the same dimensions as an existing tensor
torch.ones_like(tensor)

tensor([[[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]]])

In [25]:
# Tensor of zeros with the same dimensions as an existing tensor
torch.zeros_like(tensor)

tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])

To generate tensor using normal distribution

In [27]:
# Same dimensions as `tensor`, filled with random samples (values change between runs)
torch.randn_like(tensor)

tensor([[[ 1.1177, -0.4304],
         [ 0.0344, -1.1166]],

        [[-0.0971, -1.0725],
         [-0.7288,  1.1983]],

        [[ 1.3596,  1.4674],
         [ 0.5998,  0.0603]]])

In [28]:
# Random 2x2 tensor created directly on the requested device
torch.randn(2, 2, device='cpu') # Alternatively, for a GPU tensor, you'd use device='cuda'

tensor([[-1.3831,  0.9659],
        [-0.3856, -0.6807]])

In [29]:
# Arithmetic with a scalar is applied to every element and yields a new tensor
tensor - 2

tensor([[[-1.,  0.],
         [ 1.,  2.]],

        [[ 3.,  4.],
         [ 5.,  6.]],

        [[ 7.,  8.],
         [ 9., 10.]]])

In [30]:
# The subtraction above did not modify `tensor` itself
tensor

tensor([[[ 1.,  2.],
         [ 3.,  4.]],

        [[ 5.,  6.],
         [ 7.,  8.]],

        [[ 9., 10.],
         [11., 12.]]])

In [31]:
# Element-wise operations can be chained: subtract 2 from each element, then double it
(tensor - 2) * 2

tensor([[[-2.,  0.],
         [ 2.,  4.]],

        [[ 6.,  8.],
         [10., 12.]],

        [[14., 16.],
         [18., 20.]]])

In [32]:
# Mean over all 12 elements
tensor.mean()

tensor(6.5000)

In [33]:
# Standard deviation over all elements; the result (sqrt(13) ~= 3.6056) is the
# sample estimate, i.e. it divides by N - 1 rather than N
tensor.std()

tensor(3.6056)

In [34]:
# Mean of the first matrix only: (1 + 2 + 3 + 4) / 4
tensor[0].mean()

tensor(2.5000)

In [35]:
import torch.nn as nn

In [36]:
# Fully connected layer mapping 10 input features to 2 output features
linear = nn.Linear(10, 2)
# Batch of 3 random samples with 10 features each
example_input = torch.randn(3, 10)
# Calling the layer applies it to every row, giving a 3x2 result
example_output = linear(example_input)
example_output

tensor([[-0.5692, -0.5725],
        [ 0.3295, -0.2391],
        [-0.6679,  0.6968]], grad_fn=<AddmmBackward0>)

In [37]:
# ReLU replaces every negative entry with 0 and leaves positive entries unchanged
relu = nn.ReLU()
relu_output = relu(example_output)
relu_output

tensor([[0.0000, 0.0000],
        [0.3295, 0.0000],
        [0.0000, 0.6968]], grad_fn=<ReluBackward0>)

`nn.BatchNorm1d` (used in the next cell) is for situations where you expect a batch of inputs, each of which is a flat list of numbers. In other words, each input is a vector, not a matrix or higher-dimensional tensor.

In [38]:
# Batch normalisation over 2 features: each column is normalised across the
# rows of the batch
batchnorm = nn.BatchNorm1d(2)
batchnorm_output = batchnorm(relu_output)
batchnorm_output

tensor([[-0.7070, -0.7071],
        [ 1.4139, -0.7071],
        [-0.7070,  1.4141]], grad_fn=<NativeBatchNormBackward0>)

In [39]:
# Small MLP block: linear projection 5 -> 2, batch normalisation, then ReLU.
mlp_layer = nn.Sequential(nn.Linear(5, 2), nn.BatchNorm1d(2), nn.ReLU())

# Batch of 5 random samples with 5 features each, shifted by +1.
test_example = 1 + torch.randn(5, 5)
print("input: ", test_example, sep="\n")
print("output: ", mlp_layer(test_example), sep="\n")

input: 
tensor([[-0.0996,  0.0597,  0.8005,  0.5929,  1.7384],
        [-0.2759,  2.1376,  1.5072,  0.8220, -1.3406],
        [-0.7505,  1.1139, -0.0519, -0.7215,  0.7238],
        [ 1.6086,  2.3240,  0.2864,  1.0727,  0.5180],
        [ 1.6093, -0.7315,  1.6975, -0.2850,  0.2629]])
output: 
tensor([[1.5228, 0.0000],
        [0.0000, 1.7713],
        [0.6430, 0.0000],
        [0.0000, 0.0000],
        [0.0000, 0.3127]], grad_fn=<ReluBackward0>)


In [40]:
import torch.optim as optim
# Adam optimizer over every parameter of mlp_layer, with learning rate 0.1
adam_opt = optim.Adam(mlp_layer.parameters(), lr=1e-1)

The following notes are reproduced as-is from the original notebook.
Training Loop
A (basic) training step in PyTorch consists of four basic parts:

Set all of the gradients to zero using opt.zero_grad() <br>
Calculate the loss, loss <br>
Calculate the gradients of the loss with respect to the parameters using loss.backward() <br>
Update the parameters being optimized using opt.step() <br>
That might look like the following code (and you'll notice that if you run it several times, the loss goes down):

In [41]:
# One optimisation step: fresh batch, clear stale gradients, compute loss,
# backpropagate, update parameters.
train_example = 1 + torch.randn(100, 5)
adam_opt.zero_grad()

# Simple loss: mean absolute distance of the network output from 1
cur_loss = (1 - mlp_layer(train_example)).abs().mean()

cur_loss.backward()
adam_opt.step()
print(cur_loss)

tensor(0.7745, grad_fn=<MeanBackward0>)


requires_grad_()
You can also tell PyTorch that it needs to calculate the gradient with respect to a tensor that you created by saying example_tensor.requires_grad_(), which will change it in-place. This means that even if PyTorch wouldn't normally store a grad for that particular tensor, it will for that specified tensor.

with torch.no_grad():
PyTorch will usually calculate the gradients as it proceeds through a set of operations on tensors. This can cost unnecessary computation and memory, especially if you're only performing an evaluation. However, you can wrap a piece of code in a `with torch.no_grad():` block to prevent gradients from being calculated inside it.

detach():
Sometimes, you want to calculate and use a tensor's value without calculating its gradients. For example, if you have two models, A and B, and you want to directly optimize the parameters of A with respect to the output of B, without calculating the gradients through B, then you could feed the detached output of B to A. There are many reasons you might want to do this, including efficiency or cyclical dependencies (i.e. A depends on B depends on A).

New nn Classes
You can also create new classes which extend nn.Module. For these classes, all class attributes, as in self.layer or self.param, will automatically be treated as parameters if they are themselves nn objects or if they are tensors wrapped in nn.Parameter which are initialized with the class.

The __init__ function defines what will happen when the object is created. The first line of the init function of a class, for example, WellNamedClass, needs to be super(WellNamedClass, self).__init__().

The forward function defines what runs if you create that object model and pass it a tensor x, as in model(x). If you choose the function signature (self, x), then each call of the forward function gets two pieces of information: self, which is a reference to the object with which you can access all of its parameters, and x, which is the current tensor for which you'd like to return y.

In [44]:
class ExampleModule(nn.Module):
    """Linear layer followed by an element-wise power with a learnable exponent.

    Args:
        input_dims: number of input features of the linear layer.
        output_dims: number of output features of the linear layer.
    """

    def __init__(self, input_dims, output_dims):
        super(ExampleModule, self).__init__()
        # Both attributes are registered as parameters: `linear` because it is
        # an nn object, `exponent` because it is wrapped in nn.Parameter.
        self.linear = nn.Linear(input_dims, output_dims)
        # Scalar exponent, initialised to 1
        self.exponent = nn.Parameter(torch.tensor(1.))

    def forward(self, x):
        """Apply the linear layer to `x`, then raise each entry to `self.exponent`."""
        projected = self.linear(x)
        # `**` is element-wise exponentiation, the same notation plain Python uses
        return projected ** self.exponent

In [45]:
# Instantiate with 10 input features and 2 outputs, then list the learnable
# parameters: the scalar exponent plus the linear layer's weight and bias
example_model = ExampleModule(10, 2)
list(example_model.parameters())

[Parameter containing:
 tensor(1., requires_grad=True),
 Parameter containing:
 tensor([[-0.2652, -0.1425,  0.2142,  0.2702,  0.0689, -0.1227,  0.1936, -0.2916,
          -0.0473, -0.2110],
         [-0.1210,  0.0142,  0.1774,  0.3070, -0.2687, -0.2643, -0.2171, -0.2802,
          -0.2767, -0.0407]], requires_grad=True),
 Parameter containing:
 tensor([ 0.0997, -0.1360], requires_grad=True)]

In [46]:
# Same parameters, each paired with its attribute-path name
list(example_model.named_parameters())

[('exponent',
  Parameter containing:
  tensor(1., requires_grad=True)),
 ('linear.weight',
  Parameter containing:
  tensor([[-0.2652, -0.1425,  0.2142,  0.2702,  0.0689, -0.1227,  0.1936, -0.2916,
           -0.0473, -0.2110],
          [-0.1210,  0.0142,  0.1774,  0.3070, -0.2687, -0.2643, -0.2171, -0.2802,
           -0.2767, -0.0407]], requires_grad=True)),
 ('linear.bias',
  Parameter containing:
  tensor([ 0.0997, -0.1360], requires_grad=True))]

In [47]:
# Batch of 2 random inputs with 10 features each, passed through the model.
# NOTE(review): the name `input` shadows Python's builtin input() for the rest
# of the session; consider renaming it once no other cell relies on this name.
input = torch.randn(2, 10)
example_model(input)

tensor([[-0.1386, -1.2815],
        [ 1.1151,  0.7132]], grad_fn=<PowBackward1>)