In [1]:
import torch
import numpy as np

In [2]:
# Build a 2x2 tensor straight from a nested Python list.
data = [
    [1, 2],
    [3, 4],
]
x_data = torch.tensor(data)

In [3]:
# Same nested list, this time routed through a NumPy array before it becomes a tensor.
np_array = np.array(data)
x_np = torch.from_numpy(np_array)

In [4]:
# ones_like retains the properties (shape, dtype) of x_data
x_ones = torch.ones_like(x_data)
print(f"Ones Tensor: \n {x_ones} \n")

# rand_like also copies the shape, but here the dtype of x_data is overridden with a float type
x_rand = torch.rand_like(x_data, dtype=torch.float32)
print(f"Random Tensor: \n {x_rand} \n")

Ones Tensor: 
 tensor([[1, 1],
        [1, 1]]) 

Random Tensor: 
 tensor([[0.6613, 0.5919],
        [0.7225, 0.5055]]) 



In [5]:
tensor = torch.rand(3, 4)

# Report the tensor's shape, element type and device, one per line.
print(
    f"Shape of tensor: {tensor.shape}",
    f"Datatype of tensor: {tensor.dtype}",
    f"Device tensor is stored on: {tensor.device}",
    sep="\n",
)


Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


In [6]:
# Move the tensor to the GPU, but only when CUDA is actually usable on this machine.
if torch.cuda.is_available():
    tensor = tensor.to("cuda")

In [7]:
# Start from a 4x4 tensor of ones, then zero out the second column (index 1).
tensor = torch.ones((4, 4))
tensor[:, 1] = 0
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [8]:
# Join three copies of the tensor side by side (along dim=1, the column axis).
t1 = torch.cat((tensor,) * 3, dim=1)
print(t1)

tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])


In [9]:
# Element-wise product: every entry is multiplied by the entry at the same position.
print(f"tensor.mul(tensor) \n {tensor.mul(tensor)} \n")
# Equivalent operator form:
print(f"tensor * tensor \n {tensor * tensor}")

tensor.mul(tensor) 
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor * tensor 
 tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [10]:
# Matrix product of the tensor with its own transpose.
print(f"tensor.matmul(tensor.T) \n {tensor.matmul(tensor.T)} \n")
# Equivalent operator form:
print(f"tensor @ tensor.T \n {tensor @ tensor.T}")

tensor.matmul(tensor.T) 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]]) 

tensor @ tensor.T 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])


### Operations that have a _ suffix are in-place

In [11]:
# Show the tensor before and after an in-place addition.
print(tensor, "\n")
tensor.add_(5)  # trailing underscore: adds 5 to every element of `tensor` itself
print(tensor)

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]]) 

tensor([[6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.],
        [6., 5., 6., 6.]])


### In-place operations save some memory, but they can be problematic when computing derivatives because the history is lost immediately. Hence, their use is discouraged.

In [12]:
# Create a tensor and obtain its NumPy counterpart, then show both.
t = torch.ones(5)
n = t.numpy()
print(f"t: {t}")
print(f"n: {n}")

t: tensor([1., 1., 1., 1., 1.])
n: [1. 1. 1. 1. 1.]


### The tensor and the NumPy array share the same underlying memory, so a change to one is reflected in the other

In [13]:
# Add 1 to the NumPy array in place: out=n writes the result back into n.
np.add(n, 1, out=n)
# Display the tensor to check whether it picked up the change made through n.
t

tensor([2., 2., 2., 2., 2.])

In [15]:
import torch
import torchvision

# Pretrained ResNet-18; the weights are downloaded on first use.
model = torchvision.models.resnet18(pretrained=True)
# One random 3-channel 64x64 input. Note: this rebinds `data`, which earlier held a Python list.
data = torch.rand(1, 3, 64, 64)
# Random target; presumably shaped to match the model output it is subtracted from in the loss.
labels = torch.rand(1, 1000)

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /home/dagarwal/.cache/torch/checkpoints/resnet18-5c106cde.pth
52.5%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100.0%


In [16]:
# Echo the random input batch; the repr of a (1, 3, 64, 64) tensor is long.
data

tensor([[[[0.0372, 0.8674, 0.3914,  ..., 0.7922, 0.2006, 0.0075],
          [0.6481, 0.5218, 0.0880,  ..., 0.8605, 0.9599, 0.8196],
          [0.3175, 0.4213, 0.6178,  ..., 0.4225, 0.7595, 0.1537],
          ...,
          [0.4600, 0.9769, 0.6500,  ..., 0.3455, 0.0942, 0.3819],
          [0.0693, 0.4726, 0.3665,  ..., 0.3433, 0.4844, 0.8809],
          [0.1492, 0.0398, 0.7775,  ..., 0.0668, 0.3215, 0.1008]],

         [[0.7939, 0.0640, 0.1577,  ..., 0.3493, 0.2743, 0.3754],
          [0.9999, 0.2540, 0.4748,  ..., 0.5225, 0.8898, 0.0584],
          [0.6209, 0.8062, 0.9923,  ..., 0.9062, 0.0691, 0.9693],
          ...,
          [0.4172, 0.3285, 0.0354,  ..., 0.1206, 0.6039, 0.5429],
          [0.6754, 0.3615, 0.9414,  ..., 0.4672, 0.8576, 0.2438],
          [0.0544, 0.6845, 0.7819,  ..., 0.3378, 0.7467, 0.2150]],

         [[0.8975, 0.2222, 0.3443,  ..., 0.2777, 0.8290, 0.2051],
          [0.1733, 0.1038, 0.4792,  ..., 0.6229, 0.8698, 0.6109],
          [0.5653, 0.9428, 0.4609,  ..., 0

In [20]:
prediction = model(data) # forward pass

In [21]:
# Collapse the element-wise error between prediction and labels into a single value.
loss = (prediction - labels).sum()
loss.backward() # backward pass

### Backward propagation is kicked off when we call `.backward()` on the error tensor. Autograd then calculates and stores the gradients for each model parameter in the parameter’s `.grad` attribute.

In [31]:
# SGD optimizer over all model parameters, with learning rate 0.01 and momentum 0.9.
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

In [32]:
optim.step() # one optimizer update of the registered model parameters

### Conceptually, autograd keeps a record of data (tensors) & all executed operations (along with the resulting new tensors) in a directed acyclic graph (DAG) consisting of Function objects. In this DAG, leaves are the input tensors, roots are the output tensors. By tracing this graph from roots to leaves, you can automatically compute the gradients using the chain rule.

### In a NN, parameters that don’t compute gradients are usually called frozen parameters.

### Another common use case where exclusion from the DAG is important is fine-tuning a pretrained network.

In [2]:
from torch.autograd import Variable
import torch
# Leaf tensor tracked by autograd. torch.tensor(..., requires_grad=True) replaces the
# deprecated Variable(torch.FloatTensor(...)) wrapper; dtype=torch.float32 keeps the
# element type that FloatTensor produced.
x = torch.tensor([[1, 2, 3, 4]], dtype=torch.float32, requires_grad=True)
z = 2 * x
# Reduce along dim=1 only, so the result keeps one entry per row instead of becoming a scalar.
loss = z.sum(dim=1)

In [3]:
# Summing over every element gives a scalar tensor.
z.sum()

tensor(20., grad_fn=<SumBackward0>)

In [4]:
# Summed along dim=1 only, so this is a one-element tensor rather than a scalar.
loss

tensor([20.], grad_fn=<SumBackward1>)

In [5]:
# Ten random samples; no seed is set in this cell, so the values differ from run to run.
torch.randn(10)

tensor([-1.8229, -0.0164, -1.4238,  0.3082, -0.3612, -0.1361, -0.8926, -0.0094,
        -2.1224, -1.1459])