## Intro to Pytorch

In [4]:
import torch

In [2]:
# Build a 3x2x2 floating-point tensor from nested Python lists.
# Float literals make torch.tensor infer a floating-point dtype.
example_tensor = torch.tensor([
    [[1., 2.], [3., 4.]],
    [[5., 6.], [7., 8.]],
    [[9., 0.], [1., 2.]],
])

#### .device reports which device (CPU or GPU) a tensor is stored on. To move a tensor to another device, write new_tensor = example_tensor.to(device), where device is either 'cpu' or 'cuda'

In [5]:
example_tensor.device

device(type='cpu')

In [6]:
example_tensor.shape

torch.Size([3, 2, 2])

##### You can also get the size of a particular dimension n using example_tensor.shape[n] or equivalently example_tensor.size(n)

In [9]:
print("shape[0] =", example_tensor.shape[0])
print("size(1) =", example_tensor.size(1))

shape[0] = 3
size(1) = 2


In [10]:
print("Rank =", len(example_tensor.shape))
print("Number of elements =", example_tensor.numel())

Rank = 3
Number of elements = 12


### Indexing Tensors

In [11]:
example_tensor[1]

tensor([[5., 6.],
        [7., 8.]])

In [13]:
example_tensor[1, 1, 0]

tensor(7.)

#### item() returns the value of a single-element tensor as a Python scalar. It cannot be used on a tensor with more than one element

In [16]:
# example_tensor[1] holds more than one element, so .item() raises.
# Catch and display the error so the notebook still runs top to bottom.
# NOTE(review): the recorded run raised ValueError; some PyTorch versions may
# raise RuntimeError instead, so both are caught.
try:
    example_tensor[1].item()
except (ValueError, RuntimeError) as err:
    print(f"{type(err).__name__}: {err}")

ValueError: only one element tensors can be converted to Python scalars

In [17]:
example_tensor[1, 1, 0].item()

7.0

In [18]:
example_tensor[:, 0, 0]

tensor([1., 5., 9.])

### Initializing Tensors

In [19]:
torch.ones_like(example_tensor)

tensor([[[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.]]])

In [20]:
torch.zeros_like(example_tensor)

tensor([[[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.]]])

In [22]:
torch.ones([1,2])

tensor([[1., 1.]])

In [23]:
torch.rand([1,2,3])

tensor([[[0.5455, 0.3661, 0.7834],
         [0.3422, 0.7406, 0.4084]]])

In [24]:
torch.randn_like(example_tensor)

tensor([[[-1.5426,  0.9418],
         [-0.1294, -1.2423]],

        [[-1.5872,  0.6830],
         [-2.3797, -1.0744]],

        [[-0.5091, -1.5454],
         [-1.2645,  0.5356]]])

#### assigning device to a tensor. 'cpu' and 'cuda'

In [25]:
torch.randn(2, 2, device='cpu')

tensor([[ 0.8111, -0.0975],
        [ 0.6630,  0.3605]])

In [26]:
# Create the tensor on the GPU when one is available; otherwise fall back to
# the CPU so this cell does not fail on machines without CUDA.
cuda_or_cpu = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.randn(2, 2, device=cuda_or_cpu)

tensor([[-0.5949,  1.2121],
        [-0.0271, -0.5514]], device='cuda:0')

In [27]:
# 'gpu' is not a valid device type (the GPU device type is 'cuda').
# Catch and display the resulting error so the notebook still runs top to bottom.
try:
    torch.randn(2, 2, device='gpu')
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: Expected one of cpu, cuda, xpu, mkldnn, opengl, opencl, ideep, hip, ve, ort, mlc, xla, lazy, vulkan, meta, hpu device type at start of device string: gpu

### Basic functions

In [29]:
(example_tensor - 5) * 2

tensor([[[ -8.,  -6.],
         [ -4.,  -2.]],

        [[  0.,   2.],
         [  4.,   6.]],

        [[  8., -10.],
         [ -8.,  -6.]]])

In [30]:
print("Mean:", example_tensor.mean())
print("Stdev:", example_tensor.std())

Mean: tensor(4.)
Stdev: tensor(2.9848)


In [32]:
example_tensor.mean(0)
# Equivalently, you could also write:
# example_tensor.mean(dim=0)
# example_tensor.mean(axis=0)
# torch.mean(example_tensor, 0)
# torch.mean(example_tensor, dim=0)
# torch.mean(example_tensor, axis=0)

tensor([[5.0000, 2.6667],
        [3.6667, 4.6667]])

### PyTorch Neural Network Module (torch.nn)

In [1]:
import torch.nn as nn

#### nn.Linear - creates a linear layer

In [2]:
linear = nn.Linear(10,2)

In [9]:
linear

Linear(in_features=10, out_features=2, bias=True)

In [10]:
linear.weight

Parameter containing:
tensor([[-0.2132,  0.1787, -0.1317, -0.0899, -0.0611, -0.0938, -0.2373,  0.0169,
         -0.1135, -0.0804],
        [-0.2815,  0.1950, -0.2063,  0.0597, -0.0764,  0.0353,  0.2607, -0.2214,
          0.2198, -0.2918]], requires_grad=True)

In [11]:
linear.bias

Parameter containing:
tensor([0.1427, 0.1365], requires_grad=True)

In [5]:
example_input = torch.randn(3, 10)

In [8]:
example_input

tensor([[ 0.3497,  1.0903, -0.2264, -0.4400, -0.0087,  0.8040, -0.1124, -1.0580,
         -0.3263,  0.0590],
        [-0.3343,  0.6960,  0.7545,  0.6914, -1.1245,  0.1765,  0.5056, -2.1615,
         -0.3479,  0.1609],
        [ 1.2299, -0.1943,  1.3289,  0.3035, -0.0879,  1.1840, -0.5269, -1.5277,
         -1.2012,  1.4393]])

In [6]:
example_output = linear(example_input)

In [7]:
example_output

tensor([[ 0.2986,  0.4162],
        [ 0.0989,  0.8311],
        [-0.3425, -0.9384]], grad_fn=<AddmmBackward0>)

#### nn.ReLU - creates an object that applies the ReLU activation function.

In [14]:
relu = nn.ReLU()
relu_output = relu(example_output)
relu_output

tensor([[0.2986, 0.4162],
        [0.0989, 0.8311],
        [0.0000, 0.0000]], grad_fn=<ReluBackward0>)

#### nn.BatchNorm1d - is a normalization technique that will rescale a batch of n inputs to have a consistent mean and standard deviation between batches

In [16]:
batchnorm = nn.BatchNorm1d(2)
batchnorm_output = batchnorm(relu_output)
batchnorm_output

tensor([[ 1.3369e+00,  1.2689e-03],
        [-2.7027e-01,  1.2241e+00],
        [-1.0666e+00, -1.2253e+00]], grad_fn=<NativeBatchNormBackward0>)

#### nn.Sequential - creates a single operation that performs a sequence of operations.

In [17]:
# A small MLP block: linear map from 5 to 2 features, batch normalisation, then ReLU.
mlp_layer = nn.Sequential(nn.Linear(5, 2), nn.BatchNorm1d(2), nn.ReLU())

# A 5x5 batch of torch.rand samples, shifted up by 1.
test_example = 1 + torch.rand(5, 5)

# Show the batch and the result of a single forward pass through the block.
print("input :", test_example, sep="\n")
print("output :", mlp_layer(test_example), sep="\n")

input :
tensor([[1.2515, 1.2051, 1.7812, 1.5395, 1.2380],
        [1.0270, 1.0415, 1.9429, 1.1792, 1.1764],
        [1.1259, 1.3295, 1.2132, 1.7517, 1.2921],
        [1.5549, 1.3822, 1.8185, 1.7272, 1.4210],
        [1.9021, 1.8471, 1.1719, 1.8387, 1.5734]])
output :
tensor([[0.8362, 0.0000],
        [0.5868, 0.0000],
        [0.0000, 1.8190],
        [0.9631, 0.0000],
        [0.0000, 0.0876]], grad_fn=<ReluBackward0>)


#### Optimizers - update the model's parameters using the gradients computed by PyTorch's automatic differentiation

In [19]:
import torch.optim as optim
adam_opt = optim.Adam(mlp_layer.parameters(), lr=1e-1)

#### Training loop 
##### 1. Set all gradients to zero using opt.zero_grad()
##### 2. Calculate loss - loss
##### 3. calculate gradients using loss.backward()
##### 4. Update the parameters being optimized using opt.step()

In [25]:
# One optimisation step on a fresh random batch of 100 samples with 5 features.
train_example = torch.randn(100, 5) + 1

# Step 1: reset all gradients to zero before computing new ones.
adam_opt.zero_grad()

# Step 2: loss is the mean absolute difference between the network output and 1.
curr_loss = torch.abs(1 - mlp_layer(train_example)).mean()

# Steps 3 and 4: compute gradients, then update the optimised parameters.
curr_loss.backward()
adam_opt.step()

# Print the loss computed above. This previously referenced the misspelled name
# `cur_loss`, which is undefined on a fresh kernel and raises NameError.
print(curr_loss)

tensor(0.7685, grad_fn=<MeanBackward0>)


#### requires_grad_() - tells PyTorch that it needs to calculate the gradient w.r.t. a tensor, e.g. example_tensor.requires_grad_()
#### torch.no_grad() - prevents gradients from being calculated
#### detach() - to use a tensor's value without calculating its gradients.

#### nn classes

In [26]:
class ExampleModule(nn.Module):
    """Linear layer followed by an element-wise power with a learnable exponent.

    Args:
        input_dims: number of input features given to ``nn.Linear``.
        output_dims: number of output features given to ``nn.Linear``.
    """

    def __init__(self, input_dims, output_dims):
        super().__init__()
        self.linear = nn.Linear(input_dims, output_dims)
        # Scalar exponent registered as a parameter, initialised to 1.
        self.exponent = nn.Parameter(torch.tensor(1.))

    def forward(self, x):
        """Apply the linear layer, then raise each element to ``self.exponent``."""
        projected = self.linear(x)
        # torch.pow is the functional spelling of the element-wise `**` operator.
        return torch.pow(projected, self.exponent)

In [27]:
example_model = ExampleModule(10, 2)
list(example_model.parameters())

[Parameter containing:
 tensor(1., requires_grad=True),
 Parameter containing:
 tensor([[-0.3030,  0.0921, -0.1377, -0.1036,  0.1887,  0.1596,  0.0433,  0.2581,
           0.3135, -0.2882],
         [-0.1493, -0.1197, -0.1676, -0.1245,  0.1598, -0.2525,  0.1622, -0.0803,
          -0.2352, -0.0147]], requires_grad=True),
 Parameter containing:
 tensor([0.2114, 0.0050], requires_grad=True)]

In [28]:
list(example_model.named_parameters())

[('exponent',
  Parameter containing:
  tensor(1., requires_grad=True)),
 ('linear.weight',
  Parameter containing:
  tensor([[-0.3030,  0.0921, -0.1377, -0.1036,  0.1887,  0.1596,  0.0433,  0.2581,
            0.3135, -0.2882],
          [-0.1493, -0.1197, -0.1676, -0.1245,  0.1598, -0.2525,  0.1622, -0.0803,
           -0.2352, -0.0147]], requires_grad=True)),
 ('linear.bias',
  Parameter containing:
  tensor([0.2114, 0.0050], requires_grad=True))]

In [40]:
input = torch.randn(2, 10)
example_model(input)

tensor([[0.3494, 0.7279],
        [0.8325, 0.2719]], grad_fn=<PowBackward1>)