In [38]:
import os
import numpy as np
import torch

# PyTorch Basics - Tensors in PyTorch

## Creating Tensors

### Manually

In [39]:
# Build a 2x2 tensor from a nested Python list, stating the dtype, gradient
# tracking, device and pinned-memory options explicitly.
a = torch.tensor(
    [[1, 2], [3, 4]],
    dtype=torch.int64,
    requires_grad=False,
    device="cpu",
    pin_memory=False,
)
print(a)

tensor([[1, 2],
        [3, 4]])


### From Numpy

In [40]:
# Draw a 5x5 NumPy array of random integers (low=0, high=10) and convert it
# straight into a tensor.
a = torch.from_numpy(np.random.randint(low=0, high=10, size=(5, 5)))
print(a)

tensor([[8, 7, 1, 5, 3],
        [3, 4, 0, 6, 9],
        [8, 0, 1, 0, 9],
        [5, 6, 9, 9, 0],
        [6, 6, 6, 2, 6]])


### Generate Random 

In [41]:
# Random integers (low=0, high=10) arranged as a 5x5 matrix
a = torch.randint(0, 10, (5, 5))
print(f"Random Numbers:\n{a}\n\n")

# 5x5 samples from a normal distribution
a = torch.randn(5, 5)
print(f"Normal distribution:\n{a}\n")

Random Numbers:
tensor([[7, 1, 5, 2, 9],
        [5, 9, 4, 7, 9],
        [2, 1, 6, 0, 1],
        [8, 2, 6, 7, 1],
        [7, 9, 6, 6, 0]])


Normal distribution:
tensor([[ 0.6421, -0.0464,  0.5318, -1.1109,  1.9069],
        [-1.1127, -0.1155, -0.6312, -0.8137,  0.6080],
        [ 0.4910,  0.6079,  0.8873, -0.9288,  0.4372],
        [ 1.0729,  0.8284,  0.8823, -0.3106, -1.6263],
        [ 0.0508, -1.3158,  1.5849, -0.7185,  1.6096]])



### Specifying the Data Type

In [42]:
# Same sampler at two precisions: `dtype` selects the element type.
a = torch.randn(5, 5, dtype=torch.float64)
print(f"Random Float64:\n{a}\n")

a = torch.randn(5, 5, dtype=torch.float32)
print(f"Random Float32:\n{a}\n")

Random Float64:
tensor([[-1.1425,  1.2393, -0.5514,  1.2568,  0.8067],
        [-0.2907, -0.8787,  0.8167,  0.6535,  1.2286],
        [-2.3156, -0.1284,  0.4144, -0.5583, -0.4511],
        [-0.6270, -1.8702,  0.8138,  0.0251, -0.0364],
        [-0.2156,  0.6797, -0.8549, -0.1414,  1.3385]], dtype=torch.float64)

Random Float32:
tensor([[-1.1230, -0.7131,  1.4695, -1.5100,  0.1988],
        [ 0.3131, -0.4539,  0.4156, -2.1898, -0.9304],
        [ 0.5455,  0.2041, -0.0978,  0.3432,  2.3561],
        [ 0.1273, -0.8996, -0.8928,  2.1836,  1.3041],
        [-0.0843, -0.2349,  0.7035,  0.5674,  1.0699]])



#### Question

1. What's the difference between float64 and float32?


2. Why would you choose one over the other?

### Generate Zeros/Ones

In [43]:
# Constant-filled 5x5x5 tensors: all zeros, then all ones.
zeros = torch.zeros(5, 5, 5)
print(f"Zeros:\n{zeros}\n\n")

ones = torch.ones(5, 5, 5)
print(f"Ones:\n{ones}")

Zeros:
tensor([[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]])


Ones:
tensor([[[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1.

### Specify a Device

A tensor can live either on the `CPU` (stored in RAM) or on a `GPU` device (stored in the GPU's own memory)

NVIDIA `GPU` devices are denoted as `cuda:0`, where the number following `cuda` is the index of the GPU in case multiple GPU devices are available. On Apple silicon the GPU backend is named `mps` instead

The following is boilerplate code for determining the device to use


In [44]:
# Report whether the MPS (Apple GPU) backend can be used. The label names MPS
# because that is the backend actually being queried, not CUDA.
print("MPS/GPU available:", torch.backends.mps.is_available())

CUDA/GPU available: True


In [45]:
# Pick the best available device: a CUDA GPU first, then the MPS backend,
# and finally the CPU so the notebook still runs on machines without a GPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

#### Notes
1. Specifying the device as `cuda` is the standard when using a single device; multi-GPU training is usually handled by PyTorch or other supporting frameworks and packages


2. PyTorch isn't limited to `cpu` or `cuda` as the device; there are other device types, such as OpenCL and OpenGL, etc. But keep in mind PyTorch can be slower on these devices, and they're intended for special cases

In [46]:
# Allocate the random tensor directly on the chosen device.
a = torch.randn(5, 5, 5, device=device)
print(a)

tensor([[[ 0.2655, -0.4082, -0.1934, -0.5825, -0.7249],
         [ 0.0104,  2.0654, -0.0949, -1.3531, -0.0314],
         [ 0.3501, -0.8953, -0.1801,  1.9642, -1.6430],
         [ 0.6529, -0.0257, -0.2086,  1.4790,  0.3531],
         [-0.3820, -0.8054, -0.6677,  1.0123,  1.2991]],

        [[-0.7915,  0.1098,  0.2359, -1.1237, -1.1411],
         [ 1.7585,  1.2164, -0.5669,  0.2683, -0.1893],
         [ 0.2688,  0.8891,  0.8435, -0.0352, -0.1707],
         [ 0.4964, -0.3999,  1.4465,  0.8469,  0.0987],
         [-0.1174,  0.5912,  0.9966, -1.3441,  0.8958]],

        [[-1.2511, -0.8010, -0.5928,  0.2148,  2.6458],
         [ 0.0211, -1.9034, -1.8802,  0.7896, -0.8437],
         [ 1.4681, -0.7512, -1.7511,  1.0434,  2.3964],
         [ 0.5687, -1.0063,  0.8695,  1.6116, -0.5995],
         [-0.3366, -0.4706, -1.1412,  0.5368, -1.8591]],

        [[-1.0399,  2.3589,  0.4613, -0.8939,  0.5795],
         [-1.6861,  0.2827,  0.2706,  0.7811, -0.2820],
         [-0.2748,  2.3431,  0.9938, -1.53

## Tensor Information

### Ndim and Shape

In [47]:
# A float32 tensor with three dimensions of length 5 to inspect below.
a = torch.randn(5, 5, 5, dtype=torch.float32)

In [48]:
# Number of dimensions of the tensor, followed by the size along each one.
# The label is spelled "ndim" to match the attribute that is printed.
print(f"ndim:{a.ndim}")
print(f"shape:{a.shape}")

ndmin:3
shape:torch.Size([5, 5, 5])


### Single value tensors

In [49]:
# A tensor built from a single Python number.
a = torch.tensor(7)

# `.item()` pulls the value out as a plain Python object (an `int` here),
# whereas `a` itself is still a `torch.Tensor`.
print(f"Tensor:{a}\tType:{type(a)}")
print(f"Value:{a.item()}\tType:{type(a.item())}")

Tensor:7	Type:<class 'torch.Tensor'>
Value:7	Type:<class 'int'>


### Data Type

In [50]:
# Element type of the tensor.
a.dtype

torch.int64

### Device

In [51]:
# Device on which the tensor is stored.
a.device

device(type='cpu')

## Tensor Operations

### Element Wise

In [52]:
# Three random integers (low=0, high=10) to experiment with.
a = torch.randint(0, 10, (3,))
a

tensor([3, 9, 3])

In [53]:
# Element-wise: 5 is added to every entry of `a`.
a + 5

tensor([ 8, 14,  8])

In [54]:
# Element-wise: every entry of `a` is multiplied by 5.
a * 5

tensor([15, 45, 15])

In [55]:
# Element-wise division; the result is floating point even though `a` holds integers.
a / 5

tensor([0.6000, 1.8000, 0.6000])

In [56]:
# A second random integer vector of the same length as `a`.
b = torch.randint(0, 10, (3,))
b

tensor([4, 6, 6])

In [57]:
# Element-wise product of two equal-length vectors (not a dot product).
a * b

tensor([12, 54, 18])

In [58]:
# Function form of the `*` operator: the same element-wise product.
torch.mul(a, b)

tensor([12, 54, 18])

In [59]:
# Element-wise division of `a` by `b`; the result is floating point.
torch.div(a, b)

tensor([0.7500, 1.5000, 0.5000])

## Operations

### Vector Operations

In [60]:
# Two fresh random integer vectors of length 3, drawn in order: `a`, then `b`.
a, b = (torch.randint(0, 10, (3,)) for _ in range(2))

In [61]:
# Display the first vector.
a

tensor([9, 2, 6])

In [62]:
# Display the second vector.
b

tensor([5, 1, 2])

In [63]:
# dot product of two vectors
torch.dot(a, b), torch.matmul(a, b)

(tensor(59), tensor(59))

In [64]:
# Contrast with the element-wise product, which keeps the vector shape.
torch.mul(a, b), a * b

(tensor([45,  2, 12]), tensor([45,  2, 12]))

### Matrix Operations

You can use this link to visualize matrix multiplication

http://matrixmultiplication.xyz/

In [65]:
# A (5, 3) matrix and a (3, 5) matrix: the inner dimensions agree.
a = torch.randint(0, 10, (5, 3))
b = torch.randint(0, 10, (3, 5))

In [66]:
# Matrix product of a (5, 3) matrix with a (3, 5) matrix gives a (5, 5) result.
ab = torch.matmul(a, b)
ab.shape

torch.Size([5, 5])

In [67]:
# The same matrix shapes as before, now stacked along a leading batch dimension.
batch_size = 32
a = torch.randint(0, 10, (batch_size, 5, 3))
b = torch.randint(0, 10, (batch_size, 3, 5))

In [68]:
# Batched matrix product: one (5, 3) x (3, 5) product per batch entry.
ab = torch.bmm(a, b)
ab.shape

torch.Size([32, 5, 5])

In [69]:
# For these 3-D inputs torch.matmul produces exactly the same values as torch.bmm.
torch.all(torch.bmm(a, b) == torch.matmul(a, b))

tensor(True)

### Reshaping

In [70]:
# Number of samples per batch used by the reshaping examples.
batch_size = 128

In [71]:
# Each of the `batch_size` samples is a flat vector of 25 values.
a = torch.randn(batch_size, 25)

In [72]:
# View the 25 flat values of each sample as a 5x5 matrix.
a.view((batch_size, 5, 5)).shape

torch.Size([128, 5, 5])

In [73]:
# The same target shape obtained through reshape.
a.reshape((batch_size, 5, 5)).shape

torch.Size([128, 5, 5])

Note that the new shape must be valid: the number of elements implied by the new shape must be the same as the number of elements in the original tensor

In [74]:
# Reshaping to (batch_size, 8, 4) needs 32 values per sample, but each sample
# only holds 25, so PyTorch raises. The error is caught and displayed so the
# demonstration does not stop a "Run All" execution of the notebook.
try:
    a.reshape((batch_size, 8, 4))
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: shape '[128, 8, 4]' is invalid for input of size 3200

View and reshape are used to change the shape of a tensor. `view` **doesn't copy the data**: it returns a tensor that shares the same underlying storage and simply provides us with a specific view of it. `reshape` does the same whenever it can, and only copies the data when a view is not possible

The main difference is that:
- `view` cannot be used on *non contiguous* tensors
- `reshape` can be used on both *contiguous* and *non contiguous* tensors: if the tensor is *contiguous* it will internally use view, otherwise it will copy the data into a new tensor with the requested shape


*contiguous* means that we are viewing the tensor's elements in the same order in which they are stored in memory.
You can learn more through this [article](https://medium.com/analytics-vidhya/pytorch-contiguous-vs-non-contiguous-tensor-view-understanding-view-reshape-73e10cdfa0dd) 

In [None]:
# A batch of 32x64 matrices for comparing permute with reshape.
a = torch.randn(batch_size, 32, 64)

In [None]:
# Both operations report the same resulting shape; one line is printed per operation.
print(
    f"Permutation: {a.permute((0, 2, 1)).shape}\n"
    f"Reshaping: {a.reshape((batch_size, 64, 32)).shape}"
)

Permutation: torch.Size([128, 64, 32])
Reshaping: torch.Size([128, 64, 32])


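Permutation is an operation that switches the axes of a tensor without changing the length of each axis. Note that although both results above have the same shape, they are not the same tensor: `permute` swaps the last two axes (a transpose of every matrix in the batch), while `reshape` keeps the elements in their original order and only regroups them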

### Changing the Device

In [75]:
# Prefer a CUDA GPU, then the MPS backend, and fall back to the CPU so the
# cell works on any machine.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

In [76]:
# Move the tensor to the selected device; the result of `.to` is rebound to `a`.
a = a.to(device)
a.device

device(type='mps', index=0)

## Common Operations

In [77]:
# A 5x5 matrix of normally distributed values, mixing positive and negative entries.
a = torch.randn(5, 5)
a

tensor([[ 0.5641,  0.2369, -0.0366, -0.0636,  1.3189],
        [-0.1681, -1.7498,  1.0637, -1.3931, -0.8938],
        [ 0.4896, -0.1967, -0.9846, -0.1149,  0.4386],
        [ 0.5820, -0.4963,  0.4155,  0.7391,  1.2273],
        [ 0.5746,  0.2857, -0.7124, -0.4985, -1.3165]])

In [78]:
# Element-wise absolute value, stored in a separate tensor.
abs_a = a.abs()
abs_a

tensor([[0.5641, 0.2369, 0.0366, 0.0636, 1.3189],
        [0.1681, 1.7498, 1.0637, 1.3931, 0.8938],
        [0.4896, 0.1967, 0.9846, 0.1149, 0.4386],
        [0.5820, 0.4963, 0.4155, 0.7391, 1.2273],
        [0.5746, 0.2857, 0.7124, 0.4985, 1.3165]])

In [79]:
# Element-wise cosine of every entry.
cos_a = a.cos()
cos_a

tensor([[ 0.8451,  0.9721,  0.9993,  0.9980,  0.2493],
        [ 0.9859, -0.1780,  0.4856,  0.1767,  0.6264],
        [ 0.8825,  0.9807,  0.5532,  0.9934,  0.9054],
        [ 0.8354,  0.8793,  0.9149,  0.7391,  0.3368],
        [ 0.8394,  0.9595,  0.7568,  0.8783,  0.2516]])

### min, max, argmin, argmax

In [81]:
# Global minimum over all entries, and its index into the flattened tensor.
a.min(), a.argmin()

(tensor(-1.7498), tensor(6))

`a.min()`: returns the minimum value found in all of the matrix

`a.argmin()`: returns the index of the minimum value, given it's flattened

In [82]:
# Reducing along dim=0 collapses the rows: one minimum per column, together
# with the row index at which it occurs.
print(a.min(dim=0), a.argmin(dim=0), sep='\n\n')

torch.return_types.min(
values=tensor([-0.1681, -1.7498, -0.9846, -1.3931, -1.3165]),
indices=tensor([1, 1, 2, 1, 4]))

tensor([1, 1, 2, 1, 4])


`a.min(dim=0)`: returns two items:
- The first is the minimum value found along the specified dimension. Since we specified `dim=0` (the row axis), the reduction runs down the rows, so we get the minimum value of every column: five columns yield five minimum values
- The second item is the row index at which each of these minimum values occurs

`a.argmin(dim=0)`: returns the indices of the minimum values found along the specified dimension, identical to the second item returned from `a.min(dim=0)`

#### Note
`argmin` and `argmax` are specifically helpful in functions like cross entropy, or finding the maximum probability from the output of a neural network

You can view a detailed [list of operations](https://pytorch.org/docs/stable/tensors.html#torch.Tensor.mH)

### In-place Operations

Adding an underscore (_) after an operation's name makes the operation in-place, meaning it will modify the tensor itself instead of returning a new copy as before

In [83]:
# Out-of-place: `abs()` builds a new tensor, so `a` still holds its negative
# entries and the comparison is not all-True.
abs_a = a.abs()
(a == abs_a).all()

tensor(False)

In [None]:
# In-place: the trailing underscore makes `abs_` overwrite `a` itself,
# so it now matches `abs_a` everywhere.
a.abs_()
(a == abs_a).all()

tensor(True)