# PYTORCH SESSION_1

In [1]:
import torch
import numpy as np

In [2]:
# Build the 2-D example tensor that the rest of the notebook works with.
# NOTE(review): `1,865` and `2,3422` are written with commas, so each is read as
# two separate elements (1 and 865, 2 and 3422) -- possibly meant as 1.865 and
# 2.3422. The result is a 2x4 tensor, and later cells (shape, stride (4, 1),
# the t[1,2] overwrite) rely on that 2x4 layout, so the values are left as-is.
t = torch.tensor([[0.7071,1.4141,1,865],
                  [0.8245, 2,3422,5.2324]])
t

tensor([[7.0710e-01, 1.4141e+00, 1.0000e+00, 8.6500e+02],
        [8.2450e-01, 2.0000e+00, 3.4220e+03, 5.2324e+00]])

In [3]:
t.dim()

2

In [4]:
t.shape

torch.Size([2, 4])

In [5]:
t[1,2]

tensor(3422.)

In [6]:
t[1,2] = 3.4222


In [7]:
t[1,2].item()

3.4221999645233154

In [8]:
t[:,1:]

tensor([[  1.4141,   1.0000, 865.0000],
        [  2.0000,   3.4222,   5.2324]])

In [9]:
t[:, ::2]

tensor([[0.7071, 1.0000],
        [0.8245, 3.4222]])

## None adds a new (size 1) dimension

In [10]:
t[1,2]

tensor(3.4222)

In [11]:
t[1,2,None]

tensor([3.4222])

In [12]:
t[1,2],t[1,2,None]

(tensor(3.4222), tensor([3.4222]))

# Advanced Indexing with mask

In [13]:
# Boolean-mask indexing: build the mask once, then assign through it in place.
below_one = t < 1
t[below_one] = 0
t

tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
        [  0.0000,   2.0000,   3.4222,   5.2324]])

In [14]:
t < 1, t

(tensor([[ True, False, False, False],
         [ True, False, False, False]]),
 tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
         [  0.0000,   2.0000,   3.4222,   5.2324]]))

# TENSOR ELEMENTS TYPES

- torch.float32 aka torch.float
- torch.float64 aka torch.double
- torch.float16 aka torch.half; torch.bfloat16 (bf16) is a separate 16-bit floating-point type
- int8, uint8, int16, int32/int, int64/long
- torch.bool
- torch.complex32, complex64, complex128 (with half/float/double real and imaginary part)
- torch.quint8 for quantized tensors

- <i> for pytorch </i> float32 is the most common dtype
- <i> for indexing </i> int64 is often used, but int32 can be much faster if it works

In [15]:
t.dtype

torch.float32

### Specify dtype in factory functions

In [16]:
torch.ones((2,3), dtype = torch.float64)

tensor([[1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)

#### Convert if necessary

In [17]:
t.to(torch.double)

tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
        [  0.0000,   2.0000,   3.4222,   5.2324]], dtype=torch.float64)

In [18]:
t.to(torch.float64)

tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
        [  0.0000,   2.0000,   3.4222,   5.2324]], dtype=torch.float64)

In [19]:
t.to(torch.long)

tensor([[  0,   1,   1, 865],
        [  0,   2,   3,   5]])

In [20]:
t.to(torch.half)

tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
        [  0.0000,   2.0000,   3.4219,   5.2305]], dtype=torch.float16)

In [21]:
t.to(torch.int)

tensor([[  0,   1,   1, 865],
        [  0,   2,   3,   5]], dtype=torch.int32)

In [22]:
t.double()

tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
        [  0.0000,   2.0000,   3.4222,   5.2324]], dtype=torch.float64)

In [23]:
t.half()

tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
        [  0.0000,   2.0000,   3.4219,   5.2305]], dtype=torch.float16)

In [24]:
x = t.long()
x.dtype

torch.int64

In [25]:
t.int()

tensor([[  0,   1,   1, 865],
        [  0,   2,   3,   5]], dtype=torch.int32)

# PUTTING TENSORS ON THE GPU

#### Tensors have a device; we can move them between devices just as we convert between dtypes

In [26]:
t.device

device(type='cpu')

In [27]:
# .to() accepts a target device as well as a dtype. Pick CUDA when it is
# available and fall back to the CPU otherwise, so the cell runs on any machine
# (on a CPU-only machine the result is the same float16 CPU tensor as before).
target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
t.to(device=target_device, dtype=torch.half)

tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
        [  0.0000,   2.0000,   3.4219,   5.2305]], dtype=torch.float16)

# TENSOR-A SCENIC VIEW OF MEMORY BLOBS

#### Internally, Tensors are (most often) stored as a blob of memory

In [28]:
t

tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
        [  0.0000,   2.0000,   3.4222,   5.2324]])

In [29]:
t.storage()

 0.0
 1.4141000509262085
 1.0
 865.0
 0.0
 2.0
 3.4221999645233154
 5.232399940490723
[torch.storage._TypedStorage(dtype=torch.float32, device=cpu) of size 8]

#### METADATA of tensor

- The size, stride, offset and data pointer (<code>data_ptr</code>), in addition to device and dtype, are key tensor <i>metadata</i>. They tell PyTorch how to resolve a tensor index to a memory location.

- The metadata is always stored on the CPU, but typically provided to CUDA kernels as we go along.

- So in the examples, the index <code>t[1,2]</code> transforms into the memory location <code>1 * t.stride(0) + 2 * t.stride(1) = 1 * 4 + 2 * 1</code>, i.e. memory location 6 (with 0-based counting)

- A tensor whose strides are descending and whose storage has no "gaps" is called <code>contiguous</code>. You can force one (copying as needed) through <code>t = t.contiguous()</code>.


In [30]:
t.stride()

(4, 1)

In [31]:
t.shape

torch.Size([2, 4])

In [32]:
t.storage_offset()

0

In [33]:
t.data_ptr()

140669039429952

In [34]:
t[1,2]

tensor(3.4222)

In [35]:
v = torch.rand((3,5))
v

tensor([[0.1551, 0.2202, 0.0667, 0.9987, 0.0650],
        [0.6958, 0.6001, 0.0791, 0.1937, 0.0066],
        [0.7979, 0.1320, 0.2372, 0.4751, 0.9901]])

In [36]:
v.storage()

 0.15508830547332764
 0.22020792961120605
 0.0666506290435791
 0.9986899495124817
 0.06496840715408325
 0.6957641839981079
 0.600147008895874
 0.0791136622428894
 0.19369953870773315
 0.006577432155609131
 0.7978680729866028
 0.1319788694381714
 0.23716026544570923
 0.475127637386322
 0.9900521039962769
[torch.storage._TypedStorage(dtype=torch.float32, device=cpu) of size 15]

In [37]:
v.stride(), v.shape, v.storage_offset(), v.data_ptr(), v.storage().data_ptr()

((5, 1), torch.Size([3, 5]), 0, 140668998881600, 140668998881600)

In [38]:
# Draw a fresh random 3x5 tensor and keep only columns 2..4 of it.
# Slicing does not copy: v is a 3x3 view into the original 15-element storage.
v = torch.rand((3, 5))[:, 2:]
v

tensor([[0.7364, 0.0352, 0.0414],
        [0.0890, 0.9103, 0.3119],
        [0.5576, 0.4658, 0.7648]])

In [39]:
v.storage() # v is a view: storage() still lists all 15 values of the underlying 3x5 tensor

 0.5733586549758911
 0.661700427532196
 0.7363727688789368
 0.035160183906555176
 0.04139518737792969
 0.30858278274536133
 0.7208130955696106
 0.08901572227478027
 0.9102987051010132
 0.31191879510879517
 0.3560728430747986
 0.6295565366744995
 0.5576223134994507
 0.465814471244812
 0.7647926807403564
[torch.storage._TypedStorage(dtype=torch.float32, device=cpu) of size 15]

In [40]:
v.stride(), v.shape, v.storage_offset(), v.data_ptr(), v.storage().data_ptr()

((5, 1), torch.Size([3, 3]), 2, 140668996562568, 140668996562560)

In [41]:
t = t.contiguous()
t

tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
        [  0.0000,   2.0000,   3.4222,   5.2324]])

In [42]:
v.storage()

 0.5733586549758911
 0.661700427532196
 0.7363727688789368
 0.035160183906555176
 0.04139518737792969
 0.30858278274536133
 0.7208130955696106
 0.08901572227478027
 0.9102987051010132
 0.31191879510879517
 0.3560728430747986
 0.6295565366744995
 0.5576223134994507
 0.465814471244812
 0.7647926807403564
[torch.storage._TypedStorage(dtype=torch.float32, device=cpu) of size 15]

### <code>contiguous</code> in PyTorch means that neighboring elements in the tensor are actually next to each other in memory. Let’s take a simple example:

In [43]:
# A freshly built tensor is laid out row by row, i.e. it is contiguous.
x = torch.tensor(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
x

tensor([[1, 2, 3],
        [4, 5, 6]])

In [44]:
x.storage()

 1
 2
 3
 4
 5
 6
[torch.storage._TypedStorage(dtype=torch.int64, device=cpu) of size 6]

In [45]:
y = x.transpose(0, 1) # y is non-contiguous
y

tensor([[1, 4],
        [2, 5],
        [3, 6]])

In [46]:
y.storage()

 1
 2
 3
 4
 5
 6
[torch.storage._TypedStorage(dtype=torch.int64, device=cpu) of size 6]

In [47]:
x.data_ptr()

140668998068736

In [48]:
y.data_ptr()

140668998068736

In [49]:
print(x.is_contiguous())

True


In [50]:
print(y.is_contiguous())

False


Since x is contiguous, <code>x[0][0] and x[0][1]</code> are <b>next to each other in memory</b>. But <code>y[0][0] and y[0][1]</code> are not.

# COMMON VIEW-CREATING OPERATIONS

- <code>.t()</code>, <code>.transpose(...)</code>, <code>.permute(...)</code> : change the order of axes in the stride/size metadata,

- <code>.view(...)</code>: change the stride/size

- (Single) Indexing: drops one dimension, changes offset for indices other than <code>0</code>

- Slicing, <code>.narrow</code>: reduce sizes and change the offset; strided slicing <code>(::2)</code> also changes the stride

- <code>.as_strided</code> : replaces the stride/size metadata with something else, powerful but <b>dangerous </b> in the sense that there are no sanity checks

Special case: <code>reshape</code> creates a view (like <code>view</code>) if possible and copies the tensor if not

<h3><code>view() vs transpose()</code></h3>

<code>transpose()</code>, like <code>view()</code> can also be used to change the shape of a tensor and it also returns a new tensor sharing the data with the original tensor:



One difference is that <code>view()</code> can only operate on a contiguous tensor, and the returned tensor is still contiguous. <code>transpose()</code> can operate on both contiguous and non-contiguous tensors. Unlike <code>view()</code>, the tensor it returns may no longer be contiguous.

<h3><code>permute() vs transpose()</code></h3>

<code>permute()</code> and <code>transpose()</code> are similar. <code>transpose()</code> can only swap two dimensions, but <code>permute()</code> can reorder all the dimensions at once. For example:

In [51]:
v = torch.rand((4,5))
v

tensor([[0.4425, 0.0897, 0.1937, 0.4093, 0.1829],
        [0.3224, 0.2922, 0.1216, 0.4738, 0.3364],
        [0.6190, 0.9352, 0.2751, 0.4832, 0.3374],
        [0.8232, 0.3731, 0.5171, 0.8361, 0.8033]])

In [52]:
v.shape

torch.Size([4, 5])

In [53]:
v.t()

tensor([[0.4425, 0.3224, 0.6190, 0.8232],
        [0.0897, 0.2922, 0.9352, 0.3731],
        [0.1937, 0.1216, 0.2751, 0.5171],
        [0.4093, 0.4738, 0.4832, 0.8361],
        [0.1829, 0.3364, 0.3374, 0.8033]])

In [54]:
v.transpose(0,1) # dim0 <----> dim1

tensor([[0.4425, 0.3224, 0.6190, 0.8232],
        [0.0897, 0.2922, 0.9352, 0.3731],
        [0.1937, 0.1216, 0.2751, 0.5171],
        [0.4093, 0.4738, 0.4832, 0.8361],
        [0.1829, 0.3364, 0.3374, 0.8033]])

In [55]:
v.permute(1,0) # we have 2 dimensions so we can change them by order

tensor([[0.4425, 0.3224, 0.6190, 0.8232],
        [0.0897, 0.2922, 0.9352, 0.3731],
        [0.1937, 0.1216, 0.2751, 0.5171],
        [0.4093, 0.4738, 0.4832, 0.8361],
        [0.1829, 0.3364, 0.3374, 0.8033]])

In [56]:
v.view(5,4)

tensor([[0.4425, 0.0897, 0.1937, 0.4093],
        [0.1829, 0.3224, 0.2922, 0.1216],
        [0.4738, 0.3364, 0.6190, 0.9352],
        [0.2751, 0.4832, 0.3374, 0.8232],
        [0.3731, 0.5171, 0.8361, 0.8033]])

In [57]:
x = torch.rand(3, 5, 3)

In [58]:
x.shape

torch.Size([3, 5, 3])

In [59]:

# transpose swaps exactly two axes: (3, 5, 3) -> (3, 3, 5)
y = x.transpose(1, 2) # swap dim1 and dim2

# permute takes the complete new axis order; (2, 1, 0) reverses the axes,
# so the (3, 5, 3) shape maps to (3, 5, 3) again while the elements are rearranged
z = x.permute(2, 1, 0) # new order: old dim2, old dim1, old dim0

In [60]:
x

tensor([[[0.5759, 0.9303, 0.5310],
         [0.6915, 0.9892, 0.5281],
         [0.1817, 0.5255, 0.5200],
         [0.8090, 0.5901, 0.0245],
         [0.1847, 0.3170, 0.1333]],

        [[0.3970, 0.4477, 0.7566],
         [0.6152, 0.2231, 0.2015],
         [0.4656, 0.2867, 0.9432],
         [0.7010, 0.1540, 0.4959],
         [0.4672, 0.9662, 0.0609]],

        [[0.7007, 0.3559, 0.1589],
         [0.4868, 0.6171, 0.9794],
         [0.3041, 0.0865, 0.8772],
         [0.6889, 0.2504, 0.3014],
         [0.8451, 0.3904, 0.8351]]])

In [61]:
y

tensor([[[0.5759, 0.6915, 0.1817, 0.8090, 0.1847],
         [0.9303, 0.9892, 0.5255, 0.5901, 0.3170],
         [0.5310, 0.5281, 0.5200, 0.0245, 0.1333]],

        [[0.3970, 0.6152, 0.4656, 0.7010, 0.4672],
         [0.4477, 0.2231, 0.2867, 0.1540, 0.9662],
         [0.7566, 0.2015, 0.9432, 0.4959, 0.0609]],

        [[0.7007, 0.4868, 0.3041, 0.6889, 0.8451],
         [0.3559, 0.6171, 0.0865, 0.2504, 0.3904],
         [0.1589, 0.9794, 0.8772, 0.3014, 0.8351]]])

In [62]:
y.shape

torch.Size([3, 3, 5])

In [63]:
z

tensor([[[0.5759, 0.3970, 0.7007],
         [0.6915, 0.6152, 0.4868],
         [0.1817, 0.4656, 0.3041],
         [0.8090, 0.7010, 0.6889],
         [0.1847, 0.4672, 0.8451]],

        [[0.9303, 0.4477, 0.3559],
         [0.9892, 0.2231, 0.6171],
         [0.5255, 0.2867, 0.0865],
         [0.5901, 0.1540, 0.2504],
         [0.3170, 0.9662, 0.3904]],

        [[0.5310, 0.7566, 0.1589],
         [0.5281, 0.2015, 0.9794],
         [0.5200, 0.9432, 0.8772],
         [0.0245, 0.4959, 0.3014],
         [0.1333, 0.0609, 0.8351]]])

In [64]:
z.shape

torch.Size([3, 5, 3])

In [65]:
y.shape

torch.Size([3, 3, 5])

In [66]:
z.shape

torch.Size([3, 5, 3])

<h2><code>torch.narrow(input, dim, start, length)</code></h2>

Returns a new tensor that is a narrowed version of the input tensor. Along dimension dim, the result keeps the elements from index start up to (but not including) start + length. The returned tensor and the input tensor share the same underlying storage.

- input (Tensor) – the tensor to narrow

- dim (int) – the dimension along which to narrow

- start (Tensor or int) – the index along dim at which the narrowed range starts

- length (int) – the number of elements to keep along dim

In [67]:
# 3x3 integer grid holding 1..9 in row-major order.
x = torch.arange(1, 10).reshape(3, 3)
x

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [68]:
x.shape

torch.Size([3, 3])

In [69]:
torch.narrow(x, 1,2,1) # dim=1 selects along columns (dim 0 = rows); start at column index 2 and keep length=1 column

tensor([[3],
        [6],
        [9]])

# IF YOU NEED A COPY

You can use <code>t.clone()</code> to forcefully create a copy, or <code>.to(device = ..., dtype = ..., copy = True)</code> if you need a conversion that is guaranteed to copy even if the tensor is already in the right form

In [70]:
t2 = t.clone()
t2

tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
        [  0.0000,   2.0000,   3.4222,   5.2324]])

In [71]:
t.data_ptr(), t2.data_ptr(), t, t2 # different address, same content

(140669039429952,
 140668996561344,
 tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
         [  0.0000,   2.0000,   3.4222,   5.2324]]),
 tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
         [  0.0000,   2.0000,   3.4222,   5.2324]]))

In [72]:
t.is_contiguous(), t2.is_contiguous()

(True, True)

# GENERALIZED TENSORS ARE TENSORS, TOO.

- These were strided tensors
- There are other Tensor types  : Sparse, on TPU, ...
- Quantized is a bit special, but similar in spirit

In [73]:
t.layout

torch.strided

In [74]:
# COO sparse tensor: a (1, 10) index matrix (one row per sparse dim) plus 10 random values.
sparse_indices = torch.arange(10).unsqueeze(0)
sparse_values = torch.randn(10)
s = torch.sparse_coo_tensor(sparse_indices, sparse_values)
s, s.layout

(tensor(indices=tensor([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]),
        values=tensor([-0.5729, -0.5428,  2.0772, -0.7035, -0.4382,  0.5783,
                        0.5230, -0.6898, -0.7627,  0.3398]),
        size=(10,), nnz=10, layout=torch.sparse_coo),
 torch.sparse_coo)

# SAVING AND LOADING TENSORS

In [75]:
# Round-trip the tensor through a file in the current working directory.
# map_location='cpu' asks torch.load to place the loaded storages on the CPU.
# NOTE(review): torch.load unpickles the file, so only load files you trust.
torch.save(t, 'my_tensor.pt')
torch.load('my_tensor.pt', map_location='cpu')

tensor([[  0.0000,   1.4141,   1.0000, 865.0000],
        [  0.0000,   2.0000,   3.4222,   5.2324]])

# INPLACE OPERATIONS

In-place operations modify their input instead of creating a new tensor as output. They are signaled by a trailing underscore <code>_</code> (e.g. <code>add_</code>).

In [76]:
a = torch.rand(2,3)
a,a.data_ptr()

(tensor([[0.1992, 0.0078, 0.5014],
         [0.3508, 0.1924, 0.9293]]),
 140669002752832)

In [77]:
b = torch.rand(2,3)
b

tensor([[0.3546, 0.0758, 0.7404],
        [0.7547, 0.6348, 0.5306]])

In [78]:
a.add(b), a.data_ptr()


(tensor([[0.5537, 0.0836, 1.2418],
         [1.1055, 0.8272, 1.4599]]),
 140669002752832)

In [79]:
a # add() returned a new tensor, so a still holds its original values

tensor([[0.1992, 0.0078, 0.5014],
        [0.3508, 0.1924, 0.9293]])

In [80]:
a.add_(b), a # add_() works in place: a itself now holds the sum a + b

(tensor([[0.5537, 0.0836, 1.2418],
         [1.1055, 0.8272, 1.4599]]),
 tensor([[0.5537, 0.0836, 1.2418],
         [1.1055, 0.8272, 1.4599]]))