# [REQUIRED]

* `pip install torch torchaudio torchvision torchtext`
* install `tensorboard` and `jupyter` from VSCode

# torch basics

reference 
* https://pytorch.org/docs/stable/torch.html
* https://pytorch.org/docs/stable/tensors.html

In [None]:
# PyTorch is imported under the alias `tt`, which every later cell uses.
import torch as tt
#import torchvision, torchaudio, torchtext
# Print the installed torch version (the f-string `=` form echoes the expression and its value).
print(f'{tt.__version__=}')

In [None]:
# Build a recurrent cell configured with input_size=3, hidden_size=6 and no bias; the bare name on the last line displays its repr.
# NOTE(review): stock `torch.nn` does not appear to provide `JANETCell` - presumably this relies on a custom or patched build.
# Confirm before running; otherwise this line raises AttributeError.
jc = tt.nn.JANETCell(input_size=3, hidden_size=6, bias=False)
jc

# Creation Ops

In [None]:
# Signature cheat-sheet for tensor creation ops.
# The `None` values are placeholders standing in for real arguments; the cell is a reference listing rather than something to execute.
tt.tensor(data=None, dtype=None, device=None, requires_grad=None)       # always copies `data`; to copy an existing tensor prefer .clone().detach()
tt.as_tensor(data=None, dtype=None, device=None, requires_grad=None)    # preserves autograd history and avoids copies where possible. NOTE(review): documented signature is as_tensor(data, dtype=None, device=None) - no requires_grad; verify
tt.from_numpy(data=None, dtype=None, device=None, requires_grad=None)   # creates a tensor that shares storage with a NumPy array. NOTE(review): documented as from_numpy(ndarray) only - the extra keywords look copy-pasted; verify

tt.zeros(size=None, dtype=None, device=None, requires_grad=None)
tt.zeros_like(input=None, dtype=None, device=None, requires_grad=None)

tt.ones(size=None, dtype=None, device=None, requires_grad=None)
tt.ones_like(input=None, dtype=None, device=None, requires_grad=None)

tt.empty(size=None, dtype=None, device=None, requires_grad=None)
tt.empty_like(input=None, dtype=None, device=None, requires_grad=None)

tt.full(size=None, fill_value=None, dtype=None, device=None, requires_grad=None)
tt.full_like(input=None, fill_value=None, dtype=None, device=None, requires_grad=None)

tt.arange(start=None, end=None, step=None, dtype=None, device=None, requires_grad=None) # returns a 1-D tensor
tt.linspace(start=None, end=None, steps=None, dtype=None, device=None, requires_grad=None) # returns a 1-D tensor
tt.logspace(start=None, end=None, steps=None, base=None, dtype=None, device=None, requires_grad=None) # returns a 1-D tensor

tt.eye(n=None, m=None, dtype=None, device=None, requires_grad=None) # (n, m) 2-D identity-like matrix; m is optional and defaults to n
tt.heaviside(input=None, values=None, dtype=None, device=None, requires_grad=None) # output is shaped like input: 0 where input < 0, 1 where input > 0, `values` where input == 0. NOTE(review): documented as heaviside(input, values), a pointwise math op - dtype/device/requires_grad are not in its signature; verify


# RNG Generator

In [None]:
# Create a standalone random-number generator object, separate from torch's default one.
rng = tt.Generator() # https://pytorch.org/docs/stable/generated/torch.Generator.html#torch.Generator
rng # rng can be passed to torch's random sampling functions via `generator=` (and to the in-place sampling methods)

In [None]:
rng.initial_seed() # returns the seed that `rng` is currently seeded with

In [None]:
rng.manual_seed(rng.initial_seed()+1)  # seed `rng` manually - here with the current seed plus one
#  It is recommended to set a large seed, i.e. a number that has a good balance of 0 and 1 bits. Avoid having many 0 bits in the seed.
rng.initial_seed()  # display the seed now in effect

In [None]:
rng.seed() # re-seed `rng` with a randomly chosen seed
# Gets a non-deterministic random number from std::random_device or the current time and uses it to seed a Generator.
rng.initial_seed()  # display the seed that was picked

## Setting RNG states

### [1] - store the current state

In [None]:
# Snapshot the generator state so that it can be restored in step [3].
rng_state = rng.get_state()
# Returns the Generator state as a torch.ByteTensor.
# A torch.ByteTensor which contains all the necessary bits to restore a Generator to a specific point in time.
rng.initial_seed(), rng_state #<---- current seed and current rng state

### [2] - set random seed - changes the current state

In [None]:
rng.seed() #<--- change the rng state by re-seeding randomly
rng.initial_seed(), rng.get_state()  # display the new seed and state for comparison with step [1]

### [3] - restore the original state from (1)

In [None]:
rng.set_state(rng_state) #<---- revert to the original state captured in step [1], before the random re-seeding
rng.initial_seed(), rng.get_state()  # display seed and state to compare against the values shown in step [1]

# Random Sampling 

The default (global) rng used by torch can be manipulated with the same functions described in the RNG Generator section above, called on the `torch` module itself.

In [None]:
print('Initial-Seed:', tt.initial_seed()) # current seed of the default rng

# set seed
tt.manual_seed(tt.initial_seed()+1) # manually set the default rng's seed (here: current seed plus one)
print('Manual-Seed:', tt.initial_seed()) # current seed of the default rng

# or 
tt.seed() # re-seed the default rng with a randomly chosen seed
print('Random-Seed:', tt.initial_seed()) # current seed of the default rng

# 'get_rng_state()' and 'set_rng_state()' can be used similarly to save and restore the default rng's state

## Creation Ops - Random Sampling

cannot use `requires_grad` for these

In [None]:
# Sampling ops driven by the explicit generator `rng` created earlier in the notebook.
tt.bernoulli(
    input=tt.tensor([0.5, 0.5]),  # each element is the probability of drawing a 1 at that position
    generator=rng)
    
tt.multinomial(
    input=tt.tensor([[2.0, 4.0, 3.0], [4.0, 7.0, 5.0]]), # probabilities or weights
    # The rows of input do not need to sum to one (in which case we use the values as weights), 
    # but must be non-negative, finite and have a non-zero sum.
    # If input is a vector, out is a vector of size num_samples.
    # If input is a matrix with m rows, out is a matrix of shape (m, num_samples).
    num_samples=4,
    replacement=True,
    generator=rng)

tt.normal(
    mean=tt.tensor([[2.0, 4.0, 3.0], [4.0, 7.0, 5.0]]),  # per-element mean
    std=tt.tensor([[2.0, 4.0, 3.0], [4.0, 7.0, 5.0]]),   # per-element standard deviation
    generator=rng)

tt.poisson(
    input=tt.tensor([[2.0, 4.0, 3.0], [4.0, 7.0, 5.0]]), # contains the 'rate' of the Poisson distribution for each element
    generator=rng)

can use `requires_grad` for these

In [None]:
# Signature cheat-sheet for random creation ops; the `None` values are placeholders for real arguments.
tt.rand(size=(1,2), dtype=None, device=None, requires_grad=None)        # uniform samples
tt.rand_like(input=None, dtype=None, device=None, requires_grad=None)   # same, shaped like `input`

tt.randint(low=0, high=10, size=(3,4), dtype=None, device=None, requires_grad=None)       # random integers between low and high
tt.randint_like(input=None, low=0, high=10, dtype=None, device=None, requires_grad=None)  # same, shaped like `input`

tt.randn(size=(1,2), dtype=None, device=None, requires_grad=None)       # standard-normal samples
tt.randn_like(input=None, dtype=None, device=None, requires_grad=None)  # same, shaped like `input`

tt.randperm(n=10, dtype=None, device=None, requires_grad=None)          # random permutation of n integers


## Inplace Ops - Random Sampling

https://pytorch.org/docs/stable/torch.html#in-place-random-sampling

In [None]:
# Reference list of in-place sampling methods.
# They are written here on the `Tensor` class with no arguments; presumably the intended usage is on a tensor instance, e.g. `x.uniform_()`.
tt.Tensor.bernoulli_() #- in-place version of torch.bernoulli()

tt.Tensor.cauchy_() #- numbers drawn from the Cauchy distribution

tt.Tensor.exponential_() #- numbers drawn from the exponential distribution

tt.Tensor.geometric_() #- elements drawn from the geometric distribution

tt.Tensor.log_normal_() #- samples from the log-normal distribution

tt.Tensor.normal_() #- in-place version of torch.normal()

tt.Tensor.random_() #- numbers sampled from the discrete uniform distribution

tt.Tensor.uniform_() #- numbers sampled from the continuous uniform distribution

# Indexing, Slicing, Joining, Mutating Ops

https://pytorch.org/docs/stable/torch.html#indexing-slicing-joining-mutating-ops

In [None]:
# basic slicing (schematic): each dimension takes start:stop:step; the quoted names are placeholders, not real indices
tt.Tensor[ 'start':'stop':'step', ..., ..., ... ]

In [None]:
# Joining and splitting (schematic signatures: the string arguments describe the expected types).
tt.cat(tensors='List or tuple of tensors', dim='dimension') # same as concat, concatenate
# torch.cat() can be seen as an inverse operation for torch.split() and torch.chunk().

tt.chunk(input='tensor', chunks='int', dim=0) # -> List of Tensors - chunks are views
# This function may return fewer than the specified number of chunks!
# If the tensor size along the given dimension dim is divisible by chunks, all returned chunks will be the same size. 
# If the tensor size along the given dimension dim is not divisible by chunks, all returned chunks will be the same size, except the last one. 
# If such division is not possible, this function may return fewer than the specified number of chunks.

tt.split(tensor='tensor', split_size_or_sections='int of list_of_int', dim=0) # Splits the tensor into chunks. 
# Each chunk is a view of the original tensor.
# If split_size_or_sections is an integer type, then tensor will be split into equally sized chunks (if possible). 
# Last chunk will be smaller if the tensor size along the given dimension dim is not divisible by split_size.
# If split_size_or_sections is a list, then tensor will be split into len(split_size_or_sections) chunks 
# with sizes in dim according to split_size_or_sections

tt.tensor_split(input='tensor', indices_or_sections="Tensor, int or list or tuple of ints", dim=0) # -> List of Tensors, as views
# If indices_or_sections is an integer n or a zero dimensional long tensor with value n, input is split into n sections along dimension dim. 
# If input is divisible by n along dimension dim, each section will be of equal size, input.size(dim) / n. 
# If input is not divisible by n, the first int(input.size(dim) % n) sections will have size int(input.size(dim) / n) + 1, 
# and the rest will have size int(input.size(dim) / n).
# If indices_or_sections is a list or tuple of ints, or a one-dimensional long tensor, 
# then input is split along dimension dim at each of the indices in the list, tuple or tensor
tt.hsplit(), tt.vsplit(), tt.dsplit() # are some similar functions



# Stacking (schematic signatures).
tt.stack(tensors='List or tuple of tensors', dim=0) # ->Tensor
# Concatenates a sequence of tensors along a new dimension. All tensors need to be of the same size.
tt.hstack(), tt.vstack(), tt.dstack() # are some similar functions
tt.row_stack() # alias of vstack
tt.column_stack(tensors='List or tuple of tensors') # ->Tensor # Equivalent to torch.hstack(tensors), 
# except each zero or one dimensional tensor t in tensors is first reshaped into a (t.numel(), 1) column 
# before being stacked horizontally.

# Index-based selection (schematic signatures; `input` stands for the source tensor).
tt.gather(input, dim='int', index='LongTensor', sparse_grad=False) # ->Tensor # Gathers values along an axis specified by dim. (is NOT a view)
# input and index must have the same number of dimensions. 
# It is also required that index.size(d) <= input.size(d) for all dimensions d != dim. 
# out will have the same shape as index. Note that input and index do not broadcast against each other.
# sparse_grad (bool, optional) – If True, gradient w.r.t. input will be a sparse tensor.

tt.index_select(input, dim='int', index='(IntTensor or LongTensor) – the 1-D tensor containing the indices to index') # -> Tensor, NOT a view
# The returned tensor does not use the same storage as the original tensor.
#  If out has a different shape than expected, we silently change it to the correct shape, reallocating the underlying storage if necessary.
tt.masked_select() # is a similar function

# Reordering dimensions (schematic signatures; `input` stands for the source tensor).
tt.movedim(input, 
            source='(int or tuple of ints) – Original positions of the dims to move. These must be unique.', 
            destination='(int or tuple of ints) – Destination positions for each of the original dims. These must also be unique.') 
# returns Tensor (View). Moves the dimension(s) of input at the position(s) in source to the position(s) in destination.
# dimensions of input that are not explicitly moved remain in their original order and appear at the positions not specified in destination.
tt.moveaxis() # alias of movedim

# 'permute' is similar to 'movedim', but takes the complete new ordering of dimensions
tt.permute(input, dims='(tuple of python:int) – The desired ordering of dimensions') #<--- is a view
tt.permute_copy(input, dims='(tuple of python:int) – The desired ordering of dimensions') #<--- is NOT a view


tt.reshape() # this may or may not return a view


# narrowing, removing and adding dims
tt.narrow(), tt.squeeze(), tt.unsqueeze() #<--- is a view
tt.narrow_copy(), tt.squeeze_copy(), tt.unsqueeze_copy() #<--- is NOT a view


tt.transpose(input, dim0='int', dim1='int'), tt.transpose_copy() # Tensor
# Returns a tensor that is a transposed version of input. The given dimensions dim0 and dim1 are swapped.
tt.t(), tt.t_copy() # short for transpose - Expects input to be (<=2-D) tensor and transposes dimensions 0 and 1.
tt.swapaxes(), tt.swapdims() # alias for transpose

tt.tile() # repeating (tiling)
tt.unbind(input, dim=0) # -> sequence of slices along dim (that dim is removed). NOTE(review): the PyTorch tensor-view docs list unbind() among the view ops, so the earlier "NOT a view" remark looks wrong - verify


# selection
tt.take(input, index='(LongTensor) – the indices into tensor') # Tensor
# Returns a new tensor with the elements of input at the given indices. The input tensor is treated as if it were viewed as a 1-D tensor. 
# The result takes the same shape as the indices.
tt.take_along_dim(input, indices=' (tensor) – the indices into input. Must have long dtype.', dim='int') # Tensor
# Selects values from input at the 1-dimensional indices from indices along the given dim.
# Functions that return indices along a dimension, like torch.argmax() and torch.argsort(), 
# are designed to work with this function. See the examples in the string below.
"""
>>> t = torch.tensor([[10, 30, 20], [60, 40, 50]])
>>> max_idx = torch.argmax(t)
>>> torch.take_along_dim(t, max_idx)
tensor([60])
>>> sorted_idx = torch.argsort(t, dim=1)
>>> torch.take_along_dim(t, sorted_idx, dim=1)
tensor([[10, 20, 30],
        [40, 50, 60]])
        
"""

# conditionals
tt.nonzero(input='Tensor', as_tuple=False) # returns indices of non-zero positions
tt.where(
    condition='(BoolTensor)', 
    x='(Tensor or Scalar) – value (if x is a scalar) or values selected at indices where condition is True', 
    y='(Tensor or Scalar) – value (if y is a scalar) or values selected at indices where condition is False'
    ) # -> Tensor. When only condition is supplied: torch.where(condition) → tuple of LongTensor (indices)
# Worked examples of torch.where (kept as a plain string, not executed):
"""
>>> x = torch.randn(3, 2)
>>> y = torch.ones(3, 2)
>>> x
tensor([[-0.4620,  0.3139],
        [ 0.3898, -0.7197],
        [ 0.0478, -0.1657]])
>>> torch.where(x > 0, x, y)
tensor([[ 1.0000,  0.3139],
        [ 0.3898,  1.0000],
        [ 0.0478,  1.0000]])
>>> x = torch.randn(2, 2, dtype=torch.double)
>>> x
tensor([[ 1.0779,  0.0383],
        [-0.8785, -1.1089]], dtype=torch.float64)
>>> torch.where(x > 0, x, 0.)
tensor([[1.0779, 0.0383],
        [0.0000, 0.0000]], dtype=torch.float64)
"""

# Math Operations

see https://pytorch.org/docs/stable/torch.html#math-operations

Types of Math Ops

* https://pytorch.org/docs/stable/torch.html#pointwise-ops
* https://pytorch.org/docs/stable/torch.html#reduction-ops
* https://pytorch.org/docs/stable/torch.html#comparison-ops
* https://pytorch.org/docs/stable/torch.html#spectral-ops
* https://pytorch.org/docs/stable/torch.html#other-operations
* https://pytorch.org/docs/stable/torch.html#blas-and-lapack-operations

# Type checking

In [None]:
# A 2x3 float32 tensor of zeros to run the checks against.
t = tt.zeros(2, 3, dtype=tt.float32)

# The cell's value is a 4-tuple:
#   (is it a tensor?, is its dtype floating point?, is the single element t[0, 1] non-zero?, total element count)
(
    tt.is_tensor(t),
    tt.is_floating_point(t),
    tt.is_nonzero(t[0, 1]),
    tt.numel(t),
)

## default dtype

In [None]:
# Show the dtype that new floating-point tensors receive by default.
print(tt.get_default_dtype())

# The change lasts for the current Python session only - restarting resets it.
# Supports torch.float32 and torch.float64 as inputs.
# Other dtypes may be accepted without complaint but are not supported and are unlikely to work as expected.
# The dtype is passed positionally: recent PyTorch releases declare this parameter
# positional-only, so the keyword form `d=...` raises TypeError there, while the
# positional form works on every version.
tt.set_default_dtype(tt.float64)
# tt.set_default_tensor_type(t=??) #<-- optional older alternative; deprecated in recent PyTorch releases

# Show the default again to confirm the change took effect.
print(tt.get_default_dtype())

## Shared memory

If arrays or tensors share memory, changing values through one of them is reflected across all views.

In [None]:
# Build a base tensor plus two tensors derived from it, then check whether the derived ones share memory.
a = tt.arange(10)
a1 = a.reshape(5,2)  # reshaped version of `a`
a2 = a[2:5]          # slice of `a`
# NOTE(review): `known` is not defined or imported in the visible part of the notebook - presumably a project-local helper module; confirm it is in scope.
a, a1, a2, known.COMMON_TORCH.shares_memory(a1,a2)

# similar for numpy
#a = np.arange(10)
#a1 = a.reshape(5,2)
#a2 = a[2:5]
#a, a1, a2, known.COMMON_NUMPY.shares_memory(a1,a2)

# Printing to output (verbose)

use set_printoptions()

* precision – Number of digits of precision for floating point output (default = 4).

* threshold – Total number of array elements which trigger summarization rather than full repr (default = 1000).

* edgeitems – Number of array items in summary at beginning and end of each dimension (default = 3).

* linewidth – The number of characters per line for the purpose of inserting line breaks (default = 80). Thresholded matrices will ignore this parameter.

* profile – Sane defaults for pretty printing. Can override with any of the above options. (any one of default, short, full)

* sci_mode – Enable (True) or disable (False) scientific notation. If None (default) is specified, the value is defined by torch._tensor_str._Formatter. This value is automatically chosen by the framework.

In [None]:
# numpy offers a function of the same name - NOTE(review): the two accept overlapping but not identical option sets; verify before relying on "exactly the same"
#np.set_printoptions()
tt.set_printoptions()  # called with no arguments here; pass the options listed above to change how tensors are printed