# Basic pytorch stuff

In [434]:
!jupytext --to markdown "Basic pytorch.ipynb"

[jupytext] Reading Basic pytorch.ipynb
[jupytext] Writing Basic pytorch.md (destination file replaced)


In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

## Make a 3d tensor 
Note that `torch.int` is an alias for `torch.int32`, whereas plain Python integers (or `dtype=int`) produce a `torch.int64` tensor.

In [3]:
# Three copies of the same 2x3 slice stacked along dim 0 -> shape (3, 2, 3).
y = torch.tensor([[[1, 2, 3], [4, 5, 6]]] * 3, dtype=torch.int32)
print(y, "\n\n", y.shape)

tensor([[[1, 2, 3],
         [4, 5, 6]],

        [[1, 2, 3],
         [4, 5, 6]],

        [[1, 2, 3],
         [4, 5, 6]]], dtype=torch.int32) 

 torch.Size([3, 2, 3])


## Summing over different dimensions

In [4]:
y.sum(dim=0)

tensor([[ 3,  6,  9],
        [12, 15, 18]])

In [5]:
y.sum(dim=1)

tensor([[5, 7, 9],
        [5, 7, 9],
        [5, 7, 9]])

In [6]:
y.sum(dim=2)

tensor([[ 6, 15],
        [ 6, 15],
        [ 6, 15]])

## Float tensors

In [7]:
# torch.empty allocates a 2x3 float32 tensor WITHOUT initialising it, so the
# printed values are whatever happened to be in memory (hence the odd numbers).
# It replaces the legacy torch.FloatTensor(2, 3) constructor and states the dtype explicitly.
x = torch.empty(2, 3, dtype=torch.float32)
print(x, x.dtype)

tensor([[3.6011e-38, 4.5866e-41, 1.3215e-12],
        [3.0663e-41, 1.4013e-45, 4.5866e-41]]) torch.float32


In [8]:
np_array = np.random.random((2,3)).astype(float)
np_array

array([[0.68070255, 0.61317935, 0.75916742],
       [0.14941783, 0.34229245, 0.91174874]])

In [9]:
x1 = torch.FloatTensor(np_array)
x2 = torch.randn(2,3)
print(x1, x1.dtype, "\n\n",x2, x2.dtype)

tensor([[0.6807, 0.6132, 0.7592],
        [0.1494, 0.3423, 0.9117]]) torch.float32 

 tensor([[-0.9780,  0.0616, -0.0514],
        [ 1.3100,  0.6309,  0.2642]]) torch.float32


## Integer tensors

In [10]:
int_tensor = torch.arange(4, dtype=torch.int)
int_tensor, int_tensor.dtype

(tensor([0, 1, 2, 3], dtype=torch.int32), torch.int32)

In [11]:
int_tensor.view(2,2)

tensor([[0, 1],
        [2, 3]], dtype=torch.int32)

In [12]:
torch.sum(y, dtype=torch.int)

tensor(63, dtype=torch.int32)

In [13]:
e = torch.exp(int_tensor.float())
e

tensor([ 1.0000,  2.7183,  7.3891, 20.0855])

In [16]:
int_tensor*e

tensor([ 0.0000,  2.7183, 14.7781, 60.2566])

## Matmul and MM plus devices

In [28]:
torch.matmul(x1, x2.t())

tensor([[-0.6670,  1.4792],
        [-0.1720,  0.6526]])

In [22]:
torch.matmul(x1, x2.t())

tensor([[-0.6670,  1.4792],
        [-0.1720,  0.6526]])

In [23]:
np.matmul(x1.numpy(), x2.t().numpy())

array([[-0.66702986,  1.4791664 ],
       [-0.1719538 ,  0.6526077 ]], dtype=float32)

In [34]:
torch.matmul(x1.to(torch.float64), x2.to(torch.float64).t())

tensor([[-0.6670,  1.4792],
        [-0.1720,  0.6526]], dtype=torch.float64)

In [37]:
# Use the GPU when one is visible, otherwise fall back to the CPU so that the
# notebook still runs top to bottom on a machine without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x1 = x1.to(device)
# On a CUDA tensor get_device() returns the index of the GPU holding the data.
x1.get_device()

0

In [53]:
x = torch.randn(3,2)
x

tensor([[-0.3395,  0.7248],
        [-1.6600, -0.0285],
        [-0.3693, -1.8319]])

In [51]:
# Tensor.numpy() works only for tensors in host memory; on a CUDA tensor it raises.
# The exception is bound to `err` rather than `e`: Python unbinds the `as` name when
# the handler exits, which would silently delete the tensor `e` created in an earlier
# cell. A single `except Exception` covers the two identical handlers used before.
try:
    x.to(device).numpy()
except Exception as err:
    print(err)
finally:
    print('OK')

can't convert CUDA tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
OK


In [248]:
# torch.mm accepts only 2-D x 2-D operands, so a 1-D right-hand side is rejected.
# The failure is the point of this cell: catch it and show the message instead of
# leaving an uncaught exception that stops "Run All".
try:
    torch.mm(torch.ones(2, 3), torch.ones(3))
except (IndexError, RuntimeError) as err:
    print(f"{type(err).__name__}: {err}")

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

In [249]:
torch.matmul(torch.ones(2,3), torch.ones(3))

tensor([3., 3.])

## A bit of autograd

In [73]:
x = torch.arange(0,4).float().requires_grad_(True)
x

tensor([0., 1., 2., 3.], requires_grad=True)

In [74]:
y = x**2

In [75]:
y.sum()

tensor(14., grad_fn=<SumBackward0>)

In [77]:
y.sum().backward()

In [78]:
x.grad

tensor([0., 2., 4., 6.])

In [158]:
net = torch.nn.Linear(4,2)

In [159]:
net

Linear(in_features=4, out_features=2, bias=True)

In [163]:
f = torch.arange(0,4).float()
f

tensor([0., 1., 2., 3.])

In [161]:
y = net(f)

In [162]:
y

tensor([-0.6486, -0.9467], grad_fn=<AddBackward0>)

In [164]:
for param in net.parameters():
    print (param)

Parameter containing:
tensor([[ 0.0062, -0.0700,  0.2314, -0.3623],
        [ 0.2912, -0.1247, -0.1449, -0.1122]], requires_grad=True)
Parameter containing:
tensor([ 0.0457, -0.1956], requires_grad=True)


## Simple perceptrons

In [None]:
help(torch.nn.Module)

In [251]:
class MyNet(torch.nn.Module):
    """Minimal feed-forward network: Linear -> Linear -> Sigmoid.

    Args:
        input_size: number of features in each input vector.
        hidden_size: number of units produced by the first linear layer.
        output_size: number of units produced by the second linear layer.
            Defaults to 2, the value that used to be hard-coded.

    Note:
        There is no non-linearity between the two linear layers, so together
        they are still a single affine map; only the final sigmoid is non-linear.
    """

    def __init__(self, input_size, hidden_size, output_size=2):
        super().__init__()
        self.layer1 = torch.nn.Linear(input_size, hidden_size)
        self.layer2 = torch.nn.Linear(hidden_size, output_size)
        self.layer3 = torch.nn.Sigmoid()

    def forward(self, input_val):
        """Apply layer1, layer2 and the sigmoid in order and return the result."""
        return self.layer3(self.layer2(self.layer1(input_val)))

    def print_params(self):
        """Print every ``(name, parameter)`` pair registered on this module."""
        for item in self.named_parameters():
            print(item)

In [252]:
net = MyNet(4,16)

In [253]:
net.print_params()

('layer1.weight', Parameter containing:
tensor([[-0.1583,  0.3567,  0.3486, -0.0454],
        [-0.0739,  0.4934, -0.2603,  0.0862],
        [-0.0727, -0.4693,  0.3118, -0.2353],
        [-0.2766,  0.3744,  0.2552,  0.1936],
        [ 0.2989,  0.1068,  0.3625,  0.1305],
        [ 0.1892, -0.4528,  0.0262, -0.0850],
        [-0.2442,  0.0044, -0.3989, -0.0772],
        [-0.1609,  0.3161, -0.1666,  0.4822],
        [-0.0036, -0.3015,  0.3174,  0.2475],
        [-0.1829,  0.3347, -0.4266,  0.3965],
        [-0.0856,  0.3709,  0.2133, -0.2934],
        [ 0.2672,  0.4092,  0.0927,  0.3500],
        [ 0.0573,  0.2821,  0.2022, -0.0999],
        [ 0.4962,  0.2974,  0.2953, -0.0262],
        [-0.1438,  0.3702,  0.4619,  0.2413],
        [ 0.3241,  0.2545,  0.3146,  0.3213]], requires_grad=True))
('layer1.bias', Parameter containing:
tensor([-0.0095, -0.1366,  0.1833, -0.4231, -0.1795,  0.3327, -0.1543,  0.3713,
         0.3201,  0.3860, -0.4400, -0.3983, -0.3827,  0.2750, -0.4396,  0.0322],
   

In [221]:
net.forward(torch.rand(4))

tensor([0.5246, 0.5869], grad_fn=<SigmoidBackward>)

In [256]:
class MyNet2(torch.nn.Sequential):
    """``nn.Sequential`` subclass wiring Linear -> Linear -> Softmax.

    Args:
        input_size: number of features in each input vector.
        hidden_size: number of units produced by the first linear layer.
        output_size: number of units produced by the second linear layer.

    The softmax normalises over the last dimension, which is the one holding the
    ``output_size`` values produced by the second linear layer. For a 2-D
    ``(batch, features)`` input this is the same axis as ``dim=1``, and it also
    makes a single un-batched 1-D input work instead of raising.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__(
            nn.Linear(input_size, hidden_size),
            nn.Linear(hidden_size, output_size),
            nn.Softmax(dim=-1),
        )

    def print_params(self):
        """Print every parameter tensor of the network (without names)."""
        for item in self.parameters():
            print(item)

In [257]:
def make_net2(input_size, hidden_size, output_size):
    """Build a plain ``nn.Sequential``: Linear -> Linear -> Sigmoid."""
    hidden = nn.Linear(input_size, hidden_size)
    head = nn.Linear(hidden_size, output_size)
    squash = nn.Sigmoid()
    return nn.Sequential(hidden, head, squash)

In [258]:
ttt = MyNet2(4,16, 10)

In [259]:
net2 = make_net2(4,16,10)

In [260]:
net2.forward(torch.ones(4))

tensor([0.3793, 0.3303, 0.5239, 0.6062, 0.5702, 0.4860, 0.6190, 0.5291, 0.4958,
        0.3813], grad_fn=<SigmoidBackward>)

In [261]:
net2

Sequential(
  (0): Linear(in_features=4, out_features=16, bias=True)
  (1): Linear(in_features=16, out_features=10, bias=True)
  (2): Sigmoid()
)

In [264]:
ttt.forward(torch.ones(1,4))

tensor([[0.0401, 0.0630, 0.1632, 0.1117, 0.1190, 0.0536, 0.0935, 0.0718, 0.1397,
         0.1445]], grad_fn=<SoftmaxBackward>)

In [269]:
# Toy BCE-with-logits example: 10 samples, 64 independent binary labels each.
# `target` was never defined in this notebook, so the cell only ran on leftover
# kernel state. With every label set to the positive class the loss reduces to
# -log(sigmoid(0.999)).
target = torch.ones([10, 64], dtype=torch.float32)  # all labels are 1
output = torch.full([10, 64], 0.999)  # A prediction (logit)
pos_weight = torch.ones([64])  # All weights are equal to 1
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
criterion(output, target)  # -log(sigmoid(0.999))

tensor(0.3135)

#  Experimenting with convolutions and autograd

We make an input tensor that does not require grad

In [219]:
# this is an input tensor
input_tensor = torch.arange(1,10).to(torch.float).view((1,1,3,3))
input_tensor

tensor([[[[1., 2., 3.],
          [4., 5., 6.],
          [7., 8., 9.]]]])

In [222]:
print(f"The tenosr does not require a grad wrt it: {input_tensor.requires_grad}")

The tenosr does not require a grad wrt it: False


We make a custom conv2d layer that initializes all weights to 1 and does not have a bias

In [223]:
class Conv2dCustom(nn.Conv2d):
    """Bias-free ``nn.Conv2d`` whose kernel weights are all initialised to 1.

    Args:
        in_channels: number of input channels (default 1).
        out_channels: number of output channels (default 1).
        kernel_size: kernel size, an int or an ``(height, width)`` tuple
            (default ``(3, 3)``).
    """

    def __init__(self, in_channels=1, out_channels=1, kernel_size=(3,3)):
        super().__init__(in_channels, out_channels, kernel_size, bias=False)
        # Fill the weight that nn.Conv2d already allocated instead of replacing it
        # with a hand-built tensor. This keeps the (out_channels, in_channels, kH, kW)
        # layout the layer expects (the old code swapped the two channel axes) and
        # also works when kernel_size is given as a plain int.
        nn.init.ones_(self.weight)

    def __repr__(self):
        """Return the standard Conv2d repr followed by the current weight values."""
        super_repr = super().__repr__()
        return super_repr + '\nweights:\n' + str(self.weight.data)

In [228]:
layer_conv2d = Conv2dCustom()
layer_conv2d

Conv2dCustom(1, 1, kernel_size=(3, 3), stride=(1, 1), bias=False)
weights:
tensor([[[[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]]]])

In [229]:
print(f'See if conv 2d requires grad: {layer_conv2d.weight.requires_grad}')

See if conv 2d requires grad: True


In [231]:
# With an all-ones 3x3 kernel, no bias and a 3x3 input, the convolution produces a
# single value equal to the sum of all input elements, so this assert must hold.
assert layer_conv2d(input_tensor) == input_tensor.sum().item() 

In [236]:
# no gradient so far
type(layer_conv2d.weight.grad)

NoneType

In [232]:
input_tensor.sum().item()

45.0

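This shows that a linear layer can be replaced with a convolution: when the kernel covers the whole input, the convolution output is just a weighted sum of all input values, exactly what a linear layer computes.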

In [237]:
# Now compute the output of the single conv layer. Gradient tracking is switched on
# for the input first (in place), so that the later backward() call fills in .grad
# for the input tensor as well as for the convolution weights.
input_tensor.requires_grad_()
out = layer_conv2d(input_tensor)
print(out)

tensor([[[[45.]]]], grad_fn=<ThnnConv2DBackward>)


In [238]:
out.backward()

In [239]:
input_tensor

tensor([[[[1., 2., 3.],
          [4., 5., 6.],
          [7., 8., 9.]]]], requires_grad=True)

In [244]:
print('Gradient w.r.t. input tensor (equals to weights of convolution)')
input_tensor.grad

Gradient w.r.t. input tensor (equals to weights of convolution)


tensor([[[[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]]]])

In [241]:
layer_conv2d.weight

Parameter containing:
tensor([[[[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]]]], requires_grad=True)

In [245]:
print('Gradient w.r.t convolution weight (equals to input tensor)')
layer_conv2d.weight.grad

Gradient w.r.t convolution weight (equals to input tensor)


tensor([[[[1., 2., 3.],
          [4., 5., 6.],
          [7., 8., 9.]]]])

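What we see here is that multiplication acts as a **gradient switcher**: the gradient w.r.t. **x** equals **w**, and the gradient w.r.t. **w** equals **x**.
$$
\large Out = \sum_{i=1}^{n} w_i x_i, \qquad \frac{\partial Out}{\partial x_i} = w_i, \qquad \frac{\partial Out}{\partial w_i} = x_i
$$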

In [247]:
# A stock Conv2d layer for comparison: default (random) init and a bias term.
conv2d_filter = nn.Conv2d(1, 1, (3, 3))
# named_parameters() yields (name, parameter) pairs.
for name, tensor in conv2d_filter.named_parameters():
    print(name, tensor, tensor.shape)

weight Parameter containing:
tensor([[[[-0.0692, -0.2108, -0.3296],
          [-0.2549,  0.0373,  0.1311],
          [ 0.3123, -0.0539,  0.0950]]]], requires_grad=True) torch.Size([1, 1, 3, 3])
bias Parameter containing:
tensor([0.1419], requires_grad=True) torch.Size([1])


### 1D Convolutions 

In [393]:
def init_weight(m):
    """Fill the weight of an ``nn.Conv1d`` with 0, 1, 2, ... in row-major order.

    Intended for use with ``Module.apply``. Modules whose type is not exactly
    ``nn.Conv1d`` are left untouched.

    Args:
        m: the module handed in by ``Module.apply``.
    """
    # Check the type before touching `m.weight`: Module.apply also visits containers
    # and activation modules that have no `weight` attribute at all.
    if type(m) is not nn.Conv1d:
        return
    # Write in place under no_grad so the Parameter object is kept, along with
    # its dtype, device and requires_grad flag.
    with torch.no_grad():
        ramp = torch.arange(m.weight.numel(), dtype=m.weight.dtype, device=m.weight.device)
        m.weight.copy_(ramp.reshape(m.weight.shape))

In [394]:
layer_conv1d = nn.Conv1d(in_channels=1, out_channels=3, kernel_size=3, bias=False)
layer_conv1d.apply(init_weight)

Conv1d(1, 3, kernel_size=(3,), stride=(1,), bias=False)

In [395]:
layer_conv1d.weight.data

tensor([[[0., 1., 2.]],

        [[3., 4., 5.]],

        [[6., 7., 8.]]])

In [398]:
layer_conv1d.weight.requires_grad

True

In [418]:
conv_1d_input = torch.arange(12).to(torch.float).view(3,1,4)
conv_1d_input

tensor([[[ 0.,  1.,  2.,  3.]],

        [[ 4.,  5.,  6.,  7.]],

        [[ 8.,  9., 10., 11.]]])

In [424]:
# One more sample shaped (1, 1, 4), to be appended to the batch.
extra = torch.tensor([[[0., 1., 2., 3.]]], dtype=torch.float32)

In [428]:
# Append `extra` as a fourth sample. Slicing to the three original samples first keeps
# the cell idempotent: re-running it no longer stacks yet another copy of `extra`.
conv_1d_input = torch.cat((conv_1d_input[:3], extra), dim=0)

In [429]:
conv_1d_input

tensor([[[ 0.,  1.,  2.,  3.]],

        [[ 4.,  5.,  6.,  7.]],

        [[ 8.,  9., 10., 11.]],

        [[ 0.,  1.,  2.,  3.]]])

In [430]:
conv_1d_input[0]

tensor([[0., 1., 2., 3.]])

In [431]:
conv_1d_out = layer_conv1d(conv_1d_input)

In [432]:
conv_1d_out

tensor([[[  5.,   8.],
         [ 14.,  26.],
         [ 23.,  44.]],

        [[ 17.,  20.],
         [ 62.,  74.],
         [107., 128.]],

        [[ 29.,  32.],
         [110., 122.],
         [191., 212.]],

        [[  5.,   8.],
         [ 14.,  26.],
         [ 23.,  44.]]], grad_fn=<SqueezeBackward1>)

In [433]:
conv_1d_out[0]

tensor([[ 5.,  8.],
        [14., 26.],
        [23., 44.]], grad_fn=<SelectBackward>)