In [1]:
import torch
from torch import nn
from torch import optim
import torchvision.datasets as datasets
import torch.nn.functional as F
import numpy as np

In [2]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

In [3]:
mnist = datasets.MNIST('', download=True)
model = Net()
loss_fn = nn.CrossEntropyLoss()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST\raw\train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST\raw\train-images-idx3-ubyte.gz to MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST\raw\train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST\raw\train-labels-idx1-ubyte.gz to MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST\raw\t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST\raw\t10k-images-idx3-ubyte.gz to MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST\raw\t10k-labels-idx1-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting MNIST\raw\t10k-labels-idx1-ubyte.gz to MNIST\raw
Processing...
Done!




In [4]:
conv = model.conv1


In [5]:
for layer in model.children():
    print(layer)

Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
Dropout2d(p=0.25, inplace=False)
Dropout2d(p=0.5, inplace=False)
Linear(in_features=9216, out_features=128, bias=True)
Linear(in_features=128, out_features=10, bias=True)


In [6]:
for name, param in conv.named_parameters():
    print(name)
    print(param.data)

weight
tensor([[[[-0.0332,  0.2192,  0.2646],
          [-0.1101,  0.1869,  0.1640],
          [ 0.0169,  0.2177, -0.2522]]],


        [[[ 0.1135,  0.1336, -0.0417],
          [-0.0304, -0.3204, -0.2328],
          [-0.0380, -0.0607, -0.2473]]],


        [[[ 0.3332, -0.2677,  0.2344],
          [-0.1888,  0.3219,  0.0655],
          [ 0.3100,  0.0799,  0.1524]]],


        [[[ 0.1075,  0.2366,  0.1051],
          [ 0.3175, -0.2650,  0.2858],
          [-0.2083, -0.0630, -0.1849]]],


        [[[ 0.2503, -0.1142,  0.1655],
          [ 0.0274, -0.0398,  0.1931],
          [ 0.0168,  0.3125, -0.1434]]],


        [[[-0.2091, -0.2358, -0.1158],
          [-0.2343,  0.2855,  0.2813],
          [ 0.2977,  0.1809, -0.1326]]],


        [[[ 0.2589,  0.3232,  0.0187],
          [ 0.1462,  0.0586, -0.0860],
          [-0.2801, -0.3242,  0.0103]]],


        [[[ 0.2025, -0.0681,  0.1103],
          [ 0.1364, -0.1686, -0.1084],
          [ 0.0029, -0.2111,  0.2705]]],


        [[[ 0.2717,  0.01

In [24]:
for param in model.state_dict():
    print(repr(model.state_dict()[param]))

tensor([[[[ 0.2992,  0.0415,  0.1874],
          [ 0.2128,  0.1912,  0.0177],
          [-0.0716, -0.3304, -0.1873]]],


        [[[ 0.1323, -0.0467,  0.0316],
          [ 0.2406,  0.2456, -0.2723],
          [ 0.2765, -0.0848,  0.2515]]],


        [[[-0.1289,  0.0084, -0.2524],
          [-0.2310, -0.2067, -0.2992],
          [-0.2449,  0.0686, -0.1524]]],


        [[[-0.2852, -0.2371,  0.2585],
          [ 0.2860, -0.1991, -0.1926],
          [-0.2471, -0.0411,  0.2414]]],


        [[[ 0.0875, -0.0554,  0.1339],
          [-0.2324,  0.3246,  0.1637],
          [ 0.1022,  0.1181,  0.2366]]],


        [[[ 0.1815, -0.1313, -0.1170],
          [-0.1389,  0.1520,  0.0507],
          [-0.2228,  0.1516,  0.2179]]],


        [[[-0.1073, -0.2914, -0.1875],
          [ 0.0361,  0.1776, -0.0176],
          [ 0.1927,  0.3073, -0.2285]]],


        [[[-0.1166, -0.0266, -0.0521],
          [ 0.1116, -0.2810,  0.2748],
          [-0.1927,  0.1796,  0.2141]]],


        [[[ 0.2223,  0.0928,  0.

In [7]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

In [8]:
optimizer.param_groups

[{'params': [Parameter containing:
   tensor([[[[-0.0332,  0.2192,  0.2646],
             [-0.1101,  0.1869,  0.1640],
             [ 0.0169,  0.2177, -0.2522]]],
   
   
           [[[ 0.1135,  0.1336, -0.0417],
             [-0.0304, -0.3204, -0.2328],
             [-0.0380, -0.0607, -0.2473]]],
   
   
           [[[ 0.3332, -0.2677,  0.2344],
             [-0.1888,  0.3219,  0.0655],
             [ 0.3100,  0.0799,  0.1524]]],
   
   
           [[[ 0.1075,  0.2366,  0.1051],
             [ 0.3175, -0.2650,  0.2858],
             [-0.2083, -0.0630, -0.1849]]],
   
   
           [[[ 0.2503, -0.1142,  0.1655],
             [ 0.0274, -0.0398,  0.1931],
             [ 0.0168,  0.3125, -0.1434]]],
   
   
           [[[-0.2091, -0.2358, -0.1158],
             [-0.2343,  0.2855,  0.2813],
             [ 0.2977,  0.1809, -0.1326]]],
   
   
           [[[ 0.2589,  0.3232,  0.0187],
             [ 0.1462,  0.0586, -0.0860],
             [-0.2801, -0.3242,  0.0103]]],
   
   
           [[

In [9]:
type(optimizer.param_groups)

list

In [7]:
len(optimizer.param_groups)

1

In [10]:
group = optimizer.param_groups[0]

In [11]:
group

{'params': [Parameter containing:
  tensor([[[[-0.0332,  0.2192,  0.2646],
            [-0.1101,  0.1869,  0.1640],
            [ 0.0169,  0.2177, -0.2522]]],
  
  
          [[[ 0.1135,  0.1336, -0.0417],
            [-0.0304, -0.3204, -0.2328],
            [-0.0380, -0.0607, -0.2473]]],
  
  
          [[[ 0.3332, -0.2677,  0.2344],
            [-0.1888,  0.3219,  0.0655],
            [ 0.3100,  0.0799,  0.1524]]],
  
  
          [[[ 0.1075,  0.2366,  0.1051],
            [ 0.3175, -0.2650,  0.2858],
            [-0.2083, -0.0630, -0.1849]]],
  
  
          [[[ 0.2503, -0.1142,  0.1655],
            [ 0.0274, -0.0398,  0.1931],
            [ 0.0168,  0.3125, -0.1434]]],
  
  
          [[[-0.2091, -0.2358, -0.1158],
            [-0.2343,  0.2855,  0.2813],
            [ 0.2977,  0.1809, -0.1326]]],
  
  
          [[[ 0.2589,  0.3232,  0.0187],
            [ 0.1462,  0.0586, -0.0860],
            [-0.2801, -0.3242,  0.0103]]],
  
  
          [[[ 0.2025, -0.0681,  0.1103],
        

In [16]:
optimizer.state_dict()

{'state': {},
 'param_groups': [{'lr': 0.1,
   'momentum': 0.9,
   'dampening': 0,
   'weight_decay': 0,
   'nesterov': False,
   'params': [2639043659880,
    2639043659960,
    2639043660120,
    2639043660200,
    2639043660280,
    2639043660360,
    2639043660440,
    2639043660520]}]}

In [13]:
group.keys()

dict_keys(['params', 'lr', 'momentum', 'dampening', 'weight_decay', 'nesterov'])

In [35]:
bool_value = True

In [37]:
torch.tensor(bool_value).type()

'torch.BoolTensor'

In [19]:
len(group['params'])

8

In [33]:
model.register_parameter("dummy_param", torch.nn.Parameter(data=torch.tensor(1.), requires_grad=True))

In [34]:
for name, param in model.named_parameters():
    print(name)
    print(param)

dummy_param
Parameter containing:
tensor(1., requires_grad=True)
conv1.weight
Parameter containing:
tensor([[[[-0.0332,  0.2192,  0.2646],
          [-0.1101,  0.1869,  0.1640],
          [ 0.0169,  0.2177, -0.2522]]],


        [[[ 0.1135,  0.1336, -0.0417],
          [-0.0304, -0.3204, -0.2328],
          [-0.0380, -0.0607, -0.2473]]],


        [[[ 0.3332, -0.2677,  0.2344],
          [-0.1888,  0.3219,  0.0655],
          [ 0.3100,  0.0799,  0.1524]]],


        [[[ 0.1075,  0.2366,  0.1051],
          [ 0.3175, -0.2650,  0.2858],
          [-0.2083, -0.0630, -0.1849]]],


        [[[ 0.2503, -0.1142,  0.1655],
          [ 0.0274, -0.0398,  0.1931],
          [ 0.0168,  0.3125, -0.1434]]],


        [[[-0.2091, -0.2358, -0.1158],
          [-0.2343,  0.2855,  0.2813],
          [ 0.2977,  0.1809, -0.1326]]],


        [[[ 0.2589,  0.3232,  0.0187],
          [ 0.1462,  0.0586, -0.0860],
          [-0.2801, -0.3242,  0.0103]]],


        [[[ 0.2025, -0.0681,  0.1103],
          [ 0.

Parameter containing:
tensor([-1.0137e-02, -5.2471e-03,  6.4464e-03, -8.8959e-03, -4.1964e-03,
        -8.9365e-03,  9.3075e-03, -8.3678e-03, -1.2699e-03,  6.4354e-03,
        -7.5405e-03, -7.1581e-03,  6.2892e-03, -4.3565e-03,  9.6864e-04,
         1.2934e-03,  7.2014e-03,  6.1364e-03, -8.5526e-03, -5.0565e-03,
        -5.8812e-03,  5.0605e-03, -4.7549e-03, -9.2007e-03,  7.8199e-03,
         9.6133e-03, -6.8229e-03, -3.2843e-03, -1.7764e-03,  1.0573e-03,
         1.5702e-05,  6.9043e-03,  8.0103e-03,  9.8005e-03, -7.9141e-04,
        -2.3573e-03,  3.9852e-03, -5.9939e-03,  3.6848e-03,  8.3942e-03,
        -9.8362e-03, -2.5385e-03, -9.3455e-03, -3.2834e-03,  2.2510e-03,
         4.2702e-03,  1.4032e-04,  1.9502e-03,  6.7923e-03, -4.3363e-03,
         6.1735e-03,  7.5405e-03, -8.7828e-03, -4.8395e-03, -6.5577e-03,
         4.2128e-03, -4.0464e-03,  8.1220e-03,  6.9271e-03,  8.0823e-03,
         2.0283e-03, -4.7643e-03, -3.7090e-03,  7.7751e-03, -5.4223e-03,
        -2.6594e-03, -9.6681e

In [22]:
len(optimizer.param_groups[0]['params'])

8

In [10]:
optimizer.param_groups[0]['lr']

0.1

In [None]:
group

In [11]:
parameters = group['params']

In [13]:
len(parameters)

8

In [12]:
parameters

[Parameter containing:
 tensor([[[[ 0.2992,  0.0415,  0.1874],
           [ 0.2128,  0.1912,  0.0177],
           [-0.0716, -0.3304, -0.1873]]],
 
 
         [[[ 0.1323, -0.0467,  0.0316],
           [ 0.2406,  0.2456, -0.2723],
           [ 0.2765, -0.0848,  0.2515]]],
 
 
         [[[-0.1289,  0.0084, -0.2524],
           [-0.2310, -0.2067, -0.2992],
           [-0.2449,  0.0686, -0.1524]]],
 
 
         [[[-0.2852, -0.2371,  0.2585],
           [ 0.2860, -0.1991, -0.1926],
           [-0.2471, -0.0411,  0.2414]]],
 
 
         [[[ 0.0875, -0.0554,  0.1339],
           [-0.2324,  0.3246,  0.1637],
           [ 0.1022,  0.1181,  0.2366]]],
 
 
         [[[ 0.1815, -0.1313, -0.1170],
           [-0.1389,  0.1520,  0.0507],
           [-0.2228,  0.1516,  0.2179]]],
 
 
         [[[-0.1073, -0.2914, -0.1875],
           [ 0.0361,  0.1776, -0.0176],
           [ 0.1927,  0.3073, -0.2285]]],
 
 
         [[[-0.1166, -0.0266, -0.0521],
           [ 0.1116, -0.2810,  0.2748],
           [-0.

In [12]:
parameters[0]

Parameter containing:
tensor([[[[-0.3108,  0.1116,  0.3054],
          [-0.3128, -0.0290, -0.1490],
          [-0.2491, -0.2264, -0.1127]]],


        [[[ 0.2384, -0.0073,  0.0537],
          [-0.1314,  0.2111, -0.1684],
          [-0.2062, -0.0136,  0.2124]]],


        [[[-0.0936,  0.2458,  0.3264],
          [-0.1991,  0.1401,  0.3245],
          [-0.0773, -0.0878, -0.2985]]],


        [[[ 0.0485,  0.0681,  0.0031],
          [ 0.1076, -0.2443,  0.0430],
          [-0.1331, -0.3076, -0.1896]]],


        [[[-0.0285, -0.3064, -0.2527],
          [ 0.0350, -0.0148,  0.1634],
          [ 0.2579, -0.0379,  0.2057]]],


        [[[ 0.1894, -0.1798, -0.1854],
          [-0.2987, -0.0931, -0.1017],
          [-0.2677,  0.0693, -0.0506]]],


        [[[-0.3224, -0.1736, -0.2891],
          [ 0.2825,  0.0170,  0.0413],
          [-0.2621,  0.0958,  0.1171]]],


        [[[-0.0388,  0.2726,  0.1130],
          [-0.0557,  0.0339,  0.1181],
          [ 0.1739,  0.1216,  0.2301]]],


        [[

In [13]:
type(parameters[0])

torch.nn.parameter.Parameter

In [14]:
param_tensor = parameters[0].data

In [15]:
type(param_tensor)

torch.Tensor

In [16]:
len(parameters)

8

In [17]:
parameters[1]

Parameter containing:
tensor([ 0.1732,  0.0156, -0.1424,  0.2263, -0.1595, -0.1775, -0.3182,  0.0562,
         0.1604,  0.0107,  0.0971, -0.3222, -0.0087, -0.3162, -0.1337, -0.2749,
         0.3249, -0.0460,  0.0480,  0.1197, -0.0072,  0.0725,  0.0649, -0.0651,
        -0.2702,  0.2664, -0.0266, -0.1773,  0.0602, -0.0155, -0.0295,  0.1996],
       requires_grad=True)

In [18]:
layers = []
stat_names = []

for name, layer in model.named_modules():
    if type(layer) == nn.Conv2d or type(layer) == nn.Linear:
        if len(layers) > 0:  # there is a shift of 1 in the name: for layer conv1 we use stats['conv2'] for example for the original MNIST net.
            stat_names.append(name)
        layers.append(layer)

In [2]:
int(-127.8)

-127

In [3]:
round(-127.8)

-128

In [4]:
round(-127.3)

-127

In [8]:
round(127.1)

127

In [6]:
t1 = torch.ones(10, 10)
t1

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [7]:
t2 = torch.ones(10, 1)
t2

tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]])

In [8]:
t1 + t2  # Test how the automatic broadcast works

tensor([[2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.]])

In [10]:
torch.tensor([1, 2, 3, 4])[1:-1]

tensor([2, 3])

In [2]:
t = torch.tensor([-5, 2, 3, 8, 10, 15, 50])

In [3]:
t.clamp(0, 10)

tensor([ 0,  2,  3,  8, 10, 10, 10])

In [4]:
t

tensor([-5,  2,  3,  8, 10, 15, 50])

In [8]:
(t < 0).sum().item()

1

In [7]:
t = torch.tensor(2 ** 40)

In [8]:
t

tensor(1099511627776)

In [9]:
t.type()

'torch.LongTensor'

In [14]:
t2 = t.long()

In [15]:
t2

tensor(1099511627776)

In [16]:
t2.type()

'torch.LongTensor'

In [2]:
t = torch.tensor([[1, 2, 3], [4, 5, 6]])

In [6]:
t.view(3, -1)

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [7]:
t

tensor([[1, 2, 3],
        [4, 5, 6]])

In [9]:
class Net2(torch.nn.Module):
    def __init__(self):
        super(Net2, self).__init__()
        self.seq = nn.Sequential(
            nn.Linear(100, 10),
            nn.ReLU())

    def forward(self, x):
        return self.seq(x)

In [10]:
net = Net2()

In [15]:
for layer in net.seq:
    layer.hahahah = nn.ReLU()
    print(layer)

Linear(
  in_features=100, out_features=10, bias=True
  (hahahah): ReLU()
)
ReLU(
  (hahahah): ReLU()
)


In [17]:
for layer in net.seq:
    print("test")

test
test


In [18]:
for idx, layer in enumerate(net.seq):
    print("test")

test
test


In [4]:
mod = nn.BatchNorm2d(3)

In [6]:
for p in mod.parameters():
    print(p)

Parameter containing:
tensor([1., 1., 1.], requires_grad=True)
Parameter containing:
tensor([0., 0., 0.], requires_grad=True)


In [30]:
mod = nn.Linear(10, 100, bias=False)

In [31]:
if mod.bias is not None:
    print("test")

In [25]:
type(None)

NoneType

In [33]:
for v1, v2, v3 in zip([1, 2, 3, 4, 5, 6], ['apple', 'banana', 'tomato', 'pear'], ['red', 'green', 'blue']):
    print(v1)
    print(v2)
    print(v3)

1
apple
red
2
banana
green
3
tomato
blue


In [12]:
arr = np.array([0, 0, 0, 0, 1, 2, 1231, 1239123])

In [13]:
np.quantile(arr, 0.5)

0.5

In [20]:
np.concatenate((np.arange(0.0, 1.0, 0.02),(np.array(1.0))))

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 0 dimension(s)

In [26]:
np.arange(0.0, 1.0, 0.02).tolist() + [1.0]

[0.0,
 0.02,
 0.04,
 0.06,
 0.08,
 0.1,
 0.12,
 0.14,
 0.16,
 0.18,
 0.2,
 0.22,
 0.24,
 0.26,
 0.28,
 0.3,
 0.32,
 0.34,
 0.36,
 0.38,
 0.4,
 0.42,
 0.44,
 0.46,
 0.48,
 0.5,
 0.52,
 0.54,
 0.56,
 0.58,
 0.6,
 0.62,
 0.64,
 0.66,
 0.68,
 0.7000000000000001,
 0.72,
 0.74,
 0.76,
 0.78,
 0.8,
 0.8200000000000001,
 0.84,
 0.86,
 0.88,
 0.9,
 0.92,
 0.9400000000000001,
 0.96,
 0.98,
 1.0]

In [22]:
(np.array(1.0))

array(1.)

In [28]:
t = torch.tensor([[1,2,3], [4,5,6]])

In [37]:
t.size()

torch.Size([2, 3])

In [36]:
t.shape

torch.Size([2, 3])

In [38]:
np.prod(t.shape)

6

In [35]:
print('{:0.5f}'.format(0.99999 * 100).rstrip('0').rstrip('.'))

99.999


In [17]:
print((0.9999999 * 100))

99.99999000000001


In [46]:
data = np.arange(0, 100, dtype=float)
data[99] = 150230
data

array([0.0000e+00, 1.0000e+00, 2.0000e+00, 3.0000e+00, 4.0000e+00,
       5.0000e+00, 6.0000e+00, 7.0000e+00, 8.0000e+00, 9.0000e+00,
       1.0000e+01, 1.1000e+01, 1.2000e+01, 1.3000e+01, 1.4000e+01,
       1.5000e+01, 1.6000e+01, 1.7000e+01, 1.8000e+01, 1.9000e+01,
       2.0000e+01, 2.1000e+01, 2.2000e+01, 2.3000e+01, 2.4000e+01,
       2.5000e+01, 2.6000e+01, 2.7000e+01, 2.8000e+01, 2.9000e+01,
       3.0000e+01, 3.1000e+01, 3.2000e+01, 3.3000e+01, 3.4000e+01,
       3.5000e+01, 3.6000e+01, 3.7000e+01, 3.8000e+01, 3.9000e+01,
       4.0000e+01, 4.1000e+01, 4.2000e+01, 4.3000e+01, 4.4000e+01,
       4.5000e+01, 4.6000e+01, 4.7000e+01, 4.8000e+01, 4.9000e+01,
       5.0000e+01, 5.1000e+01, 5.2000e+01, 5.3000e+01, 5.4000e+01,
       5.5000e+01, 5.6000e+01, 5.7000e+01, 5.8000e+01, 5.9000e+01,
       6.0000e+01, 6.1000e+01, 6.2000e+01, 6.3000e+01, 6.4000e+01,
       6.5000e+01, 6.6000e+01, 6.7000e+01, 6.8000e+01, 6.9000e+01,
       7.0000e+01, 7.1000e+01, 7.2000e+01, 7.3000e+01, 7.4000e

In [58]:
np.quantile(data, 0.999, interpolation='higher')

150230.0

In [17]:
1./999

0.001001001001001001

In [3]:
torch.tensor([0]).shape

torch.Size([1])

In [9]:
torch.zeros(2, 3, 4).fill_(torch.tensor([1, 2]))

RuntimeError: fill_ only supports 0-dimension value tensor but got tensor with 1 dimensions.

In [8]:
torch.tensor([1, 2])

tensor([1, 2])

Multiply 2 tensors along axis

In [15]:
weights = torch.randn(2,3,4,5)
weights

tensor([[[[-1.2180, -0.2089, -0.5368,  1.7205, -0.1584],
          [-0.1972, -0.5135, -0.0324, -0.4760, -0.2820],
          [-0.4943,  0.5576, -1.4509,  1.6600, -1.8798],
          [ 0.4115, -0.8505,  0.5678, -1.5237, -1.5375]],

         [[-0.2204, -0.4010,  0.5943, -0.4756,  1.0980],
          [-0.3401, -1.2484, -1.0479, -1.1043, -2.3587],
          [ 0.1568,  0.1681,  0.1188,  0.3845, -0.2739],
          [ 0.3806, -1.0767,  1.7922,  1.7054,  0.1861]],

         [[ 0.2428, -1.6569,  1.3763, -1.1468, -0.8154],
          [-0.6114, -0.6892, -0.3952,  0.7748,  0.0493],
          [-0.4923, -0.8790,  1.0159,  0.1813,  1.4233],
          [ 1.3455,  0.9331, -0.5588,  1.2757, -0.5273]]],


        [[[ 1.6197,  0.1576, -1.0446, -0.6231, -1.0481],
          [ 0.7403, -0.9260,  0.9821, -0.5958, -0.1568],
          [-0.6225,  0.4277, -0.6419, -0.2251, -1.1038],
          [-0.6589, -0.0214,  0.8524, -1.1662, -0.9385]],

         [[-0.5063,  1.2279,  0.7879, -0.5166,  0.2398],
          [-0.3489,  

In [16]:
mults = torch.randn(3)
mults

tensor([ 0.6645, -0.6273, -2.0973])

In [21]:
result_baseline = torch.clone(weights)
for channel in range(result_baseline.shape[1]):
    result_baseline[:, channel, :, :] *= mults[channel]
result_baseline

tensor([[[[-0.8094, -0.1388, -0.3567,  1.1433, -0.1052],
          [-0.1311, -0.3412, -0.0215, -0.3163, -0.1874],
          [-0.3284,  0.3706, -0.9641,  1.1031, -1.2492],
          [ 0.2734, -0.5652,  0.3773, -1.0125, -1.0217]],

         [[ 0.1383,  0.2515, -0.3728,  0.2984, -0.6888],
          [ 0.2133,  0.7832,  0.6574,  0.6927,  1.4797],
          [-0.0983, -0.1054, -0.0745, -0.2412,  0.1718],
          [-0.2387,  0.6755, -1.1243, -1.0699, -0.1168]],

         [[-0.5092,  3.4750, -2.8864,  2.4051,  1.7102],
          [ 1.2822,  1.4455,  0.8288, -1.6250, -0.1034],
          [ 1.0326,  1.8435, -2.1306, -0.3802, -2.9850],
          [-2.8220, -1.9571,  1.1719, -2.6755,  1.1059]]],


        [[[ 1.0763,  0.1048, -0.6942, -0.4141, -0.6965],
          [ 0.4919, -0.6153,  0.6526, -0.3959, -0.1042],
          [-0.4137,  0.2842, -0.4265, -0.1496, -0.7335],
          [-0.4378, -0.0142,  0.5664, -0.7749, -0.6236]],

         [[ 0.3176, -0.7703, -0.4943,  0.3241, -0.1504],
          [ 0.2189, -

In [22]:
torch.einsum("c,nchw->nchw", mults, weights)

tensor([[[[-0.8094, -0.1388, -0.3567,  1.1433, -0.1052],
          [-0.1311, -0.3412, -0.0215, -0.3163, -0.1874],
          [-0.3284,  0.3706, -0.9641,  1.1031, -1.2492],
          [ 0.2734, -0.5652,  0.3773, -1.0125, -1.0217]],

         [[ 0.1383,  0.2515, -0.3728,  0.2984, -0.6888],
          [ 0.2133,  0.7832,  0.6574,  0.6927,  1.4797],
          [-0.0983, -0.1054, -0.0745, -0.2412,  0.1718],
          [-0.2387,  0.6755, -1.1243, -1.0699, -0.1168]],

         [[-0.5092,  3.4750, -2.8864,  2.4051,  1.7102],
          [ 1.2822,  1.4455,  0.8288, -1.6250, -0.1034],
          [ 1.0326,  1.8435, -2.1306, -0.3802, -2.9850],
          [-2.8220, -1.9571,  1.1719, -2.6755,  1.1059]]],


        [[[ 1.0763,  0.1048, -0.6942, -0.4141, -0.6965],
          [ 0.4919, -0.6153,  0.6526, -0.3959, -0.1042],
          [-0.4137,  0.2842, -0.4265, -0.1496, -0.7335],
          [-0.4378, -0.0142,  0.5664, -0.7749, -0.6236]],

         [[ 0.3176, -0.7703, -0.4943,  0.3241, -0.1504],
          [ 0.2189, -

In [30]:
t = torch.tensor([1.,2.,3.])
t

tensor([1., 2., 3.])

In [31]:
torch.ones_like(t).true_divide(t)

tensor([1.0000, 0.5000, 0.3333])

In [33]:
1 / t

tensor([1.0000, 0.5000, 0.3333])

In [45]:
2 ** t

tensor([2., 4., 8.])

Add along axis (unsqueeze other dimensions)

In [42]:
zps = torch.randn(3).unsqueeze(0).unsqueeze(2).unsqueeze(3) * 100
zps

tensor([[[[ 11.8179]],

         [[ 61.6964]],

         [[132.2990]]]])

In [43]:
weights + zps

tensor([[[[ 10.5999,  11.6090,  11.2811,  13.5384,  11.6595],
          [ 11.6207,  11.3044,  11.7855,  11.3418,  11.5359],
          [ 11.3236,  12.3755,  10.3670,  13.4778,   9.9380],
          [ 12.2293,  10.9674,  12.3857,  10.2941,  10.2803]],

         [[ 61.4760,  61.2955,  62.2907,  61.2208,  62.7945],
          [ 61.3564,  60.4480,  60.6485,  60.5922,  59.3377],
          [ 61.8532,  61.8645,  61.8152,  62.0809,  61.4225],
          [ 62.0770,  60.6197,  63.4886,  63.4018,  61.8825]],

         [[132.5417, 130.6420, 133.6752, 131.1522, 131.4835],
          [131.6876, 131.6097, 131.9038, 133.0738, 132.3483],
          [131.8066, 131.4200, 133.3149, 132.4803, 133.7222],
          [133.6445, 133.2321, 131.7402, 133.5747, 131.7717]]],


        [[[ 13.4376,  11.9755,  10.7732,  11.1948,  10.7698],
          [ 12.5581,  10.8919,  12.7999,  11.2221,  11.6610],
          [ 11.1953,  12.2456,  11.1760,  11.5927,  10.7141],
          [ 11.1590,  11.7964,  12.6703,  10.6517,  10.8794]],

In [14]:
t = torch.ones(2,3,4,5, dtype=torch.int32) * 1000
t

tensor([[[[1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000]],

         [[1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000]],

         [[1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000]]],


        [[[1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000]],

         [[1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000]],

         [[1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000, 1000, 1000, 1000],
          [1000, 1000,

In [15]:
shifts = torch.tensor([6,7,8])

In [17]:
for channel in range(t.shape[1]):
    t[:, channel, :, :] = t[:, channel, :, :] >> shifts[channel]

In [18]:
t

tensor([[[[15, 15, 15, 15, 15],
          [15, 15, 15, 15, 15],
          [15, 15, 15, 15, 15],
          [15, 15, 15, 15, 15]],

         [[ 7,  7,  7,  7,  7],
          [ 7,  7,  7,  7,  7],
          [ 7,  7,  7,  7,  7],
          [ 7,  7,  7,  7,  7]],

         [[ 3,  3,  3,  3,  3],
          [ 3,  3,  3,  3,  3],
          [ 3,  3,  3,  3,  3],
          [ 3,  3,  3,  3,  3]]],


        [[[15, 15, 15, 15, 15],
          [15, 15, 15, 15, 15],
          [15, 15, 15, 15, 15],
          [15, 15, 15, 15, 15]],

         [[ 7,  7,  7,  7,  7],
          [ 7,  7,  7,  7,  7],
          [ 7,  7,  7,  7,  7],
          [ 7,  7,  7,  7,  7]],

         [[ 3,  3,  3,  3,  3],
          [ 3,  3,  3,  3,  3],
          [ 3,  3,  3,  3,  3],
          [ 3,  3,  3,  3,  3]]]], dtype=torch.int32)