In [1]:
import torch
from torch import nn
from torch import optim
import torchvision.datasets as datasets
import torch.nn.functional as F
import numpy as np

In [2]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

In [5]:
mnist = datasets.MNIST('', download=True)
model = Net()
loss_fn = nn.CrossEntropyLoss()

In [6]:
conv = model.conv1


In [7]:
for layer in model.children():
    print(layer)

Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
Dropout2d(p=0.25, inplace=False)
Dropout2d(p=0.5, inplace=False)
Linear(in_features=9216, out_features=128, bias=True)
Linear(in_features=128, out_features=10, bias=True)


In [8]:
for name, param in conv.named_parameters():
    print(name)
    print(param.data)

weight
tensor([[[[-0.2013,  0.3206,  0.1812],
          [-0.2039, -0.2952, -0.2669],
          [ 0.1593,  0.2103,  0.3120]]],


        [[[-0.1090,  0.1664,  0.3006],
          [ 0.2718, -0.1986,  0.2445],
          [-0.0489,  0.3046, -0.2292]]],


        [[[-0.1002, -0.0133,  0.0890],
          [-0.1891,  0.1809,  0.1147],
          [-0.1709, -0.2062, -0.2107]]],


        [[[-0.0565, -0.1469, -0.0127],
          [ 0.0549,  0.0853,  0.0829],
          [-0.2383, -0.0052, -0.0918]]],


        [[[-0.1237, -0.1319,  0.2242],
          [ 0.1508,  0.2541, -0.2002],
          [ 0.1440, -0.1958, -0.1021]]],


        [[[ 0.0611, -0.1701,  0.1303],
          [ 0.2695,  0.0617, -0.2525],
          [ 0.0821,  0.2737, -0.2139]]],


        [[[-0.2895,  0.2501,  0.2785],
          [ 0.0461, -0.0440,  0.1576],
          [-0.2275, -0.0297,  0.1099]]],


        [[[-0.0244, -0.2327,  0.0581],
          [ 0.1384,  0.2797,  0.0633],
          [-0.2827, -0.1252, -0.2717]]],


        [[[ 0.1015,  0.28

In [24]:
for param in model.state_dict():
    print(repr(model.state_dict()[param]))

tensor([[[[ 0.2992,  0.0415,  0.1874],
          [ 0.2128,  0.1912,  0.0177],
          [-0.0716, -0.3304, -0.1873]]],


        [[[ 0.1323, -0.0467,  0.0316],
          [ 0.2406,  0.2456, -0.2723],
          [ 0.2765, -0.0848,  0.2515]]],


        [[[-0.1289,  0.0084, -0.2524],
          [-0.2310, -0.2067, -0.2992],
          [-0.2449,  0.0686, -0.1524]]],


        [[[-0.2852, -0.2371,  0.2585],
          [ 0.2860, -0.1991, -0.1926],
          [-0.2471, -0.0411,  0.2414]]],


        [[[ 0.0875, -0.0554,  0.1339],
          [-0.2324,  0.3246,  0.1637],
          [ 0.1022,  0.1181,  0.2366]]],


        [[[ 0.1815, -0.1313, -0.1170],
          [-0.1389,  0.1520,  0.0507],
          [-0.2228,  0.1516,  0.2179]]],


        [[[-0.1073, -0.2914, -0.1875],
          [ 0.0361,  0.1776, -0.0176],
          [ 0.1927,  0.3073, -0.2285]]],


        [[[-0.1166, -0.0266, -0.0521],
          [ 0.1116, -0.2810,  0.2748],
          [-0.1927,  0.1796,  0.2141]]],


        [[[ 0.2223,  0.0928,  0.

In [9]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

In [5]:
optimizer.param_groups

[{'params': [Parameter containing:
   tensor([[[[ 0.2992,  0.0415,  0.1874],
             [ 0.2128,  0.1912,  0.0177],
             [-0.0716, -0.3304, -0.1873]]],
   
   
           [[[ 0.1323, -0.0467,  0.0316],
             [ 0.2406,  0.2456, -0.2723],
             [ 0.2765, -0.0848,  0.2515]]],
   
   
           [[[-0.1289,  0.0084, -0.2524],
             [-0.2310, -0.2067, -0.2992],
             [-0.2449,  0.0686, -0.1524]]],
   
   
           [[[-0.2852, -0.2371,  0.2585],
             [ 0.2860, -0.1991, -0.1926],
             [-0.2471, -0.0411,  0.2414]]],
   
   
           [[[ 0.0875, -0.0554,  0.1339],
             [-0.2324,  0.3246,  0.1637],
             [ 0.1022,  0.1181,  0.2366]]],
   
   
           [[[ 0.1815, -0.1313, -0.1170],
             [-0.1389,  0.1520,  0.0507],
             [-0.2228,  0.1516,  0.2179]]],
   
   
           [[[-0.1073, -0.2914, -0.1875],
             [ 0.0361,  0.1776, -0.0176],
             [ 0.1927,  0.3073, -0.2285]]],
   
   
           [[

In [6]:
type(optimizer.param_groups)

list

In [7]:
len(optimizer.param_groups)

1

In [8]:
group = optimizer.param_groups[0]

In [9]:
group

{'params': [Parameter containing:
  tensor([[[[ 0.2992,  0.0415,  0.1874],
            [ 0.2128,  0.1912,  0.0177],
            [-0.0716, -0.3304, -0.1873]]],
  
  
          [[[ 0.1323, -0.0467,  0.0316],
            [ 0.2406,  0.2456, -0.2723],
            [ 0.2765, -0.0848,  0.2515]]],
  
  
          [[[-0.1289,  0.0084, -0.2524],
            [-0.2310, -0.2067, -0.2992],
            [-0.2449,  0.0686, -0.1524]]],
  
  
          [[[-0.2852, -0.2371,  0.2585],
            [ 0.2860, -0.1991, -0.1926],
            [-0.2471, -0.0411,  0.2414]]],
  
  
          [[[ 0.0875, -0.0554,  0.1339],
            [-0.2324,  0.3246,  0.1637],
            [ 0.1022,  0.1181,  0.2366]]],
  
  
          [[[ 0.1815, -0.1313, -0.1170],
            [-0.1389,  0.1520,  0.0507],
            [-0.2228,  0.1516,  0.2179]]],
  
  
          [[[-0.1073, -0.2914, -0.1875],
            [ 0.0361,  0.1776, -0.0176],
            [ 0.1927,  0.3073, -0.2285]]],
  
  
          [[[-0.1166, -0.0266, -0.0521],
        

In [10]:
optimizer.param_groups[0]['lr']

0.1

In [None]:
group

In [11]:
parameters = group['params']

In [13]:
len(parameters)

8

In [12]:
parameters

[Parameter containing:
 tensor([[[[ 0.2992,  0.0415,  0.1874],
           [ 0.2128,  0.1912,  0.0177],
           [-0.0716, -0.3304, -0.1873]]],
 
 
         [[[ 0.1323, -0.0467,  0.0316],
           [ 0.2406,  0.2456, -0.2723],
           [ 0.2765, -0.0848,  0.2515]]],
 
 
         [[[-0.1289,  0.0084, -0.2524],
           [-0.2310, -0.2067, -0.2992],
           [-0.2449,  0.0686, -0.1524]]],
 
 
         [[[-0.2852, -0.2371,  0.2585],
           [ 0.2860, -0.1991, -0.1926],
           [-0.2471, -0.0411,  0.2414]]],
 
 
         [[[ 0.0875, -0.0554,  0.1339],
           [-0.2324,  0.3246,  0.1637],
           [ 0.1022,  0.1181,  0.2366]]],
 
 
         [[[ 0.1815, -0.1313, -0.1170],
           [-0.1389,  0.1520,  0.0507],
           [-0.2228,  0.1516,  0.2179]]],
 
 
         [[[-0.1073, -0.2914, -0.1875],
           [ 0.0361,  0.1776, -0.0176],
           [ 0.1927,  0.3073, -0.2285]]],
 
 
         [[[-0.1166, -0.0266, -0.0521],
           [ 0.1116, -0.2810,  0.2748],
           [-0.

In [12]:
parameters[0]

Parameter containing:
tensor([[[[-0.3108,  0.1116,  0.3054],
          [-0.3128, -0.0290, -0.1490],
          [-0.2491, -0.2264, -0.1127]]],


        [[[ 0.2384, -0.0073,  0.0537],
          [-0.1314,  0.2111, -0.1684],
          [-0.2062, -0.0136,  0.2124]]],


        [[[-0.0936,  0.2458,  0.3264],
          [-0.1991,  0.1401,  0.3245],
          [-0.0773, -0.0878, -0.2985]]],


        [[[ 0.0485,  0.0681,  0.0031],
          [ 0.1076, -0.2443,  0.0430],
          [-0.1331, -0.3076, -0.1896]]],


        [[[-0.0285, -0.3064, -0.2527],
          [ 0.0350, -0.0148,  0.1634],
          [ 0.2579, -0.0379,  0.2057]]],


        [[[ 0.1894, -0.1798, -0.1854],
          [-0.2987, -0.0931, -0.1017],
          [-0.2677,  0.0693, -0.0506]]],


        [[[-0.3224, -0.1736, -0.2891],
          [ 0.2825,  0.0170,  0.0413],
          [-0.2621,  0.0958,  0.1171]]],


        [[[-0.0388,  0.2726,  0.1130],
          [-0.0557,  0.0339,  0.1181],
          [ 0.1739,  0.1216,  0.2301]]],


        [[

In [13]:
type(parameters[0])

torch.nn.parameter.Parameter

In [14]:
param_tensor = parameters[0].data

In [15]:
type(param_tensor)

torch.Tensor

In [16]:
len(parameters)

8

In [17]:
parameters[1]

Parameter containing:
tensor([ 0.1732,  0.0156, -0.1424,  0.2263, -0.1595, -0.1775, -0.3182,  0.0562,
         0.1604,  0.0107,  0.0971, -0.3222, -0.0087, -0.3162, -0.1337, -0.2749,
         0.3249, -0.0460,  0.0480,  0.1197, -0.0072,  0.0725,  0.0649, -0.0651,
        -0.2702,  0.2664, -0.0266, -0.1773,  0.0602, -0.0155, -0.0295,  0.1996],
       requires_grad=True)

In [18]:
layers = []
stat_names = []

for name, layer in model.named_modules():
    if type(layer) == nn.Conv2d or type(layer) == nn.Linear:
        if len(layers) > 0:  # there is a shift of 1 in the name: for layer conv1 we use stats['conv2'] for example for the original MNIST net.
            stat_names.append(name)
        layers.append(layer)

In [2]:
int(-127.8)

-127

In [3]:
round(-127.8)

-128

In [4]:
round(-127.3)

-127

In [8]:
round(127.1)

127

In [6]:
t1 = torch.ones(10, 10)
t1

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [7]:
t2 = torch.ones(10, 1)
t2

tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]])

In [8]:
t1 + t2  # Test how the automatic broadcast works

tensor([[2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.]])

In [10]:
torch.tensor([1, 2, 3, 4])[1:-1]

tensor([2, 3])

In [2]:
t = torch.tensor([-5, 2, 3, 8, 10, 15, 50])

In [3]:
t.clamp(0, 10)

tensor([ 0,  2,  3,  8, 10, 10, 10])

In [4]:
t

tensor([-5,  2,  3,  8, 10, 15, 50])

In [8]:
(t < 0).sum().item()

1

In [7]:
t = torch.tensor(2 ** 40)

In [8]:
t

tensor(1099511627776)

In [9]:
t.type()

'torch.LongTensor'

In [14]:
t2 = t.long()

In [15]:
t2

tensor(1099511627776)

In [16]:
t2.type()

'torch.LongTensor'

In [2]:
t = torch.tensor([[1, 2, 3], [4, 5, 6]])

In [6]:
t.view(3, -1)

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [7]:
t

tensor([[1, 2, 3],
        [4, 5, 6]])

In [9]:
class Net2(torch.nn.Module):
    def __init__(self):
        super(Net2, self).__init__()
        self.seq = nn.Sequential(
            nn.Linear(100, 10),
            nn.ReLU())

    def forward(self, x):
        return self.seq(x)

In [10]:
net = Net2()

In [15]:
for layer in net.seq:
    layer.hahahah = nn.ReLU()
    print(layer)

Linear(
  in_features=100, out_features=10, bias=True
  (hahahah): ReLU()
)
ReLU(
  (hahahah): ReLU()
)


In [17]:
for layer in net.seq:
    print("test")

test
test


In [18]:
for idx, layer in enumerate(net.seq):
    print("test")

test
test


In [4]:
mod = nn.BatchNorm2d(3)

In [6]:
for p in mod.parameters():
    print(p)

Parameter containing:
tensor([1., 1., 1.], requires_grad=True)
Parameter containing:
tensor([0., 0., 0.], requires_grad=True)


In [30]:
mod = nn.Linear(10, 100, bias=False)

In [31]:
if mod.bias is not None:
    print("test")

In [25]:
type(None)

NoneType

In [33]:
for v1, v2, v3 in zip([1, 2, 3, 4, 5, 6], ['apple', 'banana', 'tomato', 'pear'], ['red', 'green', 'blue']):
    print(v1)
    print(v2)
    print(v3)

1
apple
red
2
banana
green
3
tomato
blue


In [12]:
arr = np.array([0, 0, 0, 0, 1, 2, 1231, 1239123])

In [13]:
np.quantile(arr, 0.5)

0.5

In [20]:
np.concatenate((np.arange(0.0, 1.0, 0.02),(np.array(1.0))))

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 1 dimension(s) and the array at index 1 has 0 dimension(s)

In [26]:
np.arange(0.0, 1.0, 0.02).tolist() + [1.0]

[0.0,
 0.02,
 0.04,
 0.06,
 0.08,
 0.1,
 0.12,
 0.14,
 0.16,
 0.18,
 0.2,
 0.22,
 0.24,
 0.26,
 0.28,
 0.3,
 0.32,
 0.34,
 0.36,
 0.38,
 0.4,
 0.42,
 0.44,
 0.46,
 0.48,
 0.5,
 0.52,
 0.54,
 0.56,
 0.58,
 0.6,
 0.62,
 0.64,
 0.66,
 0.68,
 0.7000000000000001,
 0.72,
 0.74,
 0.76,
 0.78,
 0.8,
 0.8200000000000001,
 0.84,
 0.86,
 0.88,
 0.9,
 0.92,
 0.9400000000000001,
 0.96,
 0.98,
 1.0]

In [22]:
(np.array(1.0))

array(1.)

In [28]:
t = torch.tensor([[1,2,3], [4,5,6]])

In [37]:
t.size()

torch.Size([2, 3])

In [36]:
t.shape

torch.Size([2, 3])

In [38]:
np.prod(t.shape)

6

In [35]:
print('{:0.5f}'.format(0.99999 * 100).rstrip('0').rstrip('.'))

99.999


In [17]:
print((0.9999999 * 100))

99.99999000000001


In [46]:
data = np.arange(0, 100, dtype=float)
data[99] = 150230
data

array([0.0000e+00, 1.0000e+00, 2.0000e+00, 3.0000e+00, 4.0000e+00,
       5.0000e+00, 6.0000e+00, 7.0000e+00, 8.0000e+00, 9.0000e+00,
       1.0000e+01, 1.1000e+01, 1.2000e+01, 1.3000e+01, 1.4000e+01,
       1.5000e+01, 1.6000e+01, 1.7000e+01, 1.8000e+01, 1.9000e+01,
       2.0000e+01, 2.1000e+01, 2.2000e+01, 2.3000e+01, 2.4000e+01,
       2.5000e+01, 2.6000e+01, 2.7000e+01, 2.8000e+01, 2.9000e+01,
       3.0000e+01, 3.1000e+01, 3.2000e+01, 3.3000e+01, 3.4000e+01,
       3.5000e+01, 3.6000e+01, 3.7000e+01, 3.8000e+01, 3.9000e+01,
       4.0000e+01, 4.1000e+01, 4.2000e+01, 4.3000e+01, 4.4000e+01,
       4.5000e+01, 4.6000e+01, 4.7000e+01, 4.8000e+01, 4.9000e+01,
       5.0000e+01, 5.1000e+01, 5.2000e+01, 5.3000e+01, 5.4000e+01,
       5.5000e+01, 5.6000e+01, 5.7000e+01, 5.8000e+01, 5.9000e+01,
       6.0000e+01, 6.1000e+01, 6.2000e+01, 6.3000e+01, 6.4000e+01,
       6.5000e+01, 6.6000e+01, 6.7000e+01, 6.8000e+01, 6.9000e+01,
       7.0000e+01, 7.1000e+01, 7.2000e+01, 7.3000e+01, 7.4000e

In [58]:
np.quantile(data, 0.999, interpolation='higher')

150230.0

In [17]:
1./999

0.001001001001001001