In [1]:
import torch
from torch import nn
from torch import optim
import torchvision.datasets as datasets
import torch.nn.functional as F

In [8]:
class Net(torch.nn.Module):
    """Small MNIST-style CNN: two 3x3 convolutions followed by two linear layers.

    ``forward`` returns log-probabilities (``log_softmax`` over ``dim=1``) for
    10 classes. ``fc1`` expects 9216 flattened features, which is what the
    convolution/pooling stack yields for a 1x28x28 input (64 * 12 * 12).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        # Channel-wise dropout: applied to the 4-D feature map after pooling.
        self.dropout1 = nn.Dropout2d(0.25)
        # Applied after flatten, i.e. to a 2-D (batch, features) tensor, so it
        # has to be element-wise nn.Dropout; nn.Dropout2d is meant for 3-D/4-D
        # inputs and warns when it receives a 2-D one.
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Map a batch of single-channel images to per-class log-probabilities.

        Args:
            x: input batch; assumed to be (batch, 1, 28, 28) so that the
                flattened feature size matches ``fc1`` -- TODO confirm.

        Returns:
            Tensor of shape (batch, 10) holding log-probabilities.
        """
        x = self.conv1(x)
        x = F.relu(x)
        # NOTE(review): there is no activation between conv2 and the pooling
        # step; the reference PyTorch MNIST example applies ReLU here.
        # Confirm that the omission is intentional.
        x = self.conv2(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        # Keep the batch dimension, collapse channels and spatial dims.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

In [3]:
# Download MNIST (if not already present) into the root given by the first argument.
mnist = datasets.MNIST('', download=True)
model = Net()
# Net.forward already ends in log_softmax, so the matching criterion is
# NLLLoss. CrossEntropyLoss would apply log_softmax a second time on top of
# the model's log-probabilities.
loss_fn = nn.NLLLoss()

In [4]:
# SGD with momentum over every parameter of the model.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.1,
    momentum=0.9,
)

In [5]:
# Inspect the optimizer's parameter groups; the repr prints the parameter tensors in full.
optimizer.param_groups

[{'params': [Parameter containing:
   tensor([[[[-0.3108,  0.1116,  0.3054],
             [-0.3128, -0.0290, -0.1490],
             [-0.2491, -0.2264, -0.1127]]],
   
   
           [[[ 0.2384, -0.0073,  0.0537],
             [-0.1314,  0.2111, -0.1684],
             [-0.2062, -0.0136,  0.2124]]],
   
   
           [[[-0.0936,  0.2458,  0.3264],
             [-0.1991,  0.1401,  0.3245],
             [-0.0773, -0.0878, -0.2985]]],
   
   
           [[[ 0.0485,  0.0681,  0.0031],
             [ 0.1076, -0.2443,  0.0430],
             [-0.1331, -0.3076, -0.1896]]],
   
   
           [[[-0.0285, -0.3064, -0.2527],
             [ 0.0350, -0.0148,  0.1634],
             [ 0.2579, -0.0379,  0.2057]]],
   
   
           [[[ 0.1894, -0.1798, -0.1854],
             [-0.2987, -0.0931, -0.1017],
             [-0.2677,  0.0693, -0.0506]]],
   
   
           [[[-0.3224, -0.1736, -0.2891],
             [ 0.2825,  0.0170,  0.0413],
             [-0.2621,  0.0958,  0.1171]]],
   
   
           [[

In [6]:
# param_groups is a plain Python list (its entries are dicts, as printed above).
type(optimizer.param_groups)

list

In [7]:
# Take the first parameter group for a closer look.
group = optimizer.param_groups[0]

In [8]:
# Show the group dict; its 'params' entry holds the parameter tensors.
group

{'params': [Parameter containing:
  tensor([[[[-0.3108,  0.1116,  0.3054],
            [-0.3128, -0.0290, -0.1490],
            [-0.2491, -0.2264, -0.1127]]],
  
  
          [[[ 0.2384, -0.0073,  0.0537],
            [-0.1314,  0.2111, -0.1684],
            [-0.2062, -0.0136,  0.2124]]],
  
  
          [[[-0.0936,  0.2458,  0.3264],
            [-0.1991,  0.1401,  0.3245],
            [-0.0773, -0.0878, -0.2985]]],
  
  
          [[[ 0.0485,  0.0681,  0.0031],
            [ 0.1076, -0.2443,  0.0430],
            [-0.1331, -0.3076, -0.1896]]],
  
  
          [[[-0.0285, -0.3064, -0.2527],
            [ 0.0350, -0.0148,  0.1634],
            [ 0.2579, -0.0379,  0.2057]]],
  
  
          [[[ 0.1894, -0.1798, -0.1854],
            [-0.2987, -0.0931, -0.1017],
            [-0.2677,  0.0693, -0.0506]]],
  
  
          [[[-0.3224, -0.1736, -0.2891],
            [ 0.2825,  0.0170,  0.0413],
            [-0.2621,  0.0958,  0.1171]]],
  
  
          [[[-0.0388,  0.2726,  0.1130],
        

In [9]:
# Keys stored in the group: the parameter list plus the SGD hyperparameters.
group.keys()

dict_keys(['params', 'lr', 'momentum', 'dampening', 'weight_decay', 'nesterov'])

In [10]:
# Pull out the list of parameters held by this group.
parameters = group['params']

In [11]:
# Show the whole parameter list (long output: every tensor is printed).
parameters

[Parameter containing:
 tensor([[[[-0.3108,  0.1116,  0.3054],
           [-0.3128, -0.0290, -0.1490],
           [-0.2491, -0.2264, -0.1127]]],
 
 
         [[[ 0.2384, -0.0073,  0.0537],
           [-0.1314,  0.2111, -0.1684],
           [-0.2062, -0.0136,  0.2124]]],
 
 
         [[[-0.0936,  0.2458,  0.3264],
           [-0.1991,  0.1401,  0.3245],
           [-0.0773, -0.0878, -0.2985]]],
 
 
         [[[ 0.0485,  0.0681,  0.0031],
           [ 0.1076, -0.2443,  0.0430],
           [-0.1331, -0.3076, -0.1896]]],
 
 
         [[[-0.0285, -0.3064, -0.2527],
           [ 0.0350, -0.0148,  0.1634],
           [ 0.2579, -0.0379,  0.2057]]],
 
 
         [[[ 0.1894, -0.1798, -0.1854],
           [-0.2987, -0.0931, -0.1017],
           [-0.2677,  0.0693, -0.0506]]],
 
 
         [[[-0.3224, -0.1736, -0.2891],
           [ 0.2825,  0.0170,  0.0413],
           [-0.2621,  0.0958,  0.1171]]],
 
 
         [[[-0.0388,  0.2726,  0.1130],
           [-0.0557,  0.0339,  0.1181],
           [ 0.

In [12]:
# First entry of the list; the printed 3x3 kernels suggest this is conv1's weight -- TODO confirm.
parameters[0]

Parameter containing:
tensor([[[[-0.3108,  0.1116,  0.3054],
          [-0.3128, -0.0290, -0.1490],
          [-0.2491, -0.2264, -0.1127]]],


        [[[ 0.2384, -0.0073,  0.0537],
          [-0.1314,  0.2111, -0.1684],
          [-0.2062, -0.0136,  0.2124]]],


        [[[-0.0936,  0.2458,  0.3264],
          [-0.1991,  0.1401,  0.3245],
          [-0.0773, -0.0878, -0.2985]]],


        [[[ 0.0485,  0.0681,  0.0031],
          [ 0.1076, -0.2443,  0.0430],
          [-0.1331, -0.3076, -0.1896]]],


        [[[-0.0285, -0.3064, -0.2527],
          [ 0.0350, -0.0148,  0.1634],
          [ 0.2579, -0.0379,  0.2057]]],


        [[[ 0.1894, -0.1798, -0.1854],
          [-0.2987, -0.0931, -0.1017],
          [-0.2677,  0.0693, -0.0506]]],


        [[[-0.3224, -0.1736, -0.2891],
          [ 0.2825,  0.0170,  0.0413],
          [-0.2621,  0.0958,  0.1171]]],


        [[[-0.0388,  0.2726,  0.1130],
          [-0.0557,  0.0339,  0.1181],
          [ 0.1739,  0.1216,  0.2301]]],


        [[

In [13]:
# Each entry is an nn.Parameter rather than a bare tensor.
type(parameters[0])

torch.nn.parameter.Parameter

In [14]:
# .data gives access to the tensor wrapped by the Parameter (its type is checked in the next cell).
param_tensor = parameters[0].data

In [15]:
# The object obtained through .data is a plain torch.Tensor.
type(param_tensor)

torch.Tensor

In [16]:
# Number of parameter tensors in the group; presumably weight + bias for each of
# the four conv/linear layers of Net -- TODO confirm.
len(parameters)

8

In [17]:
# Second entry: a 1-D tensor of 32 values, presumably conv1's bias -- TODO confirm.
parameters[1]

Parameter containing:
tensor([ 0.1732,  0.0156, -0.1424,  0.2263, -0.1595, -0.1775, -0.3182,  0.0562,
         0.1604,  0.0107,  0.0971, -0.3222, -0.0087, -0.3162, -0.1337, -0.2749,
         0.3249, -0.0460,  0.0480,  0.1197, -0.0072,  0.0725,  0.0649, -0.0651,
        -0.2702,  0.2664, -0.0266, -0.1773,  0.0602, -0.0155, -0.0295,  0.1996],
       requires_grad=True)

In [18]:
# Collect the conv/linear layers of the model together with the names used to
# look up their statistics.
layers = []
stat_names = []

for name, layer in model.named_modules():
    # Exact type match only: subclasses of Conv2d/Linear are not collected.
    if type(layer) not in (nn.Conv2d, nn.Linear):
        continue
    # Names are shifted by one: the first collected layer contributes no name,
    # so layer i lines up with the name of layer i + 1 (e.g. for conv1 we use
    # stats['conv2'] in the original MNIST net).
    if layers:
        stat_names.append(name)
    layers.append(layer)

In [2]:
# int() truncates toward zero: -127.8 becomes -127.
int(-127.8)

-127

In [3]:
# round() goes to the nearest integer instead: -127.8 becomes -128.
round(-127.8)

-128

In [4]:
# Nearest integer again, this time rounding toward zero: -127.3 becomes -127.
round(-127.3)

-127

In [8]:
# Same check on the positive side: 127.1 becomes 127.
round(127.1)

127

In [6]:
# 10x10 tensor of ones: left operand for the broadcasting test below.
t1 = torch.ones(10, 10)
t1

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [7]:
# 10x1 column of ones: right operand for the broadcasting test below.
t2 = torch.ones(10, 1)
t2

tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]])

In [8]:
# (10, 10) + (10, 1): the single column of t2 is added to every column of t1.
t1 + t2  # Test how the automatic broadcast works

tensor([[2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.]])

In [10]:
# Slicing with [1:-1] drops the first and the last element.
torch.tensor([1, 2, 3, 4])[1:-1]

tensor([2, 3])

In [2]:
# Sample integer tensor with values below 0 and above 10 for the clamp test.
t = torch.tensor([-5, 2, 3, 8, 10, 15, 50])

In [3]:
# clamp limits every value to the range [0, 10] and returns the result.
t.clamp(0, 10)

tensor([ 0,  2,  3,  8, 10, 10, 10])

In [4]:
# t itself is unchanged by the clamp call above.
t

tensor([-5,  2,  3,  8, 10, 15, 50])

In [8]:
# Count the negative entries: boolean mask -> sum -> Python number via .item().
(t < 0).sum().item()

1

In [7]:
# 2**40 does not fit in 32 bits; the following cells check how torch stores it.
t = torch.tensor(2 ** 40)

In [8]:
# The value is stored exactly.
t

tensor(1099511627776)

In [9]:
# The tensor type inferred from the Python int is torch.LongTensor.
t.type()

'torch.LongTensor'

In [14]:
# Explicit .long() conversion of a tensor that is already a LongTensor.
t2 = t.long()

In [15]:
# The value is the same as before the conversion.
t2

tensor(1099511627776)

In [16]:
# The type is still torch.LongTensor.
t2.type()

'torch.LongTensor'

In [2]:
# 2x3 integer tensor used to try out view().
t = torch.tensor([[1, 2, 3], [4, 5, 6]])

In [6]:
# Present the same six values as 3 rows; -1 lets torch infer the column count (2 here).
t.view(3, -1)

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [7]:
# t keeps its original 2x3 shape; view() returned a separate tensor object.
t

tensor([[1, 2, 3],
        [4, 5, 6]])

In [9]:
class Net2(torch.nn.Module):
    """Toy module wrapping a Linear(100, 10) + ReLU pair in an nn.Sequential."""

    def __init__(self):
        super().__init__()
        # Build the stages first, then hand them to Sequential in order.
        stages = [nn.Linear(100, 10), nn.ReLU()]
        self.seq = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        activations = self.seq(x)
        return activations

In [10]:
# Instantiate the toy module used by the iteration experiments below.
net = Net2()

In [15]:
# Iterate over the layers of the Sequential and attach an extra ReLU to each one
# under an arbitrary attribute name; the printed repr then lists it as a child
# module of that layer.
for layer in net.seq:
    layer.hahahah = nn.ReLU()
    print(layer)

Linear(
  in_features=100, out_features=10, bias=True
  (hahahah): ReLU()
)
ReLU(
  (hahahah): ReLU()
)


In [17]:
# Iterating the Sequential yields one item per contained layer (two here).
for layer in net.seq:
    print("test")

test
test


In [18]:
# enumerate() works on a Sequential as well and gives the same number of iterations.
for idx, layer in enumerate(net.seq):
    print("test")

test
test


In [4]:
# BatchNorm2d over 3 channels, created to inspect its parameters.
mod = nn.BatchNorm2d(3)

In [6]:
# The module exposes two learnable tensors of length 3, printed as all ones and
# all zeros (presumably the scale and the shift -- TODO confirm).
for p in mod.parameters():
    print(p)

Parameter containing:
tensor([1., 1., 1.], requires_grad=True)
Parameter containing:
tensor([0., 0., 0.], requires_grad=True)
