In [None]:
import torch.nn as nn
import torch
import torchvision
import torchvision.transforms as transforms

In [None]:
""" load cifar-10 dataset """ 

# ToTensor converts each image to a float tensor scaled to [0, 1]; Normalize with
# mean 0.5 / std 0.5 on each of the three channels then maps values to [-1, 1].
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

# load train set
# download=True only fetches the archive into ./data when it is not already there,
# so the notebook also runs from a fresh checkout (download=False raises if ./data is empty).
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# mini-batches of 4 images, reshuffled every epoch, loaded by 2 worker processes
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

In [None]:
""" select a random batch from dataloader """

# the sample returned is random b.c. shuffle=True in DataLoader call
# iter(trainloader) creates a fresh iterator over the loader and the builtin next() pulls its first batch
# -> returns a list of two tensors ([0]: images, [1]: labels)
# NOTE: use next(it), not it.next() -- the .next() method is Python 2 style and is not available on current PyTorch loader iterators
images, labels = next(iter(trainloader))

In [None]:
""" define a network, optimizer, loss """

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    """Small CNN: two conv + ReLU + max-pool stages followed by three linear layers.

    ``fc1`` takes 16*5*5 = 400 features per sample and ``fc3`` emits 10 values
    per sample.
    """

    def __init__(self):
        super().__init__()
        # Creation order is significant: it fixes the order reported by
        # modules()/parameters() and the sequence of random initialisations.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the network on a batch and return a (batch, 10) tensor of raw scores."""
        # Both convolutional stages share the same conv -> ReLU -> pool pattern
        # (the single pooling layer is reused for both).
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Collapse every non-batch dimension so the linear layers see one row per sample.
        flat = x.view(-1, self.num_flat_features(x))
        hidden = F.relu(self.fc2(F.relu(self.fc1(flat))))
        # No activation on the last layer: the raw scores are returned as-is.
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the element count of one sample of ``x`` (product of all dims after the first)."""
        return x.size()[1:].numel()

# Build the model with freshly initialised weights.
net = Net()

# CrossEntropyLoss consumes the raw scores produced by the network together with integer class labels.
criterion = nn.CrossEntropyLoss()
# Adam rescales each gradient, so a single step changes a weight by roughly lr.
# 1e-3 is Adam's documented default; lr=0.1 would overwhelm the small initial
# weights of this network and typically makes training diverge.
optimizer = optim.Adam(net.parameters(), lr=1e-3)

In [None]:
""" one training step on a batch """

# Clear gradients left over from any earlier backward pass (PyTorch accumulates into .grad otherwise).
optimizer.zero_grad()
# Forward pass: per-class scores for every image in the batch.
outputs = net(images)
# Loss between the predicted scores and the ground-truth labels of the batch.
loss = criterion(outputs, labels)
# Backward pass: populates .grad on the network parameters that contributed to the loss.
loss.backward()
# Apply one optimizer update to the parameters using the gradients just computed.
optimizer.step()

In [None]:
""" nn.CrossEntropyLoss() """

# stand-in data: a (32, 10) tensor of random scores and 32 random integer class indices in [0, 10)
outputs = torch.randn((32, 10))
labels = torch.randint(low=0, high=10, size=(32,))

# the loss is a module: create an instance first, then call that instance on (scores, targets)
criterion = nn.CrossEntropyLoss()
loss = criterion(input=outputs, target=labels)
print(loss)

#### _nn.Loss modules need to be instantiated before calling_

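* Calling `nn.CrossEntropyLoss(outputs, labels)` directly, without creating an instance first, hands the two tensors to the constructor as configuration arguments instead of running the loss on them, and raises a `RuntimeError`.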



In [None]:
""" return the weights, gradients of network layer """

# conv1's weight and bias: for each one print the size of the tensor itself, then the size of its gradient
# (output order: weight, weight gradient, bias, bias gradient)
for conv1_param in (net.conv1.weight, net.conv1.bias):
    print(conv1_param.size())
    print(conv1_param.grad.size())

# net.parameters() returns a generator over every learnable tensor in the network
print(type(net.parameters()))
# to access or count them, convert the generator to a list of tensors; the weight and the bias of a
# layer are separate entries, so the list length is 2 * (number of layers that have parameters)
print(len([*net.parameters()]))

In [19]:
""" return all modules used in a network """

# net.modules() yields the network itself first, then each registered layer; collect them into a list for display
[submodule for submodule in net.modules()]

[Net(
   (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
   (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
   (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
   (fc1): Linear(in_features=400, out_features=120, bias=True)
   (fc2): Linear(in_features=120, out_features=84, bias=True)
   (fc3): Linear(in_features=84, out_features=10, bias=True)
 ),
 Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1)),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1)),
 Linear(in_features=400, out_features=120, bias=True),
 Linear(in_features=120, out_features=84, bias=True),
 Linear(in_features=84, out_features=10, bias=True)]