In [None]:
import torch.nn as nn
import torch
import torchvision
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
import numpy as np

In [None]:
""" load cifar-10 dataset """ 

# Preprocessing applied to every image: convert to a tensor, then normalise each
# of the three channels with mean 0.5 and std 0.5 (i.e. (x - 0.5) / 0.5).
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
)

# load train set
# download=True only fetches the archive when it is not already present under
# ./data, so the cell also works on a fresh checkout (Restart & Run All).
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# small shuffled batches of 4 images, loaded by 2 worker processes
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

In [None]:
""" select a random batch from dataloader """

# The batch drawn here is random because the DataLoader was created with shuffle=True.
# Each item produced by the loader unpacks into two tensors: (images, labels).
# Use the built-in next(): DataLoader iterators implement __next__, and the
# Python-2-style `.next()` method is not available in current PyTorch releases.
images, labels = next(iter(trainloader))

In [None]:
""" define a network, optimizer, loss """

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    """Small LeNet-style CNN producing 10 class scores.

    Two conv -> ReLU -> max-pool stages are followed by three fully connected
    layers. The first linear layer takes 16*5*5 features, which is what the
    conv/pool stack yields for 3-channel 32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        # layers are created in this order on purpose: it fixes the order of
        # net.parameters() and of the random initialisation
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the raw (un-softmaxed) class scores for a batch of images."""
        # convolutional feature extractor; the same pooling layer follows each conv
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # collapse every non-batch dimension into one feature vector per sample
        flat = x.view(-1, self.num_flat_features(x))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all dims but the first."""
        return x.size()[1:].numel()

net = Net()

# CrossEntropyLoss takes the raw class scores from the network and integer class labels
criterion = nn.CrossEntropyLoss()
# lr=1e-3 is Adam's documented default step size; the previous value of 0.1 is
# two orders of magnitude larger and typically makes a small CNN like this diverge.
optimizer = optim.Adam(net.parameters(), lr=1e-3)

In [None]:
""" one training step on a batch """

# 1) reset the gradients held by the optimizer's parameters before the new backward pass
optimizer.zero_grad()
# 2) forward pass: class scores for the sampled batch
outputs = net(images)
# 3) compare the scores against the ground-truth labels
loss = criterion(outputs, labels)
# 4) backward pass: populate .grad on the network parameters
loss.backward()
# 5) update the parameters using the gradients just computed
optimizer.step()

In [None]:
""" nn.CrossEntropyLoss() """

# demo_* names keep this stand-alone example from overwriting the `outputs`,
# `labels`, `criterion` and `loss` variables used by the training-step cell
# (re-running that cell afterwards would otherwise hit a batch-size mismatch).
demo_logits = torch.randn(32, 10)            # random scores: 32 samples x 10 classes
demo_targets = torch.randint(0, 10, (32,))   # random class indices in [0, 10)

# the loss module needs to be instantiated first, then called on (input, target)
demo_criterion = nn.CrossEntropyLoss()
demo_loss = demo_criterion(demo_logits, demo_targets)
print(demo_loss)

#### _nn.Loss modules need to be instantiated before calling_

* calling `nn.CrossEntropyLoss(outputs, labels)` directly, without instantiating the module first, produces a RuntimeError (the tensors are interpreted as constructor arguments)



In [None]:
""" return the weights, gradients of network layer """

# shape of the first conv layer's weight tensor
print(net.conv1.weight.shape)
# shape of the gradient stored for that weight (filled in by loss.backward())
print(net.conv1.weight.grad.shape)
# shape of the first conv layer's bias
print(net.conv1.bias.shape)
# shape of the gradient stored for that bias
print(net.conv1.bias.grad.shape)

# net.parameters() hands back a generator over every learnable tensor in the network
print(type(net.parameters()))
# a generator has no len(), so count by consuming it; the weight and the bias of a
# layer are separate entries, hence the count is 2 * (number of layers with weight and bias)
print(sum(1 for _ in net.parameters()))

In [None]:
""" return all modules used in a network """

# net.modules() is an iterator; materialise it so the notebook displays every module
[module for module in net.modules()]

In [None]:
""" nn.Sequential """

In [None]:
""" torchsummary """

# Import the module under its own alias: binding it to `net` would replace the
# Net instance that earlier cells rely on (e.g. net.conv1, net.parameters()).
import model.resnet as resnet
from torchsummary import summary

myModel = resnet.resnet18()
# print a per-layer summary for a single 3x32x32 input
summary(myModel, (3, 32, 32))

In [None]:
def matplotlib_imshow(img, one_channel=False):
    """
    helper function to show an image

    Args:
        img: (tensor) image in channel-first layout (C, H, W), normalised with
            mean 0.5 and std 0.5 per channel
        one_channel: (bool) if True, average over the channel dimension and show
            the result with the 'Greys' colormap; otherwise show a colour image

    """
    if one_channel:
        img = img.mean(dim=0)
    # un-normalize: inverse of (x - 0.5) / 0.5
    img = img / 2 + 0.5
    # detach + cpu so tensors that track gradients or live on a GPU can be converted
    npimg = img.detach().cpu().numpy()
    if one_channel:
        plt.imshow(npimg, cmap='Greys')
    else:
        # move the channel axis last: (C, H, W) -> (H, W, C), as plt.imshow expects
        plt.imshow(np.transpose(npimg, (1, 2, 0)))

In [None]:
# arrange the images of the sampled batch into a single grid image tensor
img_grid = torchvision.utils.make_grid(images)
# uncomment to display the grid with the matplotlib_imshow helper:
# matplotlib_imshow(img_grid, one_channel=False)

In [None]:
""" nn.AvgPool2d """

import torch
import torch.nn as nn

# one single-channel 33x33 input
x = torch.rand((1, 1, 33, 33))
print(x.size())

# 3x3 average pooling, stride 2, one pixel of padding on each side
m = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
y = m(x)
# spatial size after pooling: floor((33 + 2*1 - 3) / 2) + 1 = 17
print(y.size())

#### torch.nn vs torch.nn.functional

* `torch.nn` defines modules that are full layers
    * need to be defined in `__init__`
    * can have weights
    * can have state flags such as training or evaluation (e.g., `nn.Dropout`)
* `torch.nn.functional` defines arithmetic operations (plain functions), not full layers
    * do not have to be defined in `__init__`
    * cannot be used directly as a layer; they need additional customization
    * usually used for:
        * a) layers without states / weights, e.g., ReLU (but can also use `nn.ReLU`; I usually do it this way)
        * b) defining custom PyTorch modules from existing `nn.functional` primitives

notes:
* [this comment](https://discuss.pytorch.org/t/whats-difference-of-nn-softmax-nn-softmax-nn-functional-softmax/90934/3) (by Tom) mentions that it is a bad idea to define an `nn.ReLU` in `__init__` and reuse it, but why?

usage:
* to use an `nn.Module`, first instantiate it, e.g. `m = nn.ReLU()`, then call `m(input)` (i.e., each module is a class whose instances are callable)
* to use an `nn.functional` operator, call the function directly, e.g., `nn.functional.relu(input)`

In [None]:
""" nn.Softmax vs nn.Functional.softmax() """

x = torch.rand(2, 3, 4)
print(x)

### ----- nn.functional.softmax ----- ###
# the functional form is called directly; `dim` picks the axis that is normalised:
#   dim=2 and dim=-1 -> last axis (shape[2]=4)
#   dim=1            -> middle axis (shape[1]=3)
#   dim=0            -> first axis (shape[0]=2)
for softmax_dim in (2, -1, 1, 0):
    print(nn.functional.softmax(x, dim=softmax_dim, dtype=float))

### ----- nn.Softmax ------ ###
# NOTE: nn.Softmax(x) does not apply softmax; the tensor is taken as the `dim`
# argument, so this line only prints the representation of the constructed module
print(nn.Softmax(x))
# correct usage: instantiate with the axis, then call the instance on the input
m = nn.Softmax(dim=-1)
print(m(x))

### Debug: expected scalar type Long but got scalar type Float
* happens when calling the `CrossEntropyLoss()` criterion with the network output and the labels
* issue: the labels must have dtype `torch.int64` (long), but I probably passed them as `torch.float`
* see [this discussion](https://discuss.pytorch.org/t/expected-object-of-scalar-type-long-but-got-scalar-type-float-for-argument-2-target/33102) for details

In [None]:
""" nn.Parameter() """

### see this explanation: https://stackoverflow.com/questions/50935345/understanding-torch-nn-parameter
### in a nutshell: nn.Parameter() returns a tensor subclass object; when such an object is assigned to an nn.Module object, it is automatically registered in that module's parameter list (e.g., it is returned by the .parameters() method); the reason this feature exists is that there are cases where a tensor is assigned to a module but should not be registered in its parameter list (e.g., it does not require gradient updates);

### Q: what's the difference between this and nn.Module.register_buffer()?