<a href="https://colab.research.google.com/github/ibkvictor/hello-pytorch/blob/main/torch_practice1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class Net(nn.Module):

  def __init__(self):
    super().__init__()

    #initializing the different types of layers used
    self.cv1 = nn.Conv2d(1, 6, 3)
    self.cv2 = nn.Conv2d(6, 16, 3)

    self.fc1 = nn.Linear(6 * 6 * 16, 120) #576 nodes of flattened out image shape
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)

  def forward(self, x):
    #first convolution and pooling
    x = F.max_pool2d(F.relu(self.cv1(x)), (2,2)) #know the difference btw F and nn layers ( calculation or functions vs. actual layers )
    #also the difference of capital letters and otherwise

    #second convolution and pooling
    x = F.max_pool2d(F.relu(self.cv2(x)), (2))

    x = x.view(-1, self.num_flat_features(x))
    print(x.size()) #left for  debugging (Flatten)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    out = self.fc3(x)
    return out

  def num_flat_features(self, x):
    size = x.size()[1:] #take all of the sizes except batch dim.
    features = 1
    for s in size:
      features *= s

    return features

In [4]:
net = Net()
print(net)

Net(
  (cv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (cv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [5]:
print(net.parameters) #network parameters

<bound method Module.parameters of Net(
  (cv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (cv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)>


In [6]:
print(net.parameters()) #it is an object
#hence
print(len(list(net.parameters()))) #has 10 actual trainable parameters

<generator object Module.parameters at 0x7ffb20598d58>
10


In [7]:
print(list(net.parameters())[0]) #first parameter

Parameter containing:
tensor([[[[ 0.2486,  0.0480, -0.0728],
          [-0.2017,  0.3123, -0.2255],
          [-0.2501,  0.0513, -0.0997]]],


        [[[-0.0354,  0.1109, -0.0159],
          [ 0.0390, -0.0225, -0.0757],
          [-0.3162,  0.2986, -0.2623]]],


        [[[ 0.1769, -0.2397, -0.0101],
          [-0.0317,  0.2137,  0.1802],
          [ 0.0547, -0.2941, -0.0164]]],


        [[[ 0.1915,  0.2895,  0.1502],
          [-0.1199, -0.0533,  0.0309],
          [ 0.1550,  0.1738,  0.2024]]],


        [[[-0.2923,  0.1422,  0.0815],
          [ 0.1883,  0.2606,  0.2460],
          [ 0.1215, -0.0795, -0.3097]]],


        [[[-0.3252,  0.1998,  0.0292],
          [ 0.2720, -0.0745, -0.0488],
          [ 0.0036,  0.1469,  0.3103]]]], requires_grad=True)


In [2]:
#testing with input
a = torch.randn(1, 1, 32, 32, requires_grad=True) #an image of one channel and size 32 by 32. dont forget to add false batch.
#requires_grad important for creating and retaining graph. This ensures that backward does not throw error (exception)

In [8]:
t = net(a)

torch.Size([1, 576])


In [9]:
print(t)

tensor([[-0.0401,  0.0708, -0.0458,  0.0499,  0.0666, -0.1272, -0.0236, -0.0713,
         -0.0156,  0.0484]], grad_fn=<AddmmBackward>)


to zero all the buffered gradients and used randn gradients

In [11]:
net.zero_grad()
t.sum().backward(retain_graph=True)

In [12]:
loss_fn = nn.MSELoss() #loss is like a class or package
expected = torch.randn(1, 10) # size of output "t"
loss = loss_fn(t, expected) #scalar loss value

print(loss)


tensor(1.1391, grad_fn=<MseLossBackward>)


In [13]:
print(loss.grad_fn) 
print(loss.grad_fn.next_functions[0][0]) #the gradient functions of the immediately following function
print(loss.grad_fn.next_functions[0][0])

<MseLossBackward object at 0x7ffad36d0518>
<AddmmBackward object at 0x7ffad36c5a20>
<AddmmBackward object at 0x7ffad36c59b0>


exploring gradients before and after backward() [backprop] -- biases

In [14]:
#zero grads first
net.zero_grad()

print(net.fc3.bias.grad)

a.detach_()
loss.backward() #still dont fully understand this. had to  recompile


print(net.fc3.bias.grad)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
tensor([-0.1844,  0.1911,  0.2315, -0.4516,  0.2387, -0.1557, -0.1849, -0.0759,
        -0.0671, -0.0431])


updating weights

In [15]:
learning_rate = 0.01

for s in net.parameters():
  s.data.sub_(learning_rate * s.grad.data) #sub_ -- subraction, grad.data -- the gradient of that parameter

In [None]:
# using optim module
import torch.optim as optimizer

optimizer = optimizer(net.parameters(), lr = 0.01)

optimizer.zero_grad()
output = net(input)
loss = nn.MSELoss(output, expected)
loss.backward()
optimizer.step()