<a href="https://colab.research.google.com/github/ibkvictor/hello-pytorch/blob/main/torch_practice1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [96]:
class Net(nn.Module):

  def __init__(self):
    super().__init__()

    #initializing the different types of layers used
    self.cv1 = nn.Conv2d(1, 6, 3)
    self.cv2 = nn.Conv2d(6, 16, 3)

    self.fc1 = nn.Linear(6 * 6 * 16, 120) #576 nodes of flattened out image shape
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)

  def forward(self, x):
    #first convolution and pooling
    x = F.max_pool2d(F.relu(self.cv1(x)), (2,2)) #know the difference btw F and nn layers ( calculation or functions vs. actual layers )
    #also the difference of capital letters and otherwise

    #second convolution and pooling
    x = F.max_pool2d(F.relu(self.cv2(x)), (2))

    x = x.view(-1, self.num_flat_features(x))
    print(x.size()) #left for  debugging (Flatten)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    out = self.fc3(x)
    return out

  def num_flat_features(self, x):
    size = x.size()[1:] #take all of the sizes except batch dim.
    features = 1
    for s in size:
      features *= s

    return features

In [97]:
net = Net()
print(net)

Net(
  (cv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (cv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [29]:
print(net.parameters) #network parameters

<bound method Module.parameters of Net(
  (cv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (cv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)>


In [34]:
print(net.parameters()) #it is an object
#hence
print(len(list(net.parameters()))) #has 10 actual trainable parameters

<generator object Module.parameters at 0x7fe3ff7b97d8>
10


In [35]:
print(list(net.parameters())[0]) #first parameter

Parameter containing:
tensor([[[[-0.2612, -0.3171,  0.2833],
          [ 0.2929,  0.0188,  0.0336],
          [-0.0038, -0.3151, -0.0397]]],


        [[[-0.2683,  0.0496, -0.2228],
          [-0.3084,  0.0435, -0.0312],
          [-0.0703, -0.2709,  0.0857]]],


        [[[ 0.0787,  0.2169, -0.1227],
          [ 0.1082, -0.3197, -0.1278],
          [ 0.0017,  0.1357, -0.1427]]],


        [[[ 0.2425,  0.1497,  0.3158],
          [-0.2310,  0.2438,  0.0525],
          [-0.1189,  0.3210,  0.1360]]],


        [[[-0.2015, -0.0315,  0.1886],
          [ 0.2638,  0.1968,  0.1546],
          [-0.2258, -0.2442, -0.0966]]],


        [[[-0.3159,  0.1776,  0.2716],
          [-0.0298,  0.1378,  0.0165],
          [-0.3319,  0.2644,  0.2307]]]], requires_grad=True)


In [91]:
#testing with input
a = torch.randn(1, 1, 32, 32) #an image of one channel and size 32 by 32. dont forget to add false batch.

In [98]:
t = net(a)

torch.Size([1, 576])


In [99]:
print(t)

tensor([[-0.0982, -0.1773, -0.0867, -0.0035,  0.0191,  0.0578, -0.0175, -0.0459,
          0.0133, -0.0766]], grad_fn=<AddmmBackward>)


to zero all the buffered gradients and used randn gradients

In [93]:
net.zero_grad()
t.backward(retain_graph=True)

RuntimeError: ignored

In [100]:
loss_fn = nn.MSELoss() #loss is like a class or package
expected = torch.randn(1, 10) # size of output "t"
loss = loss_fn(t, expected) #scalar loss value

print(loss)


tensor(0.9447, grad_fn=<MseLossBackward>)


In [78]:
print(loss.grad_fn) 
print(loss.grad_fn.next_functions[0][0]) #the gradient functions of the immediately following function
print(loss.grad_fn.next_functions[0][0])

<MseLossBackward object at 0x7fe3ff8b4278>
<AddmmBackward object at 0x7fe3ff8b4ba8>
<AddmmBackward object at 0x7fe3ff8b4908>


exploring gradients before and after backward() [backprop] -- biases

In [102]:
#zero grads first
net.zero_grad()

print(net.fc3.bias.grad)

a.detach_()
loss.backward() #still dont fully understand this. had to  recompile


print(net.fc3.bias.grad)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])


RuntimeError: ignored

updating weights

In [103]:
learning_rate = 0.01

for s in net.parameters():
  s.data.sub_(learning_rate * s.grad.data) #sub_ -- subraction, grad.data -- the gradient of that parameter

In [None]:
# using optim module
import torch.optim as optimizer

optimizer = optimizer(net.parameters(), lr = 0.01)

optimizer.zero_grad()
output = net(input)
loss = nn.MSELoss(output, expected)
loss.backward()
optimizer.step()