In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from random import randint
import numpy as np

import time


In [9]:
def display_num_param(net):
    """Print how many parameters ``net`` holds.

    Adds up ``numel()`` over everything yielded by ``net.parameters()`` and
    prints the total both as a raw count and in millions (two decimals).
    Nothing is returned.
    """
    total = sum(p.numel() for p in net.parameters())
    template = 'There are {} ({:.2f} million) parameters in this neural network'
    print(template.format(total, total / 1e6))
    

In [15]:
class one_layer(torch.nn.Module):
    """Perceptron with a single hidden layer: 784 -> 100 (ReLU) -> 10."""

    def __init__(self):
        super().__init__()
        # The attribute names W1/W2 double as the parameter names of the module.
        self.W1 = torch.nn.Linear(in_features=784, out_features=100)
        self.W2 = torch.nn.Linear(in_features=100, out_features=10)

    def forward(self, x):
        """Map ``x`` (last dimension 784) to 10 raw, un-normalised scores."""
        hidden = torch.relu(self.W1(x))
        return self.W2(hidden)

# Build the network and report how many parameters it holds.
n1 = one_layer()
display_num_param(n1)

There are 79510 (0.08 million) parameters in this neural network


In [20]:
class test(torch.nn.Module):
    """Tiny purely linear network (2 -> 3 -> 2) with constant parameters.

    Every weight starts at 1.0 and every bias at 0.5, so the output of a
    forward pass can be checked by hand. There is no non-linearity between
    the two layers.
    """

    def __init__(self):
        super().__init__()
        self.W1 = torch.nn.Linear(2, 3)
        self.W2 = torch.nn.Linear(3, 2)

        # torch.nn.init.constant_ fills the parameters in place without
        # recording the write in autograd; it replaces the legacy
        # `.data.fill_` idiom.
        for layer in (self.W1, self.W2):
            torch.nn.init.constant_(layer.weight, 1.0)
            torch.nn.init.constant_(layer.bias, 0.5)

    def forward(self, x):
        """Return ``W2(W1(x))`` for an input whose last dimension is 2."""
        z = self.W1(x)
        s = self.W2(z)
        return s

n2 = test()

# nn.Linear stores its weight as (out_features, in_features): the first index
# picks the destination unit, the second index picks the source unit.
with torch.no_grad():
    # Rows: hidden units h1..h3. Columns: inputs w1, w2.
    n2.W1.weight.copy_(torch.tensor([[0.1, 0.2],     # w1h1, w2h1
                                     [0.1, 0.2],     # w1h2, w2h2
                                     [0.2, 0.3]]))   # w1h3, w2h3

    # Rows: outputs o1, o2. Columns: hidden units h1..h3.
    n2.W2.weight.copy_(torch.tensor([[0.4, 0.5, 0.6],     # h1o1, h2o1, h3o1
                                     [0.4, 0.5, 0.7]]))   # h1o2, h2o2, h3o2

    n2.W1.bias.fill_(0.03)
    n2.W2.bias.fill_(0.6)

inputw = torch.tensor([0.5, 0.1])

scores = n2(inputw)

print(scores)


tensor([0.7860, 0.8020], grad_fn=<AddBackward0>)


In [32]:
# Three samples, stored as the columns of X.
X = torch.tensor([[ 2.0, 1.0,  1.0],
                  [-1.0, 0.0, -1.0],
                  [ 1.0, 0.0,  0.0]])

# One row of weights per class.
W = torch.tensor([[ 1.0, 2.0, 0.0],
                  [ 1.0, 1.0, 2.0],
                  [-1.0, 2.0, 1.0]])

# Target class index for each of the three samples.
Y = torch.tensor([1, 2, 0])

# Scores W.X: entry (c, n) is the score of class c for sample n.
output = torch.mm(W, X)
print(output)
loss = nn.CrossEntropyLoss()

# The loss is called with Y holding one label per row of its first argument,
# so the scores are transposed to put one sample on each row.
cel = loss(output.t(), Y)
print(cel)

tensor([[ 0.,  1., -1.],
        [ 3.,  1.,  0.],
        [-3., -1., -3.]])
tensor(1.3862)


In [48]:
# Values whose log-sum-exp is computed below.
lse = torch.tensor([1.0, 2.0, 10.0, 3.0])
# All-ones multiplier, so the element-wise product leaves lse unchanged.
X = torch.ones(4)
s = lse * X
print(s)
# log(sum(exp(s))) over the only dimension.
output = s.logsumexp(dim=0)

print(output)

tensor([ 1.,  2., 10.,  3.])
tensor(10.0014)


In [35]:

#CNN INPUT to Output

def conv_output_size(size, spatial_extent, pad, stride):
    """Return the output length of a convolution along one spatial axis.

    Uses floor((size - F + 2*pad) / stride) + 1 with integer arithmetic, so
    the result is always an int, even when the stride does not divide the
    padded span evenly.

    Raises:
        ValueError: if ``stride`` is smaller than 1 or the filter is larger
            than the padded input.
    """
    if stride < 1:
        raise ValueError("stride must be a positive integer")
    span = size - spatial_extent + 2 * pad
    if span < 0:
        raise ValueError("filter is larger than the padded input")
    return span // stride + 1


inputshape = [32, 32, 3]  # input shape; the last entry is the input depth
numberfilters = 10  # depth or number of channels
spatial_extent = 5  # 5x5 or 3x3 or 7x7 also known as F
pad = 2
stride = 1

W = conv_output_size(inputshape[0], spatial_extent, pad, stride)
H = conv_output_size(inputshape[1], spatial_extent, pad, stride)
D = numberfilters

# Each filter holds F*F*depth weights plus one bias.
totalparams = (spatial_extent * spatial_extent * inputshape[2] + 1) * numberfilters

print("Total parameters : " , totalparams)
print("W x H x D = {} x {} x {}".format(W,H,D))


Total parameters :  760
W x H x D = 32.0 x 32.0 x 10


In [7]:
# Pooling input to output

def pool_output_size(size, spatial_extent, stride):
    """Return the output length of a pooling layer along one spatial axis.

    Uses floor((size - F) / stride) + 1 with integer arithmetic, so the
    result is always an int, even when the stride does not divide evenly.

    Raises:
        ValueError: if ``stride`` is smaller than 1 or the pooling region is
            larger than the input.
    """
    if stride < 1:
        raise ValueError("stride must be a positive integer")
    span = size - spatial_extent
    if span < 0:
        raise ValueError("pooling region is larger than the input")
    return span // stride + 1


inputshape = [14, 14, 3]
spatial_extent = 2  # 2x2 or size of region
stride = 2  # usually same as spatial extent

W = pool_output_size(inputshape[0], spatial_extent, stride)
H = pool_output_size(inputshape[1], spatial_extent, stride)
D = inputshape[2]  # the depth is carried over unchanged

print("W x H x D = {} x {} x {}".format(W,H,D))


W x H x D = 7.0 x 7.0 x 3


In [12]:
#%reset -f
import torch
import datetime
# Print the current local time as yy-mm-dd--HH-MM-SS.
print('Timestamp:',datetime.datetime.now().strftime("%y-%m-%d--%H-%M-%S"))

# Input mini-batch: 3 rows of 5 values each.
x = torch.tensor([[ 1.0459,  0.1869,  0.1149,  0.5970,  0.4373],
                  [ 1.0104,  0.3820,  1.0039,  0.6381, -0.1052],
                  [-0.5516,  1.3038,  1.7466, -0.6411,  0.9880]])


## YOUR CODE STARTS HERE 
class MLP(torch.nn.Module):
    """Three-layer perceptron (5 -> 10 -> 20 -> 2) with constant parameters.

    Every weight starts at 1.0 and every bias at 0.5, which makes the
    forward and backward passes reproducible and easy to check by hand.
    ReLU is applied after the first two layers; the last layer returns raw
    scores.
    """

    def __init__(self):
        super().__init__()
        self.W1 = torch.nn.Linear(5, 10)
        self.W2 = torch.nn.Linear(10, 20)
        self.W3 = torch.nn.Linear(20, 2)

        # torch.nn.init.constant_ fills the parameters in place without
        # recording the write in autograd; it replaces the legacy
        # `.data.fill_` idiom.
        for layer in (self.W1, self.W2, self.W3):
            torch.nn.init.constant_(layer.weight, 1.0)
            torch.nn.init.constant_(layer.bias, 0.5)

    def forward(self, x):
        """Return the 2 raw scores for input ``x`` (last dimension 5)."""
        z = torch.relu(self.W1(x))
        z = torch.relu(self.W2(z))
        return self.W3(z)
       
net = MLP()
print(net)

# Forward pass on the mini-batch x.
s = net(x)
print(s)

# Upstream gradient dL/ds, laid out like s: one row per sample, one column per score.
grad_L_s = torch.tensor([[-3.2,  1.3],
                         [ 1.2, -0.7],
                         [-0.2,  0.1]])

# s is not a scalar, so backward() has to be given dL/ds explicitly.
s.backward(grad_L_s)

# Gradients accumulated on the first layer's parameters by the backward pass.
grad_L_W1, grad_L_b1 = net.W1.weight.grad, net.W1.bias.grad

print("Gradient of L w.r.t for W1 is:", grad_L_W1)
print("Gradient of L w.r.t for b1 is:", grad_L_b1)

display_num_param(net)

Timestamp: 21-10-16--20-18-33
MLP(
  (W1): Linear(in_features=5, out_features=10, bias=True)
  (W2): Linear(in_features=10, out_features=20, bias=True)
  (W3): Linear(in_features=20, out_features=2, bias=True)
)
tensor([[586.9000, 586.9000],
        [696.3400, 696.3400],
        [679.6401, 679.6401]], grad_fn=<AddmmBackward>)
Gradient of L w.r.t for W1 is: tensor([[-28.5370,  -5.8898,   2.1796, -15.0228, -19.6454],
        [-28.5370,  -5.8898,   2.1796, -15.0228, -19.6454],
        [-28.5370,  -5.8898,   2.1796, -15.0228, -19.6454],
        [-28.5370,  -5.8898,   2.1796, -15.0228, -19.6454],
        [-28.5370,  -5.8898,   2.1796, -15.0228, -19.6454],
        [-28.5370,  -5.8898,   2.1796, -15.0228, -19.6454],
        [-28.5370,  -5.8898,   2.1796, -15.0228, -19.6454],
        [-28.5370,  -5.8898,   2.1796, -15.0228, -19.6454],
        [-28.5370,  -5.8898,   2.1796, -15.0228, -19.6454],
        [-28.5370,  -5.8898,   2.1796, -15.0228, -19.6454]])
Gradient of L w.r.t for b1 is: tensor([-