### Matrix formulation using pytorch

In [2]:
import torch

In [3]:
torch.tensor([[2, 3, 5],[1, 2, 9]])

tensor([[2, 3, 5],
        [1, 2, 9]])

In [4]:
torch.rand(2, 2)

tensor([[0.3335, 0.3502],
        [0.7910, 0.1650]])

In [5]:
a = torch.rand((3, 5))
a.shape

torch.Size([3, 5])

In [6]:
print(a)

tensor([[0.1490, 0.3160, 0.6597, 0.5707, 0.4763],
        [0.9019, 0.6169, 0.3325, 0.6233, 0.0053],
        [0.5741, 0.1126, 0.9718, 0.4415, 0.1306]])


### Matrix formation using numpy

In [7]:
import numpy as np

np.array([[2, 3, 5], [1, 2, 9]])

array([[2, 3, 5],
       [1, 2, 9]])

In [8]:
np.random.rand(2, 2)

array([[0.88709775, 0.07082287],
       [0.1096779 , 0.46369997]])

In [9]:
a = np.random.randn(3, 5)
a.shape

print(a)

[[ 0.8493118  -0.83921889 -0.68527776  0.40523211  0.01020729]
 [ 0.3509976   1.14614562 -0.86163311  0.09308297  0.82061612]
 [-0.38461001 -0.58777828  1.77984613  0.00860337 -0.59306172]]


### Construct a randomly initialized matrix and operation using Pytorch

In [10]:
a = torch.rand((2, 2))
b = torch.rand((2, 2))

print(a)

print(b)

tensor([[0.8409, 0.7544],
        [0.4864, 0.6440]])
tensor([[0.4460, 0.1140],
        [0.3941, 0.2157]])


In [11]:
torch.matmul(a, b)

tensor([[0.6724, 0.2585],
        [0.4708, 0.1943]])

In [12]:
a * b

tensor([[0.3750, 0.0860],
        [0.1917, 0.1389]])

### Construct a randomly initialized matrix and operation using NumPy

In [13]:
a = np.random.rand(2, 2)
b = np.random.rand(2, 2)

In [14]:
np.dot(a, b)

# normal matrix multiplication

# Where the condition of number of columns of first array should be equal to number of rows of second array is checked

# than only numpy.dot() function take place else it shows an error.

array([[0.7653224 , 0.48476201],
       [0.23174327, 0.24750403]])

In [15]:
np.multiply(a, b)

# '*' operation carries out element-wise multiplication on array elements.

# The element at a[i][j] is multiplied with b[i][j]. This happens for all elements of array.

array([[0.16953529, 0.54954758],
       [0.23071601, 0.03469407]])

### Construct a 5x3 matrix, uninitialized:

In [16]:
x = torch.empty(5, 3)
print(x)

tensor([[1.0102e-38, 1.0286e-38, 1.0194e-38],
        [9.6429e-39, 9.2755e-39, 9.1837e-39],
        [9.3674e-39, 1.0745e-38, 1.0653e-38],
        [9.5510e-39, 1.0561e-38, 1.0194e-38],
        [1.1112e-38, 1.0561e-38, 9.9184e-39]])


### Construct a matrix filled zeros and of dtype long:

In [17]:
a_torch = torch.zeros(2, 2)

In [18]:
a_numpy = np.zeros((2, 2))

In [19]:
x = torch.zeros(5, 3, dtype=torch.long)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [20]:
b_torch = torch.ones(2, 2)

In [21]:
b_numpy = np.zeros((2, 2))

print(a_numpy)

[[0. 0.]
 [0. 0.]]


In [22]:
x = torch.zeros(5, 3, dtype = torch.long)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [23]:
b_torch = torch.ones(2, 2)

print(b_torch)

tensor([[1., 1.],
        [1., 1.]])


In [24]:
b_numpy = np.ones((2, 2))

print(b_numpy)

[[1. 1.]
 [1. 1.]]


In [25]:
c_numpy = np.identity(2)

# Python NumPy identity() is an inbuilt NumPy function that is used for returning a matrix i.e.,

# a 2D array having 1's at its main diagonal and 0's elsewhere

print(c_numpy)

[[1. 0.]
 [0. 1.]]


In [26]:
c_torch = torch.eye(2)

print(c_torch)

tensor([[1., 0.],
        [0., 1.]])


In [27]:
d_torch = torch.from_numpy(c_numpy)

print(d_torch)

tensor([[1., 0.],
        [0., 1.]], dtype=torch.float64)


In [28]:
d = c_torch.numpy()

print(d)

[[1. 0.]
 [0. 1.]]


### Construct a tensor directly from data:

In [29]:
x = torch.tensor([5.5, 3])
print(x)

tensor([5.5000, 3.0000])


### Create a tensor based on an existing tensor. These methods will reuse properties of the input tensor, e.g. dtype, unless new values are provided by user

In [30]:
x = x.new_ones(5, 3, dtype=torch.double) #new_* methods take in sizes
print(x)

x = torch.randn_like(x, dtype=torch.float) # override dtype!
print(x) # result has the same size

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[-1.3959, -0.3461,  0.1696],
        [-0.3948, -0.7275,  1.3900],
        [ 1.1550, -0.8947,  0.6415],
        [-0.1248, -0.9158,  0.8163],
        [ 0.2196, -0.0094,  0.3906]])


In [31]:
print(x.size())

torch.Size([5, 3])


In [32]:
y = torch.rand(5, 3)
print(y)
print(x)
print(x + y)

tensor([[0.3655, 0.3404, 0.7335],
        [0.5155, 0.9287, 0.8116],
        [0.1133, 0.8328, 0.2075],
        [0.3790, 0.1071, 0.9723],
        [0.3013, 0.7502, 0.8146]])
tensor([[-1.3959, -0.3461,  0.1696],
        [-0.3948, -0.7275,  1.3900],
        [ 1.1550, -0.8947,  0.6415],
        [-0.1248, -0.9158,  0.8163],
        [ 0.2196, -0.0094,  0.3906]])
tensor([[-1.0304, -0.0057,  0.9031],
        [ 0.1207,  0.2012,  2.2016],
        [ 1.2683, -0.0620,  0.8490],
        [ 0.2542, -0.8087,  1.7886],
        [ 0.5210,  0.7408,  1.2052]])


In [33]:
print(torch.add(x, y))

tensor([[-1.0304, -0.0057,  0.9031],
        [ 0.1207,  0.2012,  2.2016],
        [ 1.2683, -0.0620,  0.8490],
        [ 0.2542, -0.8087,  1.7886],
        [ 0.5210,  0.7408,  1.2052]])


In [34]:
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print(result)

tensor([[-1.0304, -0.0057,  0.9031],
        [ 0.1207,  0.2012,  2.2016],
        [ 1.2683, -0.0620,  0.8490],
        [ 0.2542, -0.8087,  1.7886],
        [ 0.5210,  0.7408,  1.2052]])


In [35]:
# adds x to y
y.add_(x)
print(y)

tensor([[-1.0304, -0.0057,  0.9031],
        [ 0.1207,  0.2012,  2.2016],
        [ 1.2683, -0.0620,  0.8490],
        [ 0.2542, -0.8087,  1.7886],
        [ 0.5210,  0.7408,  1.2052]])


In [36]:
print(x)

print(x[:, 1])

tensor([[-1.3959, -0.3461,  0.1696],
        [-0.3948, -0.7275,  1.3900],
        [ 1.1550, -0.8947,  0.6415],
        [-0.1248, -0.9158,  0.8163],
        [ 0.2196, -0.0094,  0.3906]])
tensor([-0.3461, -0.7275, -0.8947, -0.9158, -0.0094])


In [37]:
x = torch.randn(4, 4)

y = x.view(16)

z = x.view(-1, 8) # the size -1 is inferred from other dimensions

print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


### numpy.random.randn generates samples from the normal distribution (mean 0 and variance 1, while numpy.random.rand from a uniform distribution(in the range[0,1]))

In [38]:
# if you have one element tensor, use .item() to get the values as a Python number

In [39]:
x = torch.randn(1)
print(x)
print(x.item())

tensor([0.8300])
0.8300303816795349


## NumPy Bridge

In [40]:
a = torch.ones(5)
print(a)

tensor([1., 1., 1., 1., 1.])


In [41]:
b = a.numpy()
print(b)

[1. 1. 1. 1. 1.]


In [42]:
a.add_(1)
print(a)
print(b)

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [43]:
import numpy as np
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


## CUDA Tensors

In [44]:
if torch.cuda.is_available():
    device = torch.device("cuba")
    y = torch.ones_like(x, device=device)
    x = x.to(device)
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))

### Forward Propogation

In [45]:
a = torch.Tensor([2])
b = torch.Tensor([-4])
c = torch.Tensor([-2])
d = torch.Tensor([2])

e = a + b
f = c * d

g = e * f
print(e, f, g)

tensor([-2.]) tensor([-4.]) tensor([8.])


### Backward Propogation

In [46]:
import torch

x = torch.tensor(-3., requires_grad=True)
y = torch.tensor(5., requires_grad=True)
z = torch.tensor(-2., requires_grad=True)

q = x + y
f = q * z

f.backward()

print("Gradient of z is: " + str(z.grad))
print("Gradient of y is: " + str(y.grad))
print("Gradient of x is: " + str(x.grad))

Gradient of z is: tensor(2.)
Gradient of y is: tensor(-2.)
Gradient of x is: tensor(-2.)


#### initialize tensors x, y and z to values 4, -3 and 5. Put the sum of tensors x and y in q, put the product of q and z in f. Calculate the derivatives of the computational graph.Print the gradients of the x, y and z tensors.

In [47]:
# Initialize x, y and z to values 4, -3 and 5
x = torch.tensor(4., requires_grad=True)
y = torch.tensor(-3., requires_grad=True)
z = torch.tensor(5., requires_grad=True)

# Set q to sum of x and y, set f to product of q with z
q = x + y
f = q * z

# Compute the derivatives
f.backward()

print("Gradient of x is: " + str(x.grad))
print("Gradient of y is: " + str(y.grad))
print("Gradient of z is: " + str(z.grad))

Gradient of x is: tensor(5.)
Gradient of y is: tensor(5.)
Gradient of z is: tensor(1.)


#### Initialize a random tensors x, y and z, each having shape(1000, 1000) multiply x with y, putting the result in tensor q. Do an elementwise multiplication of tensor q, putting the results in f

In [48]:
# Initialize x, y and z to values 4, -3 and 5
x = torch.rand(1000, 1000, requires_grad=True)
y = torch.rand(1000, 1000, requires_grad=True)
z = torch.rand(1000, 1000, requires_grad=True)

# Multiply x with y
q = torch.matmul(x, y)

# Multiply elementwise z with q
f = z * q

mean_f = torch.mean(f)
print(mean_f)

mean_f.backward()

# Print the gradients
print("Gradient of x is: " + str(x.grad))
print("Gradient of y is: " + str(y.grad))
print("Gradient of z is: " + str(z.grad))

tensor(124.9837, grad_fn=<MeanBackward0>)
Gradient of x is: tensor([[0.0003, 0.0002, 0.0002,  ..., 0.0002, 0.0002, 0.0002],
        [0.0003, 0.0002, 0.0002,  ..., 0.0002, 0.0002, 0.0002],
        [0.0003, 0.0002, 0.0002,  ..., 0.0002, 0.0003, 0.0002],
        ...,
        [0.0003, 0.0002, 0.0002,  ..., 0.0002, 0.0002, 0.0002],
        [0.0003, 0.0002, 0.0002,  ..., 0.0002, 0.0002, 0.0002],
        [0.0002, 0.0002, 0.0002,  ..., 0.0003, 0.0002, 0.0002]])
Gradient of y is: tensor([[0.0003, 0.0003, 0.0003,  ..., 0.0002, 0.0003, 0.0003],
        [0.0002, 0.0003, 0.0002,  ..., 0.0002, 0.0002, 0.0002],
        [0.0003, 0.0003, 0.0003,  ..., 0.0002, 0.0003, 0.0002],
        ...,
        [0.0003, 0.0002, 0.0002,  ..., 0.0002, 0.0002, 0.0002],
        [0.0003, 0.0003, 0.0003,  ..., 0.0002, 0.0003, 0.0002],
        [0.0002, 0.0003, 0.0002,  ..., 0.0002, 0.0002, 0.0002]])
Gradient of z is: tensor([[0.0002, 0.0002, 0.0002,  ..., 0.0003, 0.0003, 0.0003],
        [0.0002, 0.0002, 0.0002,  ..., 0.000

### Fully connected neural networks

In [49]:
import torch

input_layer = torch.rand(10) # declaration of input values containing 10 units

w1 = torch.rand(10, 20)

w2 = torch.rand(20, 20)

w3 = torch.rand(20, 4)

h1 = torch.matmul(input_layer, w1) # first hidden layer

h2 = torch.matmul(h1, w2) # subsequent hidden layer

output_layer = torch.matmul(h2, w3) # output layer

print(h1)

print(h2)

print(output_layer)

tensor([1.7602, 1.3477, 1.4082, 1.9152, 2.0264, 1.6854, 1.3594, 1.0299, 1.5996,
        1.6457, 0.9111, 1.4088, 1.5390, 1.2327, 2.1149, 1.1533, 1.8238, 1.5783,
        1.7510, 1.7809])
tensor([16.3865, 15.1595, 14.9219, 17.0178, 14.5462, 12.5659, 17.3412, 15.7485,
        14.2710, 15.7446, 15.3054, 14.2609, 15.7503, 14.8172, 12.9473, 15.2674,
        15.6680, 15.6003, 13.1367, 17.7204])
tensor([170.0398, 166.2923, 154.0814, 179.5227])


### Building a neural network - PyTorch style

In [50]:
import torch

import torch.nn as nn

## Define a class Net which inherits from nn.Module (look that we are importing torch.nn)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(10, 20)
        self.fc2 = nn.Linear(20, 20)
        self.output = nn.Linear(20, 4)
        
# In the __init__ method, we define our parameters, the tensors of weights.
# For fully connected layers, they are called nn.Linear.
# the first parameter is the number of units of the current layer, while the second parameter is the number of units in the next layer.

# In the forward method, we apply all those weights to our input.
    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.output(x)
        return x

# Finally, we instantiate our model, by calling class Net, and we get the result, by applying object net over our input_layer.
input_layer = torch.rand(10)
net = Net()
result = net(input_layer)

print(result)

tensor([-0.0774,  0.1597,  0.2369, -0.2951], grad_fn=<AddBackward0>)


### Creating neural network the hard way

In [51]:
import torch

# Your input will be image of size (28, 28), so images containing 784 pixels.
input_layer = torch.rand(784)

# Initialize the weights of the neural network
weight_1 = torch.rand(784, 200) # a hidden layer with 200 units, and an output layer with 10 classes.
weight_2 = torch.rand(200, 10)

# Multiply input_layer with weight_1
hidden_1 = torch.matmul(input_layer, weight_1)

# Multiply hidden_1 withh weight_2
output_layer = torch.matmul(hidden_1, weight_2)
print(output_layer)

tensor([20168.9570, 20113.0781, 20336.9082, 20527.5156, 20118.3789, 19856.6074,
        19793.1328, 19242.2520, 20205.8887, 19653.0840])


### Creating neural network Object Oriented way

In [52]:
import torch

import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 200)
        self.fc2 = nn.Linear(200, 10)
# Instantiate all 2 linear layers
        
# Use the instantiated layers and return x
    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        return x

input_layer = torch.rand(784)
net = Net()
result = net(input_layer)

print(result)

tensor([ 0.0557, -0.0043,  0.1090, -0.2566,  0.2374, -0.1103, -0.3727, -0.3776,
         0.1971, -0.0331], grad_fn=<AddBackward0>)


### Activation Functions

In [53]:
input_layer = torch.tensor([2., 1.])

weight_1 = torch.tensor([[0.45, 0.32], [-0.12, 0.29]])

hidden_layer = torch.matmul(input_layer, weight_1)

weight_2 = torch.tensor([[0.48, -0.12], [0.64, 0.91]])

output_layer = torch.matmul(hidden_layer, weight_2)

print(output_layer)

tensor([0.9696, 0.7527])


### Matrix multiplication is a linear transformation

In [54]:
import torch 

input_layer = torch.tensor([2., 1.])

weight_1 = torch.tensor([[0.45, 0.32], [-0.12, 0.29]])
weight_2 = torch.tensor([[0.48, -0.12], [0.64, 0.91]])

weight = torch.matmul(weight_1, weight_2)

output_layer = torch.matmul(input_layer, weight)

print(output_layer)

print(weight)

# as seen output in the above two cases is exactly similar which concludes:
# Network with multiple layers which do not contain non-linearity can be expressed as neural networks with one layer.

tensor([0.9696, 0.7527])
tensor([[0.4208, 0.2372],
        [0.1280, 0.2783]])


In [55]:
import torch.nn as nn

relu = nn.ReLU()

tensor_1 = torch.tensor([2., -4.])

print(relu(tensor_1))

tensor_2 = torch.tensor([[2., -4.],[1.2, 0.]])

print(relu(tensor_2))

tensor([2., 0.])
tensor([[2.0000, 0.0000],
        [1.2000, 0.0000]])


### without applying Relu

In [56]:
input_layer = torch.tensor([[ 0.0401, -0.9005, 0.0397, -0.0876]])

weight_1 = torch.tensor([[-0.1094, -0.8285, 0.0416, -1.1222],
                         [0.3327, -0.0461, 1.4473, -0.8070],
                         [0.0681, -0.7058, -1.8017, 0.5857],
                         [0.8764, 0.9618, -0.4505, 0.2888]])

weight_2 = torch.tensor([[0.6856, -1.7650, 1.6375, -1.5759],
                        [-0.1092, -0.1620, 0.1951, -0.1169],
                        [-0.5120, 1.1997, 0.8483, -0.2476],
                        [-0.3369, 0.5617, -0.6658, 0.2221]])

weight_3 = torch.tensor([[0.8824, 0.1268, 1.1951, 1.3061],
                        [-0.8753, -0.3277, -0.1454, -0.0167],
                        [0.3582, 0.3254, -1.8509, -1.4205],
                        [0.3786, 0.5999, -0.5665, -0.3975]])

# Calculate the first and second hidden layer
hidden_1 = torch.matmul(input_layer, weight_1)
hidden_2 = torch.matmul(hidden_1, weight_2)

# Calculate the output
print(torch.matmul(hidden_2, weight_3))

# Calculate weight_composed_1 and weight
weight_composed_1 = torch.matmul(weight_1, weight_2)
weight = torch.matmul(weight_composed_1, weight_3)

# Multiply input_layer with weight
print(torch.matmul(input_layer, weight))

tensor([[0.2653, 0.1311, 3.8219, 3.0032]])
tensor([[0.2653, 0.1311, 3.8219, 3.0032]])


### After applying Relu

In [57]:
import torch.nn as nn

relu = nn.ReLU()

input_layer = torch.tensor([[ 0.0401, -0.9005, 0.0397, -0.0876]])

weight_1 = torch.tensor([[-0.1094, -0.8285, 0.0416, -1.1222],
                         [0.3327, -0.0461, 1.4473, -0.8070],
                         [0.0681, -0.7058, -1.8017, 0.5857],
                         [0.8764, 0.9618, -0.4505, 0.2888]])

weight_2 = torch.tensor([[0.6856, -1.7650, 1.6375, -1.5759],
                        [-0.1092, -0.1620, 0.1951, -0.1169],
                        [-0.5120, 1.1997, 0.8483, -0.2476],
                        [-0.3369, 0.5617, -0.6658, 0.2221]])

weight_3 = torch.tensor([[0.8824, 0.1268, 1.1951, 1.3061],
                        [-0.8753, -0.3277, -0.1454, -0.0167],
                        [0.3582, 0.3254, -1.8509, -1.4205],
                        [0.3786, 0.5999, -0.5665, -0.3975]])

# Apply non-linearity on hidden_1 and hidden_2
hidden_1_activated = relu(torch.matmul(input_layer, weight_1))
hidden_2_activated = relu(torch.matmul(hidden_1_activated, weight_2))
print(torch.matmul(hidden_2_activated, weight_3))

# Apply non-linearity in the product of first two weights.
weight_composed_1_activated = relu(torch.matmul(weight_1, weight_2))

# Multiply weight_composed_1_activated with weight_3
weight = torch.matmul(weight_composed_1_activated, weight_3)

# Multiply input_layer with weight
print(torch.matmul(input_layer, weight))

tensor([[-0.2770, -0.0345, -0.1410, -0.0664]])
tensor([[-0.2117, -0.4782,  4.0438,  3.0417]])


### Predicted scores are -1.2 for class 0(cat), 0.12 for class 1(car) and 4.8 for class 2(frog). The ground truth is class 2(frog). Compute the loss function in PyTorch. Initialize the tensor of scores with numbers [[-1.2, 0.12, 4.8]], and the tensor of ground truth [2]. Initiate the cross-entropy loss and call it criterion. Compute and print the loss.

In [58]:
import torch
import torch.nn as nn
# Initialize the scores and ground truth
logits = torch.tensor([[-1.2, 0.12, 4.8]])
ground_truth = torch.tensor([2])

# Instantiate cross entropy loss
criterion = nn.CrossEntropyLoss()

# Compute and print the loss
loss = criterion(logits, ground_truth)
print(loss)

tensor(0.0117)


### Loss function of random scores

In [59]:
# import torch and torch.nn
import torch
import torch.nn as nn

# initialize logits and ground truth
logits = torch.rand(1, 1000)
ground_truth = torch.tensor([111])

# instantiate cross-entropy loss
criterion = nn.CrossEntropyLoss()

# calculate and print the loss
loss = criterion(logits, ground_truth)
print(loss)

tensor(6.5051)


## Training a Neural Network

### Preparing Dataset in Pytorch

In [70]:
import torch
import torchvision # package which deals with datasets and pretrained neural nets
import torch.utils.data
import torchvision.transforms as transforms

# define a transformation of images to torch tensors, using transforms.ToTensor() function
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.4914, 0.48216, 0.44653), (0.24703, 0.24349, 0.26159))])

### Step1: Datasets and Dataloaders

In [71]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,download=True, transform=transform)

# root tells the location of the dataset

# set the download flag to True, which tells the PyTorch that if dataset is not in the specified folder, to download and put it there

# transforming images to torch tensors by applying the transformation

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=4)

# The first argument of the object is the dataset.

# Then we decide the size of the minibatch. Our dataset is too large to be used entirely, instead we decide for each iteration to use only 32 randomly sampled images.

# Random part come from shuffle flag.

# And finally, we decide how many processes we are going to use to fetch the data in num_workers.

testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=4)

Files already downloaded and verified
Files already downloaded and verified


### inspecting the dataloader

In [62]:
# Compute the shape of the training set and testing set
print(testloader.dataset.data.shape, trainloader.dataset.data.shape)

# Print the computed shapes
print(trainset_shape, testset_shape)

# Compute the size of the minibatch for training set and testing set
trainset_batchsize = trainloader.batch_size

testset_batchsize = testloader.batch_size

# Print sizes of the minibatch
print(trainset_batchsize, testset_batchsize)

(10000, 32, 32, 3) (50000, 32, 32, 3)


NameError: name 'trainset_shape' is not defined

### Preparing MNIST dataset

In [63]:
# Transform the data to torch tensors and normalize it
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307), ((0.3081)))])

trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# root tells the location of the dataset

# set the download flag to True, which tells the PyTorch that if dataset is not in the specified folder, to download and put it there

# transforming images to torch tensors by applying the transformation

testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=0)

# The first argument of the object is the dataset.

# Then we decide the size of the minibatch. Our dataset is too large to be used entirely, instead we decide for each iteration to use only 32 randomly sampled images.

# Random part comes from shuffle flag.

### Inspecting the dataloader

In [64]:
# Compute the shape of the training set and testing set
trainset_shape = trainloader.dataset.data.shape
testset_shape = testloader.dataset.data.shape

# Print the computed shapes
print(trainset_shape, testset_shape)

# Compute the size of the minibatch for training set and testing set
trainset_batchsize = trainloader.batch_size
testset_batchsize = testloader.batch_size

# Print sizes of the minibatch
print(trainset_batchsize, testset_batchsize)

torch.Size([60000, 28, 28]) (10000, 32, 32, 3)
32 32


### Step2: Creating a neural network

In [72]:
import torch

import torch.nn as nn

import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(32 * 32 * 3, 500)
        self.fc2 = nn.Linear(500, 10)
# Instantiate all 2 linear layers
        
# Use the instantiated layers and return x
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

### Training the Neural Network

First we instantiate the net, the loss(cross-entropy) and the optimizer(Adam).
We chose the Adam optimizer which works very well, and is a version of gradient descent.
Then we loop 10 times over the entire dataset. We use zero_grad() function in order to not accumulate gradients from the previous iterations.
When using the iterators, we need to keep track of the number of items in the iterator. This is achieved by an in-built method called enumerate()
The forward step is done using net(inputs), giving us the result (in this case output)
We compute the loss function in the next line, and then we compute the gradients using loss.backend()
Finally, we change the weights using our optimizer with the optimizer.step() command
The line inputs = inputs.view(-1, 32 * 32 * 3) simply puts all the entries of the images into vectors.

In [73]:
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=3e-4)

for epoch in range(10): # loop over the dataset multiple times
    for i, data in enumerate(trainloader, 0):
        # Get the inputs
        inputs, labels = data
        inputs = inputs.view(-1, 32 * 32 * 3)
        
# Zero the parameter gradients
        optimizer.zero_grad()
    
        outputs = net(inputs) # Forward + backward + optimize 
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

### Using the net to get predictions

we first set the net in test (evaluation) mode using net.eval()
The network gives us scores for each class, and
we get the class with the highest score (using max function) as prediction.
we save the predictions and compute the accuracy.

In [75]:
correct, total = 0, 0
predictions = []
net.eval()

for i, data in enumerate(testloader, 0):
    inputs, labels = data
    inputs = inputs.view(-1, 32 * 32 * 3)
    
    # Do the forward pass and get the predictions
    outputs = net(inputs)
    _, predicted = torch.max(outputs.data, 1)
    predictions.append(outputs)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    
print('The testing set accuracy of the network is: %d %%' % (100 * correct / total))

The testing set accuracy of the network is: 52 %


## CNN: OOP vs Functional

In [77]:
import torch
import torch.nn

image = torch.rand(16, 3, 32, 32)

conv_filter = torch.nn.Conv2d(in_channels=3, out_channels=1, kernel_size=5, stride=1, padding=0)

# In torch.nn we need to create a Conv2d filter with these parameters (input channels, output channels, filter size, stride, padding)

output_feature = conv_filter = conv_filter(image)
print(output_feature.shape)

torch.Size([16, 1, 28, 28])


In [78]:
import torch
import torch.nn.functional as F

image = torch.rand(16, 3, 32, 32)

filter = torch.rand(1, 3, 5, 5)

# in functional, you simply create a random filter

out_feat_F = F.conv2d(image, filter, stride=1, padding=0)

# to add padding, we just put 1 in the padding parameter.

print(out_feat_F.shape)

torch.Size([16, 1, 28, 28])


### Max Pooling

In [80]:
import torch
import torch.nn

im = torch.Tensor([[[[3, 1, 3, 5], [6, 0, 7, 9], [3, 2, 1, 4], [0, 2, 4, 3]]]])

max_pooling = torch.nn.MaxPool2d(2)

output_feature = max_pooling(im)

print(output_feature)

tensor([[[[6., 9.],
          [3., 4.]]]])


In [81]:
import torch
import torch.nn.functional as F

im = torch.Tensor([[[[3, 1, 3, 5], [6, 0, 7, 9], [3, 2, 1, 4], [0, 2, 4, 3]]]])

output_feature_F = F.max_pool2d(im, 2)

print(output_feature_F)

tensor([[[[6., 9.],
          [3., 4.]]]])


### Average pooling

In [83]:
import torch
import torch.nn

im = torch.Tensor([[[[3, 1, 3, 5], [6, 0, 7, 9], [3, 2, 1, 4], [0, 2, 4, 3]]]])

max_pooling = torch.nn.AvgPool2d(2)

output_feature = max_pooling(im)

print(output_feature)

tensor([[[[2.5000, 6.0000],
          [1.7500, 3.0000]]]])


In [84]:
import torch
import torch.nn.functional as F

im = torch.Tensor([[[[3, 1, 3, 5], [6, 0, 7, 9], [3, 2, 1, 4], [0, 2, 4, 3]]]])

output_feature_F = F.avg_pool2d(im, 2)

print(output_feature_F)

tensor([[[[2.5000, 6.0000],
          [1.7500, 3.0000]]]])


## Training Convolutional Neural Network

In [87]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

### Dataloaders

In [89]:
# define a transformation of images to torch tensors, using transforms.ToTensor() function
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.4914, 0.48216, 0.44653), (0.24703, 0.24349, 0.26159))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

# root tells the location of the dataset

# set the download flag to True, which tells the PyTorch that if dataset is not in the specified folder, to download and put it there

# transforming images to torch tensors by applying the transformation

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=4)

# The first argument of the object is the dataset.

# Then we decide the size of the minibatch. Our dataset is too large to be used entirely, instead we decide for each iteration to use only 32 randomly sampled images.

# Random part comes from shuffle flag.

# And finally, we decide how many processes we are going to use to fetch the data in num_workers.

testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=4)

Files already downloaded and verified
Files already downloaded and verified


 ### Building a CNN

In [105]:
class Net(nn.Module):
    def __init__(self, num_classes=10):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc = nn.Linear(128 * 4 * 4, num_classes) # The first dimension of it is the number of units the last layer had (depth) x H x W
        # 4 comes from dividing 32 by 2 three times, for each of the pooling we apply after conv filters
        
    def forward(self, x): # applying parameters to the input
        x = self.pool(F.relu(self.conv1(x))) # apply the firs conv filter to the input, following by relu (using functional way) and by the pooling layer
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
            
        x = x.view(-1, 128 * 4 * 4)
        # prepare the net for the fully connected layer, by squeezing all three dimensions of depth (128), width (4) and height (4) in one dimension,
        return self.fc(x) # and then apply the fully connected layer

### Optimizer and Loss Function

In [106]:
net=Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=3e-4)

### Training a CNN

In [107]:
for epoch in range(10): #loop over all the data in trainloader multiple times
    for i, data in enumerate(trainloader, 0):
        # Get the inputs
        inputs, labels = data
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        # Forward + backward + optimize
        outputs = net(inputs) #get the data from the loader, and pass it to the net, with the network giving us the predictions
        loss = criterion(outputs, labels) # compute the loss function based on the predictions and the labels
        loss.backward() # compute the gradients using backward()
        optimizer.step() # update the weights using our optimizer
        
    print('Finished Training')

Finished Training
Finished Training
Finished Training
Finished Training
Finished Training
Finished Training
Finished Training
Finished Training
Finished Training
Finished Training


### Evaluating the results

In [108]:
correct, total = 0, 0
predictions = []
net.eval()
for i,data in enumerate(testloader, 0):
    inputs, labels = data
    outputs = net(inputs)
    _, predicted = torch.max(outputs.data, 1)
    predictions.append(outputs)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    
print('The testing set accuracy of the network is: %d %%' % (100 * correct / total))
# divide the number of correct predictions by the total number of points in the testing set

The testing set accuracy of the network is: 74 %


## Sequential module - init method

In [111]:
class Net(nn.Module):
    def __init__(self, num_classes):
        super(Net, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=20, out_channels=40, kernel_size=3, padding=1)
        
        self.relu = nn.ReLU()
        
        self.pool = nn.MaxPool2d(2, 2)
        
        self.fc1 = nn.Linear(7 * 7 * 40, 1024)
        self.fc2 = nn.Linear(1024, 2048)
        self.fc3 = nn.Linear(2048, 10)

### The sequential module - forward() method

In [113]:
def forward():
    x = self.relu(self.conv1(x))
    x = self.relu(self.pool(self.conv2(x)))
    x = self.relu(self.conv3(x))
    x = self.relu(self.pool(self.conv4(x)))
    x = x.view(-1, 7 * 7 * 40)
    x = self.relu(self.fc1(x))
    x = self.relu(self.fc2(x))
    x = self.fc3(x)
    return x

In [114]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # Declare all the layers for feature extraction
        self.features = nn.Sequential(nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1),
                                     nn.ReLU(inplace=True),
                                     nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3, padding=1),
                                     nn.MaxPool2d(2, 2), nn.ReLU(inplace=True),
                                     nn.Conv2d(in_channels=10, out_channels=20, kernel_size=3, padding=1),
                                     nn.ReLU(inplace=True),
                                     nn.Conv2d(in_channels=20, out_channels=40, kernel_size=3, padding=1),
                                     nn.MaxPool2d(2, 2), nn.ReLU(inplace=True))
        
        # Declare all the layers of classification
        self.classifier = nn.Sequential(nn.Linear(7 * 7 * 40, 1024), nn.ReLU(inplace=True),
                                        nn. Linear(1024, 2048), nn.ReLU(inplace=True),
                                        nn.Linear(2048, 10))
        
        # define all the convolutions, poolings, fully-connected layers etc same as before,
        # but now the order of operators matters also in declaration
        # Additionally, we encapsulate them within nn.Sequential()

In [115]:
def forward(self, x): # instead of applying each operation, we actually need to apply each sequential module
    
    # Apply the feature extractor in the input
    x = self.features(x) # give the images to the first module which we called features and contains all the convolutional and pooling layers
    
    # Squeeze the three spatial dimensions in one
    x = x.view(-1, 7 * 7 * 40)
    
    # Classify the images
    x = self.classifier(x) # apply the classifier containing three fully connected layers.
    return x

## Using validation sets in PyTorch

In [130]:
import torch
import torchvision # package which deals with datasets and pretrained neural nets
import torch.utils.data
import torchvision.transforms as transforms
import numpy as np

# Shuffle the indices
indices = np.arange(60000) #Use numpy.arange() to create an array containing numbers [0, 59999] and then randomly shuffle the array.
np.random.shuffle(indices)

# Build the train loader
train_loader = torch.utils.data.DataLoader(torchvision.datasets.MNIST(root='./data', download=True, train=True,
                     transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])),
                     batch_size=64, shuffle=False, sampler=torch.utils.data.SubsetRandomSampler(indices[:55000]))

# Build the validation loader
val_loader = torch.utils.data.DataLoader(torchvision.datasets.MNIST(root='./data', download=True, train=True,
                     transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])),
                     batch_size=64, shuffle=False, sampler=torch.utils.data.SubsetRandomSampler(indices[:55000]))

# Build the test loader
test_loader = torch.utils.data.DataLoader(torchvision.datasets.MNIST(root='./data', download=True, train=False,
                     transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])),
                     batch_size=64, shuffle=False)

### Building CNN

In [134]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # Instantiate two convolutional layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=5, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=5, out_channels=10, kernel_size=3, padding=1)

        # Instantiate the ReLU nonlinearity
        self.relu = nn.ReLU()
        
        # Instantiate a fully connected layer
        self.pool = nn.MaxPool2d(2, 2)
        
        # Instantiate a fully connected layer
        self.fc = nn.Linear(7 * 7 * 10, 10)
        
# The first dimension of it is the number of units the last layer had (depth) x H x W
# 7 comes from dividing 28 by 2 three times, for each of the pooling we apply after conv filters (MNIST images are of size 1x28x28)
        
    def forward(self, x): # applying parameters to the input
        x = self.pool(F.relu(self.conv1(x))) # apply the firs conv filter to the input, following by relu (using functional way) and by the pooling layer
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 7 * 7 * 10)
        # prepare the net for the fully connected layer, by squeezing all three dimensions of depth, width and height in one dimension,
        return self.fc(x) # and then apply the fully connected layer

### Optimizer & Loss Function

In [135]:
net=Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=3e-4)

### Training CNN

In [136]:
for epoch in range(10): #loop over all the data in trainloader multiple times
    for i, data in enumerate(train_loader, 0):
        # Get the inputs
        inputs, labels = data
        
        # Zero the parameter gradients
        optimizer.zero_grad()
        # Forward + backward + optimize
        outputs = net(inputs) #get the data from the loader, and pass it to the net, with the network giving us the predictions
        loss = criterion(outputs, labels) # compute the loss function based on the predictions and the labels
        loss.backward() # compute the gradients using backward()
        optimizer.step() # update the weights using our optimizer
        
    print('Finished Training')

Finished Training
Finished Training
Finished Training
Finished Training
Finished Training
Finished Training
Finished Training
Finished Training
Finished Training
Finished Training


### Evaluating the result

In [145]:
correct, total = 0, 0
predictions = []
net.eval()
for i,data in enumerate(val_loader, 0):
    inputs, labels = data
    outputs = net(inputs)
    _, predicted = torch.max(outputs.data, 1)
    predictions.append(outputs)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    
print('The testing set accuracy of the network is: %d %%' % (100 * correct / total))
# divide the number of correct predictions by the total number of points in the testing set

The testing set accuracy of the network is: 98 %


In [146]:
correct, total = 0, 0
predictions = []
net.eval()
for i,data in enumerate(test_loader, 0):
    inputs, labels = data
    outputs = net(inputs)
    _, predicted = torch.max(outputs.data, 1)
    predictions.append(outputs)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    
print('The testing set accuracy of the network is: %d %%' % (100 * correct / total))
# divide the number of correct predictions by the total number of points in the testing set

The testing set accuracy of the network is: 98 %


## Dropout

In [148]:
class Net(nn.Module):
    def __init__(self):
        
        # Define all the parameters of the net
        self.calssifier = nn.Sequential(
            nn.Linear(28*28, 200),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(200, 500),
            nn.ReLU(inplace=True),
            nn.Linear(500, 10))

# Remember for efficiency to use inplace=True as argument in ReLU() nonlinearity.
# Don't forget that the order of operations in sequential module matters.
# A fully connected (linear) layer takes as first argument, the number of units in the previous layer, and as second argument, the number of units in the next layer.
# As a reminder, the number of classes is 10.

In [150]:
# Apply the forward pass in the forward() method

def forward(self, x):
    
    # Do the forward pass
    return self.classifier(x)

# The entire network is contained in self.classifier.

### Batch Normalization

In [151]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        # Implement the sequential module for feature extraction
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channel=10, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(2, 2), nn.ReLU(inplace=True), nn.BatchNorm2d(10),
            nn.Conv2d(in_channels=10, out_channel=20, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(2, 2), nn.ReLU(inplace=True), nn.BatchNorm2d(20))
        
        #Implement the fully connected layer for classification
        self.fc = nn.Linear(in_features=7*7*20, out_features=10) # we use pooling twice, each time halving the size of the image.