<a href="https://colab.research.google.com/github/ammalik221/Mini-Projects/blob/master/PyTorch_Warm_up.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
######################################################
################# Basic Operations ###################
######################################################

import torch

""" Torch tensors are similar to numpy's ndarray, the difference is just 
that, they can be easily transferred to GPU. Any operation that we want 
to perform on ndarray can also be performed on tensors.
"""

# a 2x3 tensor
n_tensor = torch.Tensor([[1,2,3],
                         [5,6,7]])
print("New Tensor - \n", n_tensor,  "\n")

# a 2x3 tensor of zeros
z_tensor = torch.zeros(2, 3)
print("Tensor with all zeros - \n", z_tensor, "\n")

# a 2x3 tensor of random values in the range [0, 1)
r_tensor = torch.rand(2, 3)
print("Tensor with random value - \n", r_tensor, "\n")

# accessing a scalar object
s_tensor = torch.Tensor([[1,2,3],
                         [5,6,7]])
print("Element at a position - \n", s_tensor[0][1].item(), "\n")

# slicing
slice_tensor = torch.Tensor([[1,2,3],
                             [5,6,7]])
print("All rows, first column\n", slice_tensor[:, 0], "\n")
print("All rows, last column\n", slice_tensor[:, -1], "\n")
print("All columns, first row\n", slice_tensor[0, :], "\n")
print("All columns, last column\n", slice_tensor[-1, :], "\n")

# info of tensor
i_tensor = torch.Tensor([[1,2,3],
                         [5,6,7],
                         [8,9,10]])
print("Type - \n", i_tensor.type(), "\n")
print("Shape - \n", i_tensor.shape, "\n")
print("Dimensions - \n", i_tensor.dim(), "\n")

# reshape
re_tensor = torch.Tensor([[1, 2], [3, 4]])
print("Tensor before reshaping - \n", re_tensor, "\n")

re_tensor.view(1, 4)
print("Tensor after reshaping - \n", re_tensor, "\n")

# to and from numpy
nm_tensor = torch.Tensor([[1, 2], [3, 4]])
print("Type before changing tensor to array - ", type(nm_tensor), "\n")
nm_array = nm_tensor.numpy()
print("Type before changing tensor to array - ", type(nm_array), "\n")

# tensor operations
a = torch.randn(3, 3)
b = torch.randn(3, 3)

print("Tensor A - \n", a, "\n")
print("Tensor B - \n", b, "\n")
print("Transpose\n", a.t(), "\n")
print("Matrix Multiplication\n", a.mm(b), "\n")
print("Element-wise multiplication\n", a.mul(b), "\n")

# cuda related operations
print("True if GPU is available\n", torch.cuda.is_available(), "\n")

# transfer tensor to CPU
a.cpu()

# transfer tensor to GPU
a.cuda()

New Tensor - 
 tensor([[1., 2., 3.],
        [5., 6., 7.]]) 

Tensor with all zeros - 
 tensor([[0., 0., 0.],
        [0., 0., 0.]]) 

Tensor with random value - 
 tensor([[0.4279, 0.1944, 0.3696],
        [0.7924, 0.3070, 0.2803]]) 

Element at a position - 
 2.0 

All rows, first column
 tensor([1., 5.]) 

All rows, last column
 tensor([3., 7.]) 

All columns, first row
 tensor([1., 2., 3.]) 

All columns, last column
 tensor([5., 6., 7.]) 

Type - 
 torch.FloatTensor 

Shape - 
 torch.Size([3, 3]) 

Dimensions - 
 2 

Tensor before reshaping - 
 tensor([[1., 2.],
        [3., 4.]]) 

Tensor after reshaping - 
 tensor([[1., 2.],
        [3., 4.]]) 

Type before changing tensor to array -  <class 'torch.Tensor'> 

Type before changing tensor to array -  <class 'numpy.ndarray'> 

Tensor A - 
 tensor([[ 0.4958,  0.1476, -0.1507],
        [ 0.0566,  2.5377,  0.4458],
        [ 2.0285, -0.3072,  0.3294]]) 

Tensor B - 
 tensor([[ 0.6230, -0.8059, -0.4829],
        [-0.3902,  0.2282, -2.27

tensor([[ 0.4958,  0.1476, -0.1507],
        [ 0.0566,  2.5377,  0.4458],
        [ 2.0285, -0.3072,  0.3294]], device='cuda:0')

In [3]:
######################################################
######## A two layer network with Autograd ###########
######################################################

"""
Autograd is basically automatic differentiation, if we want to compute
gradients of a tensor, we set requires_grad = True. Setting this parameter
to true allows us to perform backpropogation on it and the gradient of that
tensor is stored in another tensor named tensor_name.grad.

The two layer network dimensions are - 
    input - 1000
    hidden - 100
    output - 10
"""

if torch.cuda.is_available:
    device = "cuda"
else:
    device = "cpu"

# define necessary variables
batch_size = 64
input_size = 1000
hidden_size = 100
output_size = 10

# random valued tensors for input and output
x = torch.randn(batch_size, input_size, device = device)
y = torch.randn(batch_size, output_size, device = device)

# initially weights are initialised as random
# since we want to perform backpropogation on the weights, we set requires_grad=True
w1 = torch.randn(input_size, hidden_size, requires_grad=True, device = device)
w2 = torch.randn(hidden_size, output_size, requires_grad=True, device = device)

learning_rate = 1e-6
epochs = 500
for e in range(epochs):

    # calculate output of our model
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # computing and printing loss
    loss = ((y_pred - y).pow(2).sum())
    print(e, loss.item())

    # .backward() calcualtes gradient of loss w.r.t all Tensores with 
    # requires_grad = True
    loss.backward()

    # since we don't want to build a computational graph for these steps
    # we use no_grad() function
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # zero out the gradients after each epoch, otherwise they
        # continue to pile up, for all epochs
        w1.grad.zero_()
        w2.grad.zero_()


0 23363194.0
1 17200710.0
2 15298066.0
3 15342801.0
4 16086257.0
5 16466439.0
6 15688248.0
7 13565120.0
8 10579801.0
9 7550438.0
10 5055274.5
11 3280721.0
12 2121585.5
13 1400894.25
14 958721.5625
15 686110.125
16 513923.1875
17 401541.3125
18 325029.21875
19 270632.3125
20 230145.015625
21 198871.09375
22 173929.125
23 153478.78125
24 136345.03125
25 121742.9453125
26 109142.0625
27 98164.2421875
28 88538.0703125
29 80049.1640625
30 72530.296875
31 65838.53125
32 59863.1953125
33 54528.328125
34 49748.796875
35 45449.734375
36 41572.953125
37 38072.67578125
38 34908.1953125
39 32041.353515625
40 29440.9765625
41 27078.25390625
42 24928.87109375
43 22971.33984375
44 21186.193359375
45 19558.171875
46 18070.06640625
47 16708.28515625
48 15461.0185546875
49 14318.5537109375
50 13270.8203125
51 12307.51171875
52 11421.884765625
53 10607.2841796875
54 9862.4580078125
55 9175.83203125
56 8542.2333984375
57 7957.28662109375
58 7416.564453125
59 6916.57568359375
60 6453.9794921875
61 6025.525

In [5]:
######################################################
################# PyTorch nn module ##################
######################################################

"""
nn package provides a set of Modules which can be used to build up a
neural network.
"""

if torch.cuda.is_available:
    device = "cuda"
else:
    device = "cpu"


batch_size, input_size, hidden_size, output_size = 64, 1024, 512, 10

# random valued tensors for input and output
x = torch.randn(batch_size, input_size, device=device)
y = torch.randn(batch_size, output_size, device=device)

# nn.Sequential can be used to define our model as a sequence of layers
# in this case, the input goes through the following transformations
# input --> Linear --> activation(Relu) --> Linear
# Linear module computes output using a linear function
# relu module applies relu function to the ouptut obtained from the last layer
model = torch.nn.Sequential(
          torch.nn.Linear(input_size, hidden_size),
          torch.nn.ReLU(),
          torch.nn.Linear(hidden_size, output_size),
        ).to(device)

learning_rate = 1e-4

# This is mean squared loss and elementwise_mean means the loss is mean
# of squared errors.
loss_fn = torch.nn.MSELoss(reduction='elementwise_mean')

# optim package  provides implementations of commonly used optimization 
# algorithms. The optimizer updates the weights of the model for us.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

epochs = 500
for e in range(epochs):

    # calculate output
    y_pred = model(x)

    # computing and printing loss
    loss = loss_fn(y_pred, y)
    print(e, loss.item())
    
    # zero out all the gradients
    optimizer.zero_grad()

    # compute gradient of the loss with respect to model parameters
    loss.backward()

    # update the parameters
    optimizer.step()



0 1.1975618600845337
1 1.1292026042938232
2 1.0640636682510376
3 1.0019253492355347
4 0.9427267909049988
5 0.8863182067871094
6 0.8325981497764587
7 0.7812609076499939
8 0.7323598265647888
9 0.6857206225395203
10 0.6414801478385925
11 0.5995320677757263
12 0.5596458315849304
13 0.5217723250389099
14 0.4859011769294739
15 0.4518810212612152
16 0.41959983110427856
17 0.389043390750885
18 0.3601694405078888
19 0.33290717005729675
20 0.3071530759334564
21 0.2828802168369293
22 0.2599860429763794
23 0.23844455182552338
24 0.21814508736133575
25 0.1990567445755005
26 0.18118925392627716
27 0.16446658968925476
28 0.14883702993392944
29 0.13428239524364471
30 0.12075912952423096
31 0.1082138642668724
32 0.09661010652780533
33 0.08592303842306137
34 0.07612273097038269
35 0.06715188175439835
36 0.05895792320370674
37 0.0515105314552784
38 0.0447763167321682
39 0.03870749473571777
40 0.033273037523031235
41 0.028420617803931236
42 0.02410227060317993
43 0.020284514874219894
44 0.0169318746775388