<a href="https://colab.research.google.com/github/ammalik221/Mini-Projects/blob/master/PyTorch_Warm_up.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
######################################################
################# Basic Operations ###################
######################################################

import torch

""" Torch tensors are similar to numpy's ndarray, the difference is just 
that, they can be easily transferred to GPU. Any operation that we want 
to perform on ndarray can also be performed on tensors.
"""

# a 2x3 tensor
n_tensor = torch.Tensor([[1,2,3],
                         [5,6,7]])
print("New Tensor - \n", n_tensor,  "\n")

# a 2x3 tensor of zeros
z_tensor = torch.zeros(2, 3)
print("Tensor with all zeros - \n", z_tensor, "\n")

# a 2x3 tensor of random values in the range [0, 1)
r_tensor = torch.rand(2, 3)
print("Tensor with random value - \n", r_tensor, "\n")

# accessing a scalar object
s_tensor = torch.Tensor([[1,2,3],
                         [5,6,7]])
print("Element at a position - \n", s_tensor[0][1].item(), "\n")

# slicing
slice_tensor = torch.Tensor([[1,2,3],
                             [5,6,7]])
print("All rows, first column\n", slice_tensor[:, 0], "\n")
print("All rows, last column\n", slice_tensor[:, -1], "\n")
print("All columns, first row\n", slice_tensor[0, :], "\n")
print("All columns, last column\n", slice_tensor[-1, :], "\n")

# info of tensor
i_tensor = torch.Tensor([[1,2,3],
                         [5,6,7],
                         [8,9,10]])
print("Type - \n", i_tensor.type(), "\n")
print("Shape - \n", i_tensor.shape, "\n")
print("Dimensions - \n", i_tensor.dim(), "\n")

# reshape
re_tensor = torch.Tensor([[1, 2], [3, 4]])
print("Tensor before reshaping - \n", re_tensor, "\n")

re_tensor.view(1, 4)
print("Tensor after reshaping - \n", re_tensor, "\n")

# to and from numpy
nm_tensor = torch.Tensor([[1, 2], [3, 4]])
print("Type before changing tensor to array - ", type(nm_tensor), "\n")
nm_array = nm_tensor.numpy()
print("Type before changing tensor to array - ", type(nm_array), "\n")

# tensor operations
a = torch.randn(3, 3)
b = torch.randn(3, 3)

print("Tensor A - \n", a, "\n")
print("Tensor B - \n", b, "\n")
print("Transpose\n", a.t(), "\n")
print("Matrix Multiplication\n", a.mm(b), "\n")
print("Element-wise multiplication\n", a.mul(b), "\n")

# cuda related operations
print("True if GPU is available\n", torch.cuda.is_available(), "\n")

# transfer tensor to CPU
a.cpu()

# transfer tensor to GPU
a.cuda()

New Tensor - 
 tensor([[1., 2., 3.],
        [5., 6., 7.]]) 

Tensor with all zeros - 
 tensor([[0., 0., 0.],
        [0., 0., 0.]]) 

Tensor with random value - 
 tensor([[0.4279, 0.1944, 0.3696],
        [0.7924, 0.3070, 0.2803]]) 

Element at a position - 
 2.0 

All rows, first column
 tensor([1., 5.]) 

All rows, last column
 tensor([3., 7.]) 

All columns, first row
 tensor([1., 2., 3.]) 

All columns, last column
 tensor([5., 6., 7.]) 

Type - 
 torch.FloatTensor 

Shape - 
 torch.Size([3, 3]) 

Dimensions - 
 2 

Tensor before reshaping - 
 tensor([[1., 2.],
        [3., 4.]]) 

Tensor after reshaping - 
 tensor([[1., 2.],
        [3., 4.]]) 

Type before changing tensor to array -  <class 'torch.Tensor'> 

Type before changing tensor to array -  <class 'numpy.ndarray'> 

Tensor A - 
 tensor([[ 0.4958,  0.1476, -0.1507],
        [ 0.0566,  2.5377,  0.4458],
        [ 2.0285, -0.3072,  0.3294]]) 

Tensor B - 
 tensor([[ 0.6230, -0.8059, -0.4829],
        [-0.3902,  0.2282, -2.27

tensor([[ 0.4958,  0.1476, -0.1507],
        [ 0.0566,  2.5377,  0.4458],
        [ 2.0285, -0.3072,  0.3294]], device='cuda:0')

In [7]:
######################################################
######## A two layer network with Autograd ###########
######################################################

"""
Autograd is basically automatic differentiation, if we want to compute
gradients of a tensor, we set requires_grad = True. Setting this parameter
to true allows us to perform backpropogation on it and the gradient of that
tensor is stored in another tensor named tensor_name.grad.

The two layer network dimensions are - 
    input - 1000
    hidden - 100
    output - 10
"""

if torch.cuda.is_available:
    device = "cuda"
else:
    device = "cpu"

# define necessary variables
batch_size = 64
input_size = 1000
hidden_size = 100
output_size = 10

# random valued tensors for input and output
x = torch.randn(batch_size, input_size, device = device)
y = torch.randn(batch_size, output_size, device = device)

# initially weights are initialised as random
# since we want to perform backpropogation on the weights, we set requires_grad=True
w1 = torch.randn(input_size, hidden_size, requires_grad=True, device = device)
w2 = torch.randn(hidden_size, output_size, requires_grad=True, device = device)

learning_rate = 1e-6
epochs = 500
for e in range(epochs):

    # calculate output of our model
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # computing and printing loss
    loss = ((y_pred - y).pow(2).sum())
    print("Epoch : ", e,"\tLoss - ", loss.item())

    # .backward() calcualtes gradient of loss w.r.t all Tensores with 
    # requires_grad = True
    loss.backward()

    # since we don't want to build a computational graph for these steps
    # we use no_grad() function
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # zero out the gradients after each epoch, otherwise they
        # continue to pile up, for all epochs
        w1.grad.zero_()
        w2.grad.zero_()


Epoch :  0 	Loss -  32229532.0
Epoch :  1 	Loss -  29400756.0
Epoch :  2 	Loss -  30142316.0
Epoch :  3 	Loss -  29423168.0
Epoch :  4 	Loss -  24869062.0
Epoch :  5 	Loss -  17342818.0
Epoch :  6 	Loss -  10319248.0
Epoch :  7 	Loss -  5627142.0
Epoch :  8 	Loss -  3123181.0
Epoch :  9 	Loss -  1886224.0
Epoch :  10 	Loss -  1271321.0
Epoch :  11 	Loss -  939996.375
Epoch :  12 	Loss -  740205.625
Epoch :  13 	Loss -  605383.875
Epoch :  14 	Loss -  506331.5625
Epoch :  15 	Loss -  429138.15625
Epoch :  16 	Loss -  366882.25
Epoch :  17 	Loss -  315726.875
Epoch :  18 	Loss -  273048.3125
Epoch :  19 	Loss -  237109.0625
Epoch :  20 	Loss -  206660.90625
Epoch :  21 	Loss -  180720.34375
Epoch :  22 	Loss -  158505.3125
Epoch :  23 	Loss -  139414.15625
Epoch :  24 	Loss -  122948.53125
Epoch :  25 	Loss -  108699.6015625
Epoch :  26 	Loss -  96323.359375
Epoch :  27 	Loss -  85548.8671875
Epoch :  28 	Loss -  76137.15625
Epoch :  29 	Loss -  67909.578125
Epoch :  30 	Loss -  60692.86

In [8]:
######################################################
################# PyTorch nn module ##################
######################################################

"""
nn package provides a set of Modules which can be used to build up a
neural network.
"""

if torch.cuda.is_available:
    device = "cuda"
else:
    device = "cpu"


batch_size, input_size, hidden_size, output_size = 64, 1024, 512, 10

# random valued tensors for input and output
x = torch.randn(batch_size, input_size, device=device)
y = torch.randn(batch_size, output_size, device=device)

# nn.Sequential can be used to define our model as a sequence of layers
# in this case, the input goes through the following transformations
# input --> Linear --> activation(Relu) --> Linear
# Linear module computes output using a linear function
# relu module applies relu function to the ouptut obtained from the last layer
model = torch.nn.Sequential(
          torch.nn.Linear(input_size, hidden_size),
          torch.nn.ReLU(),
          torch.nn.Linear(hidden_size, output_size),
        ).to(device)

learning_rate = 1e-4

# This is mean squared loss and elementwise_mean means the loss is mean
# of squared errors.
loss_fn = torch.nn.MSELoss(reduction='elementwise_mean')

# optim package  provides implementations of commonly used optimization 
# algorithms. The optimizer updates the weights of the model for us.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

epochs = 500
for e in range(epochs):

    # calculate output
    y_pred = model(x)

    # computing and printing loss
    loss = loss_fn(y_pred, y)
    print("Epoch : ", e,"\tLoss - ", loss.item())
    
    # zero out all the gradients
    optimizer.zero_grad()

    # compute gradient of the loss with respect to model parameters
    loss.backward()

    # update the parameters
    optimizer.step()



Epoch :  0 	Loss -  1.1709027290344238
Epoch :  1 	Loss -  1.1026450395584106
Epoch :  2 	Loss -  1.0374784469604492
Epoch :  3 	Loss -  0.9752761721611023
Epoch :  4 	Loss -  0.916059136390686
Epoch :  5 	Loss -  0.859638512134552
Epoch :  6 	Loss -  0.8060597777366638
Epoch :  7 	Loss -  0.7551005482673645
Epoch :  8 	Loss -  0.7068974375724792
Epoch :  9 	Loss -  0.6613147854804993
Epoch :  10 	Loss -  0.6182137727737427
Epoch :  11 	Loss -  0.5773984789848328
Epoch :  12 	Loss -  0.5387842655181885
Epoch :  13 	Loss -  0.5021977424621582
Epoch :  14 	Loss -  0.4675353169441223
Epoch :  15 	Loss -  0.43473315238952637
Epoch :  16 	Loss -  0.4037090837955475
Epoch :  17 	Loss -  0.37448650598526
Epoch :  18 	Loss -  0.3468888998031616
Epoch :  19 	Loss -  0.32081228494644165
Epoch :  20 	Loss -  0.29626375436782837
Epoch :  21 	Loss -  0.2731039524078369
Epoch :  22 	Loss -  0.25130006670951843
Epoch :  23 	Loss -  0.23075710237026215
Epoch :  24 	Loss -  0.21147333085536957
Epoch : 