<a href="https://colab.research.google.com/github/ferdouszislam/pytorch-practice/blob/main/pytorch_practice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
import torch
import numpy as np

import torch.nn as nn # required at Tutorial 06

In [2]:
if torch.cuda.is_available():
  print('GPU, yay!')
else:
  print('CPU :(')

CPU :(


# Tutorial 02 - Tensor Basics

In [3]:
x = torch.rand(3,3)
y = torch.rand(3,3)

In [4]:
print(x, '\n', y, '\n')

tensor([[0.6481, 0.4903, 0.0836],
        [0.2646, 0.7901, 0.0386],
        [0.0805, 0.3598, 0.3267]]) 
 tensor([[0.4114, 0.5518, 0.9380],
        [0.1188, 0.1703, 0.4574],
        [0.9259, 0.1294, 0.2896]]) 



In [5]:
z = torch.add(x, y)
z

tensor([[1.0594, 1.0421, 1.0216],
        [0.3834, 0.9604, 0.4959],
        [1.0064, 0.4892, 0.6163]])

In [6]:
z = torch.mul(x, y)
z 

tensor([[0.2666, 0.2705, 0.0784],
        [0.0314, 0.1345, 0.0176],
        [0.0745, 0.0466, 0.0946]])

In [7]:
z.add_(y) # same as z+=y

tensor([[0.6779, 0.8223, 1.0164],
        [0.1503, 0.3048, 0.4750],
        [1.0004, 0.1760, 0.3842]])

In [8]:
z[:, 2] # get all rows at column 2 (0 based indexing)

tensor([1.0164, 0.4750, 0.3842])

In [9]:
z[1,2].item() # get single element value

0.4749874770641327

In [10]:
'''
input- a pytorch tensor variable 
returns- multiplication of the input tensor's dimensions 
'''
def get_flat_shape(tensor):
  dims=list(tensor.size())
  flat_dim = 1
  for dim in dims:
    flat_dim*=dim
  return flat_dim

In [11]:
get_flat_shape(z)

9

In [12]:
flat_z = z.view(get_flat_shape(z)) # resizing a tensor
flat_z

tensor([0.6779, 0.8223, 1.0164, 0.1503, 0.3048, 0.4750, 1.0004, 0.1760, 0.3842])

In [13]:
# tensor to numpy array conversion
np_z = flat_z.clone().numpy() # using '.clone()' is a MUST
np_z

array([0.67794675, 0.8223072 , 1.0164254 , 0.15028793, 0.304846  ,
       0.47498748, 1.000425  , 0.17596233, 0.3842081 ], dtype=float32)

In [14]:
# numpy array to tensor conversion
flat_z = torch.from_numpy(np_z.copy()) # using '.copy()' is a MUST
flat_z

tensor([0.6779, 0.8223, 1.0164, 0.1503, 0.3048, 0.4750, 1.0004, 0.1760, 0.3842])

  
  **Tensors can be kept into GPU but numpy arrays have to remain on CPU. GPUs are generally faster.**  


In [15]:
device = False
if torch.cuda.is_available():
  device = torch.device("cuda")

In [16]:
# all operations on tensors to be done in GPU
# x = torch.rand(2, 2).to(device)
# y = torch.rand(2, 2).to(device)
# z=x+y

# print(x, '\n', y, '\n', z)

In [17]:
# numpy arrays MUST be on cpu
z = z.to('cpu')
np_z = z.clone().numpy()
np_z

array([[0.67794675, 0.8223072 , 1.0164254 ],
       [0.15028793, 0.304846  , 0.47498748],
       [1.000425  , 0.17596233, 0.3842081 ]], dtype=float32)

# Tutorial 03 - Gradient Calculation with Autograd

In [18]:
x = torch.randn(3, requires_grad=True)
x

tensor([ 0.0305, -0.9509, -0.4911], requires_grad=True)

In [19]:
y=x+2
print(y)
z=y*y*2
print(z)
z = z.mean()
print(z) 

tensor([2.0305, 1.0491, 1.5089], grad_fn=<AddBackward0>)
tensor([8.2458, 2.2012, 4.5533], grad_fn=<MulBackward0>)
tensor(5.0001, grad_fn=<MeanBackward0>)


In [20]:
# calculating dz/dx for each element of x tensor (in this case- x1,x2,x3)
#  N.B- All but the last call to backward should have the retain_graph=True option
z.backward(retain_graph=True)
print(x.grad)

tensor([2.7073, 1.3988, 2.0118])


In [21]:
# prevent gradient tracking 
# (might be needed when updating weights during training)

x = torch.randn(3, requires_grad=True)
print(x)

# way 1
print('way 1')
y=x
y.requires_grad_(False)
print(x)

# way 2
print('way 2')
y=x.detach()
print(y)

# way 3
print('way 3')
y=x+2
print(x, '\n', y)
with torch.no_grad():
  y=x+2
  print(x, '\n', y)

tensor([ 0.0969, -1.2457, -0.5860], requires_grad=True)
way 1
tensor([ 0.0969, -1.2457, -0.5860])
way 2
tensor([ 0.0969, -1.2457, -0.5860])
way 3
tensor([ 0.0969, -1.2457, -0.5860]) 
 tensor([2.0969, 0.7543, 1.4140])
tensor([ 0.0969, -1.2457, -0.5860]) 
 tensor([2.0969, 0.7543, 1.4140])


In [22]:
# dummy training example with some weights

weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
  model_output = (weights*3).sum() # loss function... probably
  
  model_output.backward()
  print(weights.grad)

  # before next iteration or optimization step MUST empty the gradient
  weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


# Tutorial 04 - Back Propagation

In [23]:
# example backpropagation for a single instance

x = torch.tensor(1.0) # input
y = torch.tensor(2.0) # actual output

w = torch.tensor(1.0, requires_grad=True) # weight i.e, learnable parameter

# forward pass
y_hat = w*x # y_hat is the prediction using linear model = w*x 
loss = (y_hat - y)**2 # loss function = squared error (generally this would be MSE)

print(loss)

# backward pass
loss.backward()
print(w.grad)

# now update weight using the gradient 
# and do forward and backward pass again

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


# Tutorial 05 - Gradient Descent with Autograd & Backpropagation



In [24]:
# implementing linear regression from scratch with dummy data

# f = w*x, for the example model below- w = 2 fits the ouput completely 
X = np.array([1,2,3,4], dtype=np.float32) # input
Y = np.array([2,4,6,8], dtype=np.float32) # actual output

# randomely initializing weight
w = 0.0

# model prediction
def forward(x):
  return w*x

# loss function, MSE
def loss(y, y_pred):
  return ((y_pred-y)**2).mean()

# gradient
# here, loss, J = 1/N * (w*x-y)^2 [because y_pred = w*x]
# therefore, dJ/dw = 1/N*2*x*(w*x-y)
def gradient(x, y, y_pred):
  return np.dot(2*x, y_pred-y).mean()

print(f'Prediction before training for x=5 : {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 5

print('\n[Training started...]\n')
for epoch in range(n_iters):
  # prediction, forward pass
  y_pred = forward(X)

  # loss
  J = loss(Y, y_pred)

  # gradient
  dJ_dw = gradient(X, Y, y_pred)

  # update weights
  w = w - learning_rate* dJ_dw 

  # print everytime
  if epoch%1==0:
    print(f'epoch {epoch+1}: w = {w:.3f}, loss = {J:.8f}')
print('\n[Training finished...]\n')

print(f'Prediction after training for x=5 : {forward(5):.3f}')

Prediction before training for x=5 : 0.000

[Training started...]

epoch 1: w = 1.200, loss = 30.00000000
epoch 2: w = 1.680, loss = 4.79999924
epoch 3: w = 1.872, loss = 0.76800019
epoch 4: w = 1.949, loss = 0.12288000
epoch 5: w = 1.980, loss = 0.01966083

[Training finished...]

Prediction after training for x=5 : 9.898


### Now let's do the same using Autograd for backward pass

In [25]:
# implementing linear regression from scratch with dummy data

# f = w*x, for the example model below- w = 2 fits the ouput completely 
X = torch.tensor([1,2,3,4], dtype=torch.float32) # input
Y = torch.tensor([2,4,6,8], dtype=torch.float32) # actual output

# randomely initializing weight
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model prediction
def forward(x):
  return w*x

# loss function, MSE
def loss(y, y_pred):
  return ((y_pred-y)**2).mean()

# gradient
# here, loss, J = 1/N * (w*x-y)^2 [because y_pred = w*x]
# therefore, dJ/dw = 1/N*2*x*(w*x-y)
# def gradient(x, y, y_pred):
#   return np.dot(2*x, y_pred-y).mean()

print(f'Prediction before training for x=5 : {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 20

print('\n[Training started...]\n')
for epoch in range(n_iters):
  # prediction i.e forward pass
  y_pred = forward(X)

  # loss
  J = loss(Y, y_pred)

  # calculate gradient i.e backward pass
  #dJ_dw = gradient(X, Y, y_pred)
  J.backward() # dJ/dw

  # update weights
  #w = w - learning_rate* dJ_dw 
  w.data = w.data - learning_rate * w.grad
  # alternately we can do this,
  # with torch.no_grad(): 
  #   # update to weight should not be tracked for calculating gradient
  #   w -= learning_rate*w.grad

  w.grad.zero_() # clear the gradients 

  if epoch%2==1:
    print(f'epoch {epoch+1}: w = {w:.3f}, loss = {J:.8f}')
print('\n[Training finished...]\n')

print(f'Prediction after training for x=5 : {forward(5):.3f}')

Prediction before training for x=5 : 0.000

[Training started...]

epoch 2: w = 0.555, loss = 21.67499924
epoch 4: w = 0.956, loss = 11.31448650
epoch 6: w = 1.246, loss = 5.90623236
epoch 8: w = 1.455, loss = 3.08308983
epoch 10: w = 1.606, loss = 1.60939169
epoch 12: w = 1.716, loss = 0.84011245
epoch 14: w = 1.794, loss = 0.43854395
epoch 16: w = 1.851, loss = 0.22892261
epoch 18: w = 1.893, loss = 0.11949898
epoch 20: w = 1.922, loss = 0.06237914

[Training finished...]

Prediction after training for x=5 : 9.612


# Tutorial 06 - Training Pipeline: Model, Loss, and Optimizer

### Implementing Linear Regression same as before but this time with model, loss, optimizer, autograd from the **torch.nn** library

In [43]:
'''
Training Pipeline

1. design model (input size, output size, forward pass)
2. Construct loss & optimizer
3. Training loop
  - forward pass: compute prediction
  - backward pass: compute gradients
  - update weights
'''
# implementing linear regression from scratch with dummy data

# f = w*x, for the example model below- w = 2 fits the ouput completely 
X = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32) # input
Y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32) # actual output

n_samples, n_features = X.shape # 4 samples each with one feature
print('# of samples =', n_samples, ' # of features =', n_features, '\n')

# define custom model (same as Linear Regression for now)
class MyLinearRegression(nn.Module):

  def __init__(self, input_dim, output_dim):
    super(MyLinearRegression, self).__init__()
    # define layers
    self.lin = nn.Linear(input_dim, output_dim)

  def forward(self, x):
    return self.lin(x)

# model = nn.Linear(in_features=n_features, out_features=1)
model = MyLinearRegression(input_dim=n_features, output_dim=1)

# test data
X_test = torch.tensor([5], dtype=torch.float32)

print(f'Prediction before training for x=5 : {model(X_test).item():.3f}')

# Training
learning_rate = 0.01
n_iters = 20 
# MSE as loss function
loss = nn.MSELoss()
# optimize model with stochastic gradient descent
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

print('\n[Training started...]\n')
for epoch in range(n_iters):
  # prediction i.e forward pass
  y_pred = model(X)

  # loss
  J = loss(Y, y_pred)

  # calculate gradient i.e backward pass
  J.backward() # dJ/dw

  # update weights using optimizer
  optimizer.step()
  optimizer.zero_grad() # clear the gradients 

  if epoch%2==1:
    [w, b] = model.parameters()
    print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {J:.3f}')
print('\n[Training finished...]\n')

print(f'Prediction after training for x=5 : {model(X_test).item():.3f}')

# of samples = 4  # of features = 1 

Prediction before training for x=5 : 3.973

[Training started...]

epoch 2: w = 1.272, loss = 8.679
epoch 4: w = 1.518, loss = 4.183
epoch 6: w = 1.688, loss = 2.018
epoch 8: w = 1.806, loss = 0.976
epoch 10: w = 1.888, loss = 0.474
epoch 12: w = 1.944, loss = 0.232
epoch 14: w = 1.983, loss = 0.116
epoch 16: w = 2.010, loss = 0.060
epoch 18: w = 2.029, loss = 0.033
epoch 20: w = 2.042, loss = 0.020

[Training finished...]

Prediction after training for x=5 : 9.988
