# PyTorch

In [77]:
import torch

- PyTorch is a library for processing tensors. A tensor is a number, vector, matrix or any n-dimensional array

In [78]:
# Number
t1 = torch.tensor(4.)
t1

tensor(4.)

In [79]:
t1.dtype

torch.float32

In [80]:
# Vector(Array)
t2=torch.tensor([1.,2.,3.,4.])
t2

tensor([1., 2., 3., 4.])

In [81]:
t2.dtype

torch.float32

In [82]:
# Matrix
t3=torch.tensor([[5.,6.],
                [7,8],
                [9,10]])

In [83]:
t3

tensor([[ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]])

In [84]:
t3.dtype

torch.float32

In [85]:
# 3-Dimensional array
t4=torch.tensor([
    [[11,12,13],
    [13,14,15]],
    [[15,16,17],
    [17,18,19]]
])
t4

tensor([[[11, 12, 13],
         [13, 14, 15]],

        [[15, 16, 17],
         [17, 18, 19]]])

In [86]:
t4.shape

torch.Size([2, 2, 3])

In [87]:
t4.size()

torch.Size([2, 2, 3])

#### Tensor Operations and Gradients

In [88]:
# Create Tensors
x=torch.tensor(3.)
w=torch.tensor(4.,requires_grad=True) # We are alerting him that it can be used for differentiation
b=torch.tensor(5.,requires_grad=True) # We are alerting him that it can be used for differentiation

In [89]:
x,w,b

(tensor(3.), tensor(4., requires_grad=True), tensor(5., requires_grad=True))

In [90]:
# Arithmetic Operations
y=w*x+b

In [91]:
y

tensor(17., grad_fn=<AddBackward0>)

In [92]:
# Compute Derivatives
y.backward()  #Backward Propagation

In [93]:
# Display gradients
print("dy/dx: ",x.grad)
print("dy/dw: ",w.grad)
print("dy/db: ",b.grad)

dy/dx:  None
dy/dw:  tensor(3.)
dy/db:  tensor(1.)


- Tensor Functions

In [94]:
# Create a tensor with a fixed value for every element
t6=torch.full((3,2),42)
t6

tensor([[42, 42],
        [42, 42],
        [42, 42]])

In [95]:
t3

tensor([[ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.]])

In [96]:
t7=torch.concat((t3,t6))
t7

tensor([[ 5.,  6.],
        [ 7.,  8.],
        [ 9., 10.],
        [42., 42.],
        [42., 42.],
        [42., 42.]])

In [97]:
# compute the sin of each element
t8=torch.sin(t7)
t8

tensor([[-0.9589, -0.2794],
        [ 0.6570,  0.9894],
        [ 0.4121, -0.5440],
        [-0.9165, -0.9165],
        [-0.9165, -0.9165],
        [-0.9165, -0.9165]])

In [98]:
t8.shape

torch.Size([6, 2])

In [99]:
# Change the shape of a tensor
t9=t8.reshape(3,2,2)

In [100]:
t9

tensor([[[-0.9589, -0.2794],
         [ 0.6570,  0.9894]],

        [[ 0.4121, -0.5440],
         [-0.9165, -0.9165]],

        [[-0.9165, -0.9165],
         [-0.9165, -0.9165]]])

- Interoperability of PyTorch with Numpy

In [101]:
import numpy as np

In [102]:
x=np.array([[1,2],[3,4]])
x

array([[1, 2],
       [3, 4]])

In [103]:
# Convert Numpy array to a PyTorch tensor
y=torch.from_numpy(x)
y

tensor([[1, 2],
        [3, 4]])

In [104]:
# Convert torch tensor to Numpy array
z=y.numpy()
z

array([[1, 2],
       [3, 4]])

### Linear Regression from Scratch using PyTorch

In [105]:
# Making Training Data
# Input: (temperature,rainfall,humidity)
inputs=np.array([[73,67,43],
                [91,88,64],
                [87,134,58],
                [102,43,37],
                [69,96,70]],dtype='float32')

In [106]:
inputs

array([[ 73.,  67.,  43.],
       [ 91.,  88.,  64.],
       [ 87., 134.,  58.],
       [102.,  43.,  37.],
       [ 69.,  96.,  70.]], dtype=float32)

In [107]:
inputs.shape

(5, 3)

In [108]:
# Targets (apples,oranges)

target=np.array([[56,70],
                [81,101],
                [119,133],
                [22,37],
                [103,119]],dtype='float32')

In [109]:
target

array([[ 56.,  70.],
       [ 81., 101.],
       [119., 133.],
       [ 22.,  37.],
       [103., 119.]], dtype=float32)

In [110]:
target.shape

(5, 2)

In [111]:
# Convert Input and target to tensors
inputs=torch.from_numpy(inputs)
target=torch.from_numpy(target)

In [112]:
# weights and biases
w=torch.randn(2,3,requires_grad=True)
b=torch.randn(2,requires_grad=True)

In [113]:
w,b

(tensor([[-0.8495, -1.4858, -0.3431],
         [-1.2954,  1.9983, -0.4817]], requires_grad=True),
 tensor([-0.2907,  1.0617], requires_grad=True))

In [114]:
# Define the model

def model(x):
    return x @ w.t() + b

In [115]:
# Prediction
preds=model(inputs)
print(preds)

tensor([[-176.6033,   19.6693],
        [-230.3003,   28.1998],
        [-293.1910,  128.1932],
        [-163.5202,  -62.9652],
        [-225.5569,   69.7940]], grad_fn=<AddBackward0>)


In [116]:
# Actual
print(target)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [117]:
# Loss function MSE

def MSE(actual,pred):
    diff=actual-pred
    return torch.sum(diff*diff)/diff.numel()

In [118]:
loss=MSE(target,preds)
print(loss)

tensor(48355.1406, grad_fn=<DivBackward0>)


In [119]:
# Compute Gradients
loss.backward()

In [120]:
print(w)
print(w.grad)

tensor([[-0.8495, -1.4858, -0.3431],
        [-1.2954,  1.9983, -0.4817]], requires_grad=True)
tensor([[-24552.4961, -27546.2559, -16739.0957],
        [ -4861.7637,  -3888.9944,  -2849.0725]])


In [121]:
print(b)
print(b.grad)

tensor([-0.2907,  1.0617], requires_grad=True)
tensor([-294.0344,  -55.4218])


In [122]:
# adjust weight and reset grad
with torch.no_grad():
    w-=w.grad * 1e-5;
    b-=b.grad * 1e-5;
    w.grad.zero_()
    b.grad.zero_()

In [123]:
print(w)
print(b)

tensor([[-0.6039, -1.2103, -0.1757],
        [-1.2468,  2.0372, -0.4533]], requires_grad=True)
tensor([-0.2878,  1.0623], requires_grad=True)


In [124]:
# Calculate again
preds=model(inputs)
loss=MSE(target,preds)
print(loss)

tensor(32978.2695, grad_fn=<DivBackward0>)


In [125]:
# Training for multiple epochs
for i in range(400):
    preds=model(inputs)
    loss=MSE(target,preds)
    loss.backward()
    with torch.no_grad():
        w-=w.grad * 1e-5; # Learning rate
        b-=b.grad * 1e-5;
        w.grad.zero_()
        b.grad.zero_()
    print(f"Epochs({i}/{100}) & Loss {loss}")

Epochs(0/100) & Loss 32978.26953125
Epochs(1/100) & Loss 22611.419921875
Epochs(2/100) & Loss 15620.8076171875
Epochs(3/100) & Loss 10905.447265625
Epochs(4/100) & Loss 7723.4052734375
Epochs(5/100) & Loss 5574.6982421875
Epochs(6/100) & Loss 4122.39599609375
Epochs(7/100) & Loss 3139.4453125
Epochs(8/100) & Loss 2472.836669921875
Epochs(9/100) & Loss 2019.458740234375
Epochs(10/100) & Loss 1709.8267822265625
Epochs(11/100) & Loss 1497.113037109375
Epochs(12/100) & Loss 1349.7623291015625
Epochs(13/100) & Loss 1246.509033203125
Epochs(14/100) & Loss 1173.0211181640625
Epochs(15/100) & Loss 1119.640380859375
Epochs(16/100) & Loss 1079.8580322265625
Epochs(17/100) & Loss 1049.28662109375
Epochs(18/100) & Loss 1024.9697265625
Epochs(19/100) & Loss 1004.9130859375
Epochs(20/100) & Loss 987.7740478515625
Epochs(21/100) & Loss 972.6451416015625
Epochs(22/100) & Loss 958.916015625
Epochs(23/100) & Loss 946.17431640625
Epochs(24/100) & Loss 934.14111328125
Epochs(25/100) & Loss 922.62860107421

In [126]:
preds=model(inputs)
loss=MSE(target,preds)
print(loss)

tensor(55.5280, grad_fn=<DivBackward0>)


In [127]:
from math import sqrt
sqrt(loss)

7.4517117166007045

In [128]:
preds

tensor([[ 57.8998,  70.4368],
        [ 82.8202,  92.9607],
        [116.1162, 150.2901],
        [ 25.6364,  35.9291],
        [100.2740, 106.5714]], grad_fn=<AddBackward0>)

In [129]:
target

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

- Therfore we can see that they are almost close each other

### Neural Network using PyTorch

In [130]:
# To check GPU
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


In [131]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt

In [132]:
# Download Training data from open datasets
training_data=datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

In [133]:
test_data=datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

In [134]:
type(training_data)

In [135]:
batch_size=64

In [136]:
# Create Data Loaders
train_dataloader=DataLoader(training_data,batch_size=batch_size)
test_dataloader=DataLoader(test_data,batch_size=batch_size)

In [137]:
for X,y in test_dataloader:
    print("Shape of X [N,C,H,W]: ",X.shape)
    print("Shape of y: ",y.shape,y.dtype)
    break

Shape of X [N,C,H,W]:  torch.Size([64, 1, 28, 28])
Shape of y:  torch.Size([64]) torch.int64


In [138]:
print(y)

tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 9,
        1, 4, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 6, 1, 3, 7, 6, 7, 2, 1,
        2, 2, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5])


In [139]:
# Get CPU or GPU device for Training
device="cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

Using cpu device


In [140]:
# Define Model

class NeuralNetwork(nn.Module):
  def __init__(self):
    super(NeuralNetwork,self).__init__()
    self.flatten=nn.Flatten()
    self.linear_relu_stack=nn.Sequential(
        nn.Linear(28*28,512),
        nn.ReLU(),
        nn.Linear(512,512),
        nn.ReLU(),
        nn.Linear(512,10),
        nn.Softmax()
    )
  def forward(self,x):
    x=self.flatten(x)
    logits=self.linear_relu_stack(x)
    return logits

model=NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): Softmax(dim=None)
  )
)


In [141]:
loss_fn=nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(model.parameters(),lr=1e-3)

In [142]:
def train(dataloader,model,loss_fn,optimizer):
  size=len(dataloader.dataset)
  for batch,(X,y) in enumerate(dataloader):
    X,y=X.to(device),y.to(device)
    # Compute prediction error
    pred=model(X)
    loss=loss_fn(pred,y)
    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if batch % 100 == 0:
      loss,current=loss.item(),batch*len(X)
      print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")


In [145]:
def test(dataloader,model,loss_fn):
  size=len(dataloader.dataset)
  num_batches=len(dataloader)
  model.eval()
  test_loss,correct=0,0
  with torch.no_grad():
    for X,y in dataloader:
      X,y=X.to(device),y.to(device)
      pred=model(X)
      test_loss+=loss_fn(pred,y).item()
      correct+=(pred.argmax(1)==y).type(torch.float).sum().item()
  test_loss/=num_batches
  print(f"Test Error: \n Accuracy: {(100*correct/size):>0.1f}%, Avg loss: {test_loss:>8f}")

In [146]:
epochs=5
for t in range(epochs):
  print(f"Epoch {t+1}\n-------------------------------")
  train(train_dataloader,model,loss_fn,optimizer)
  test(test_dataloader,model,loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.295351 [    0/60000]
loss: 2.295227 [ 6400/60000]
loss: 2.295760 [12800/60000]
loss: 2.297221 [19200/60000]
loss: 2.293325 [25600/60000]
loss: 2.294288 [32000/60000]
loss: 2.294695 [38400/60000]
loss: 2.294099 [44800/60000]
loss: 2.293982 [51200/60000]
loss: 2.293476 [57600/60000]
Test Error: 
 Accuracy: 29.0%, Avg loss: 2.293744
Epoch 2
-------------------------------
loss: 2.293662 [    0/60000]
loss: 2.293514 [ 6400/60000]
loss: 2.293855 [12800/60000]
loss: 2.295739 [19200/60000]
loss: 2.291290 [25600/60000]
loss: 2.292038 [32000/60000]
loss: 2.293017 [38400/60000]
loss: 2.291974 [44800/60000]
loss: 2.292151 [51200/60000]
loss: 2.291474 [57600/60000]
Test Error: 
 Accuracy: 31.6%, Avg loss: 2.291699
Epoch 3
-------------------------------
loss: 2.291777 [    0/60000]
loss: 2.291592 [ 6400/60000]
loss: 2.291725 [12800/60000]
loss: 2.294096 [19200/60000]
loss: 2.288957 [25600/60000]
loss: 2.289477 [32000/60000]
loss: 2.291099 [38400/6000

In [147]:
# save model
torch.save(model.state_dict(),"model.pth")
print("Saved PyTorch Model State to model.pth")

Saved PyTorch Model State to model.pth


In [148]:
# load model
model.load_state_dict(torch.load("model.pth"))

<All keys matched successfully>

In [149]:
## Prediction

classes=[
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot"
    ]

model.eval()
x,y=test_data[0][0],test_data[0][1]
with torch.no_grad():
  pred=model(x)
  predicted,actual=classes[pred[0].argmax(0)],classes[y]
  print(f'Predicted: "{predicted}", Actual: "{actual}"')



Predicted: "Ankle boot", Actual: "Ankle boot"


  return self._call_impl(*args, **kwargs)
