Let's learn Pytorch!

In [2]:
import torch

In [8]:
device="cuda" if torch.cuda.is_available() else "cpu"
ten = torch.tensor([[1, 2, 6], [3, 4, 8]], dtype=torch.float32, device=device, requires_grad=True)
print(ten)

tensor([[1., 2., 6.],
        [3., 4., 8.]], requires_grad=True)


In [9]:
ten.dtype

torch.float32

In [10]:
ten.device

device(type='cpu')

In [11]:
ten.shape

torch.Size([2, 3])

In [12]:
ten.requires_grad

True

In [15]:
x = torch.empty(size=(3, 3))
x

tensor([[-5.0739e+36,  4.5769e-41, -5.0739e+36],
        [ 4.5769e-41,  4.4842e-44,  0.0000e+00],
        [ 8.9683e-44,  0.0000e+00,  4.9079e-36]])

In [16]:
x = torch.zeros((3, 3))
x

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [17]:
x = torch.ones((4, 5))
x

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])

In [18]:
x = torch.rand((2, 3))
x

tensor([[0.0546, 0.4593, 0.4001],
        [0.9300, 0.4807, 0.7609]])

In [20]:
x = torch.eye(3, 3)
x

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

In [21]:
x = torch.arange(start=0, end=5, step=1)
x

tensor([0, 1, 2, 3, 4])

In [25]:
x = torch.linspace(start=0.1, end=1, steps=5)
x

tensor([0.1000, 0.3250, 0.5500, 0.7750, 1.0000])

In [28]:
x = torch.empty((1, 5)).normal_(mean=90, std=1)
x

tensor([[88.8459, 88.2524, 89.9002, 90.6685, 89.0325]])

In [33]:
x = torch.empty((1, 5)).uniform_(90, to=100)
x

tensor([[95.8705, 93.8702, 90.0342, 96.4781, 91.1489]])

In [39]:
x = torch.diag(torch.empty(5))
x

tensor([[ 4.9108e-36,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00, -5.0738e+36,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  4.5769e-41,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  8.9683e-44]])

In [41]:
# Initializing and converting tensors to other types (int, float, double)
tensor = torch.arange(4)
print(tensor.bool())
print(tensor.short())
print(tensor.long())
print(tensor.double())
print(tensor.half())

tensor([False,  True,  True,  True])
tensor([0, 1, 2, 3], dtype=torch.int16)
tensor([0, 1, 2, 3])
tensor([0., 1., 2., 3.], dtype=torch.float64)
tensor([0., 1., 2., 3.], dtype=torch.float16)


In [42]:
# Numpy array to tensor conversion and vice versa
import numpy as np

np_array = np.zeros((3, 3))
tensorrr = torch.from_numpy(np_array)
print(tensorrr)

np_array_back = tensorrr.numpy()
print(np_array_back)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float64)
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [43]:
# Tensor Math

x = torch.tensor([1, 2, 3])
y = torch.tensor([7, 6, 5])

z = x + y
z1 = torch.add(x, y)
z2 = torch.empty((3, 3))
torch.add(x, y, out=z2)

  torch.add(x, y, out=z2)


tensor([8., 8., 8.])

In [45]:
z = x - y
z

tensor([-6, -4, -2])

In [46]:
z = torch.true_divide(x, y)
z

tensor([0.1429, 0.3333, 0.6000])

In [49]:
t = torch.ones(3)
t.add_(x)  #inplace addtion
t

tensor([2., 3., 4.])

In [51]:
z = x.pow(2)
z = x ** 2
z


tensor([1, 4, 9])

In [52]:
print(x>0)

tensor([True, True, True])


In [62]:
# Matrix Multiplication

x1 = torch.tensor([[2, 3], [1, 4]])
x2 = torch.tensor([[6, 1], [7, 5]])
x1.mm(x2)
torch.mm(x1, x2)

tensor([[33, 17],
        [34, 21]])

In [59]:
#Matrix Exponentiation
x1.matrix_power(2)  # x1 * x1

tensor([[ 7, 18],
        [ 6, 19]])

In [63]:
# Element wise matrix multiplication
x1 * x2

tensor([[12,  3],
        [ 7, 20]])

In [70]:
# Broadcasting
 
x1 = torch.rand((2,2))
x2 = torch.rand((1, 2))

x1 - x2

tensor([[-0.7481, -0.3228],
        [ 0.1977, -0.1911]])

In [73]:
x1 ** x2

tensor([[0.0434, 0.5590],
        [0.9711, 0.6784]])

In [5]:
# Other Useful tensor operations

x = torch.tensor([1, 2, 3])
print(f"Sum = {x.sum(dim=0)}")
print(f"Max = {x.max(dim=0)}") # returns value and indice
print(f"Min = {x.min(dim=0)}") # returns value and indice
print(f"Argmax = {x.argmax(dim=0)}") # returns indices
print(f"Argmin = {x.argmin(dim=0)}") # returns indices
print(f"Abs = {x.abs()}")

Sum = 6
Max = torch.return_types.max(
values=tensor(3),
indices=tensor(2))
Min = torch.return_types.min(
values=tensor(1),
indices=tensor(0))
Argmax = 2
Argmin = 0
Abs = tensor([1, 2, 3])


In [9]:
x = torch.tensor([[1, 2, 3], [5, 3, 1]])
print(f"Sum_col = {x.sum(dim=0)}") # column wise
print(f"Sum_row = {x.sum(dim=1)}") # row wise

Sum_col = tensor([6, 5, 4])
Sum_row = tensor([6, 9])


In [12]:
torch.mean(x.float(), dim=1)

tensor([2., 3.])

In [14]:
y = torch.tensor([[3, 2, 1], [6, 4, 1]])
torch.eq(x, y)

tensor([[False,  True, False],
        [False, False,  True]])

In [16]:
torch.sort(y, dim=1, descending=False)

torch.return_types.sort(
values=tensor([[1, 2, 3],
        [1, 4, 6]]),
indices=tensor([[2, 1, 0],
        [2, 1, 0]]))

In [21]:
torch.clamp(y, min=0, max=3) # if ele>3, then set to 3, if ele<0 then set to 0 

tensor([[3, 2, 1],
        [3, 3, 1]])

In [23]:
x = torch.tensor([1, 0, 0, 1, 1], dtype = torch.bool)

In [24]:
x.any() # returns true if atleast value is true

tensor(True)

In [25]:
x.all() # returns true if all values are true

tensor(False)

Indexing

In [26]:
batch_size = 10
features = 25
x = torch.rand((batch_size, features))

In [28]:
x[0].shape

torch.Size([25])

In [29]:
x[0,:]

tensor([0.4990, 0.4619, 0.9113, 0.2286, 0.5949, 0.0044, 0.1815, 0.7251, 0.5421,
        0.5253, 0.3197, 0.3061, 0.4791, 0.3941, 0.3077, 0.6709, 0.9366, 0.2875,
        0.6122, 0.7426, 0.7373, 0.8424, 0.7188, 0.6311, 0.0714])

In [32]:
x[2,:10]

tensor([0.8565, 0.0779, 0.1349, 0.8346, 0.1091, 0.9881, 0.4135, 0.3835, 0.0516,
        0.7303])

In [38]:
x = torch.arange(10)
x[(x<2) | (x>7)]

tensor([0, 1, 8, 9])

In [37]:
x[x.remainder(2)==0]

tensor([0, 2, 4, 6, 8])

In [39]:
torch.where(x>5, x, x**2) #if x>5 then print x else print x**2

tensor([ 0,  1,  4,  9, 16, 25,  6,  7,  8,  9])

In [40]:
torch.tensor([1,2,2,4,4,1,2]).unique()

tensor([1, 2, 4])

In [41]:
x.ndimension()

1

In [44]:
x.numel() # returns number of elements in x

10

In [46]:
x = torch.rand((2, 5))
y = torch.rand((2, 5))

x.view((5, 2)) # works only whenn ternsor is in contiguos location
x.reshape((5, 2)) # works all time

tensor([[0.6510, 0.4341],
        [0.7108, 0.1772],
        [0.9614, 0.9328],
        [0.2372, 0.3970],
        [0.0923, 0.8968]])

In [50]:
torch.cat((x, y), dim=1).shape  # concatenate

torch.Size([2, 10])

In [52]:
batch = 64
x = torch.rand((batch, 2, 5))
x.view(batch, -1).shape

torch.Size([64, 10])

In [53]:
x.permute(1, 0, 2).shape  # mention indices to shuffle the shape

torch.Size([2, 64, 5])

In [63]:
x = torch.arange(10)
x.unsqueeze(0).shape

torch.Size([1, 10])

In [55]:
x.unsqueeze(1).shape

torch.Size([10, 1])

In [65]:
x.unsqueeze(0).unsqueeze(1).shape

torch.Size([1, 1, 10])

In [1]:
# Imports
from unicodedata import bidirectional
import torch
import torchvision  # torch package for vision related things
import torch.nn.functional as F  # Parameterless functions, like (some) activation functions
import torchvision.datasets as datasets  # Standard datasets
import torchvision.transforms as transforms  # Transformations we can perform on our dataset for augmentation
from torch import optim  # For optimizers like SGD, Adam, etc.
from torch import nn  # All neural network modules
from torch.utils.data import DataLoader  # Gives easier dataset managment by creating mini batches etc.
from tqdm import tqdm  # For a nice progress bar!

In [2]:
# Hyperparameters
input_size = 28
hidden_size = 256
num_layers = 2
num_classes = 10
sequence_length = 28
learning_rate = 0.005
batch_size = 64
num_epochs = 3
load_model = True
checkpoint_name = "my_checkpoint.pth.tar"

In [17]:
# Save Checkpoint
def save_checkpoint(state, filename=checkpoint_name):
    print("=> Saving Checkpoint")
    torch.save(state, filename)

# Load Checkpoint
def load_checkpoint(checkpoint):
    print("=> Loading checkpoint")
    model.load_state_dict(checkpoint["state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer"])

In [4]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [5]:
# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        # Forward propagate LSTM
        out, _ = self.rnn(x, h0)
        out = out.reshape(out.shape[0], -1)

        # Decode the hidden state of the last time step
        out = self.fc(out)
        return out


# Recurrent neural network with GRU (many-to-one)
class RNN_GRU(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN_GRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        # Forward propagate LSTM
        out, _ = self.gru(x, h0)
        out = out.reshape(out.shape[0], -1)

        # Decode the hidden state of the last time step
        out = self.fc(out)
        return out


# Recurrent neural network with LSTM (many-to-one)
class RNN_LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN_LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) # Cell State

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)
        out = out.reshape(out.shape[0], -1)

        # Decode the hidden state of the last time step
        out = self.fc(out)
        return out

# Bidirectional Recurrent neural network 
class BRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional = True)
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device) # Cell State

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)

        # Decode the hidden state of the last time step
        out = self.fc(out[:,-1,:])
        return out

In [6]:
# Load Data
train_dataset = datasets.MNIST(root="dataset/MNIST", train=True, transform=transforms.ToTensor(), download=False)
test_dataset = datasets.MNIST(root="dataset/MNIST", train=False, transform=transforms.ToTensor(), download=False)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)


In [21]:
# Initialize network (try out just using simple RNN, or GRU, and then compare with LSTM)
model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [22]:
if load_model:
    load_checkpoint(torch.load(checkpoint_name))

=> Loading checkpoint


In [23]:
for epoch in range(num_epochs):
    losses = []
    checkpoint = {"state_dict" : model.state_dict(), "optimizer": optimizer.state_dict()}
    if epoch % 2 == 0:
        save_checkpoint(checkpoint)

    print(f"Training Epoch : {epoch}")
    for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
        # Get data to cuda if possible
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)
        
        losses.append(loss.item())

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent update step/adam step
        optimizer.step()
    print(f"Loss at epoch {epoch}: {loss}")

=> Saving Checkpoint
Training Epoch : 0


 25%|██▍       | 230/938 [00:10<00:31, 22.22it/s]


KeyboardInterrupt: 

In [16]:
model.load_state_dict()

AttributeError: '_IncompatibleKeys' object has no attribute 'load_state_dict'