In [None]:
import torch
import numpy as np


# Fully Connected Layers

In [None]:
# A fully connected layer mapping 5 input features to 10 output features.
linear = torch.nn.Linear(5, 10)
# One sample (batch size = 1) with 5 features.
input_t = torch.rand(1, 5)

output_t = linear(input_t)

# Plain SGD over the layer's parameters. Passing the parameters explicitly,
# i.e. torch.optim.SGD([linear.weight, linear.bias], lr=1e-2), is equivalent.
optim = torch.optim.SGD(linear.parameters(), lr=1e-2)

# Show the initial weight and bias before any update is applied.
for n, val in linear.named_parameters():
    print(n, val)

# backward() needs a scalar, so reduce the output by summing all its entries.
s = torch.sum(output_t)
optim.zero_grad()  # clear any previously accumulated gradients
s.backward()

print("\nWeight Gradient:", linear.weight.grad)
print("Bias   Gradient:", linear.bias.grad)

# One SGD step: every parameter moves against its gradient, scaled by lr.
optim.step()
print("\nAfter Gradient Update:")
print("Weights: ", linear.weight)
print("Bias: ", linear.bias)


# Convolutional Layers

In [None]:
# 3x3 convolution from 3 to 32 channels, unit stride, no padding.
conv = torch.nn.Conv2d(
    in_channels=3,
    out_channels=32,
    kernel_size=3,
    stride=1,
    padding=0,
)
# Inputs are laid out as (batch, channel, height, width).
input_t = torch.rand(10, 3, 32, 32)
output_t = conv(input_t)

# Spatial output size:
#   (W - K + 2P) / S + 1
#   (32 - 3 + 0) / 1 + 1 = 30

print("Output:", output_t.shape, "Parameters:", sep="\n")
print("Weights:", conv.weight.shape)
print("Bias:", conv.bias.shape)


In [None]:
# Same 3x3 convolution, but with one pixel of padding on every side so the
# spatial size of the input is preserved.
conv = torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
input_t = torch.rand(10, 3, 32, 32)
output_t = conv(input_t)

# Spatial output size:
#   (W - K + 2P) / S + 1
#   (32 - 3 + 2) / 1 + 1 = 32

print("Output:", output_t.shape, "Parameters:", sep="\n")
print("Weights:", conv.weight.shape)
print("Bias:", conv.bias.shape)


In [None]:
# 3x3 convolution from 3 to 16 channels that moves two pixels at a time.
conv = torch.nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=2)
input_t = torch.rand(10, 3, 32, 32)
output_t = conv(input_t)

# Spatial output size:
#   (W - K + 2P) / S + 1
#   (32 - 3 + 0) / 2 + 1 = 15.5 -> 15
# When the division does not come out even, the result is rounded down
# (floor), i.e. floor(29 / 2) + 1 = 14 + 1 = 15.

print("Output:", output_t.shape, "Parameters:", sep="\n")
print("Weights:", conv.weight.shape)
print("Bias:", conv.bias.shape)


In [None]:
# Transposed convolution (also known as "deconvolution" or "up-convolution").
# Spatial output size:
#   (W - 1) * S - 2P + (K - 1) + 1
#   (30 - 1) * 1 - 0 + (3 - 1) + 1 = 32
tconv = torch.nn.ConvTranspose2d(32, 3, 3, stride=1, padding=0)
input_t = torch.rand(10, 32, 30, 30)
output_t = tconv(input_t)

print("Output:", output_t.shape, "Parameters:", sep="\n")
print("Weights:", tconv.weight.shape)
print("Bias:", tconv.bias.shape)

# A regular convolution with the mirrored channel configuration maps the
# up-sampled result back to the shape we started from.
conv = torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=0)
input_again = conv(output_t)

print("\nInput Again:", input_again.shape, "Parameters:", sep="\n")
print("Weights:", conv.weight.shape)
print("Bias:", conv.bias.shape)


In [None]:
# Size-preserving 3x3 convolution (padding=1) used to demonstrate gradients.
conv = torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
input_t = torch.rand(10, 3, 32, 32)
output_t = conv(input_t)

# Collapse all activations into one scalar (their 2-norm) and backpropagate
# it, which fills in .grad on the layer's weight and bias.
s = torch.norm(output_t, p=2)
s.backward()

print("Output:", output_t.shape, "Parameters:", sep="\n")
print("Weights:", conv.weight.shape)
print("Bias:", conv.bias.shape)

# Each gradient has exactly the shape of the parameter it belongs to.
print("Weight gradient:", conv.weight.grad.shape)
print("Bias gradient: ", conv.bias.grad.shape)


# Building Models

In [None]:
from torch import nn
import torch.nn.functional as F


# Sequential runs its children in the order given: two 5x5 convolutions,
# each followed by a ReLU.
model = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU(),
)
model


In [None]:
# Example of using Sequential with OrderedDict
from collections import OrderedDict

# Passing an OrderedDict gives every stage a name instead of a numeric index,
# so layers can be reached as attributes (e.g. model.conv1).
model = nn.Sequential(
    OrderedDict(
        conv1=nn.Conv2d(1, 20, 5),
        relu1=nn.ReLU(),
        conv2=nn.Conv2d(20, 64, 5),
        relu2=nn.ReLU(),
    )
)
model


In [None]:
class MyModel(nn.Module):
    """Small CNN classifier: two convolutions, max-pooling and two dense layers.

    The first dense layer expects 9216 flattened features. That is what a
    1x28x28 input produces after two unpadded 3x3 convolutions and a 2x2
    max-pool (64 channels * 12 * 12).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 32 -> 64 channels, 3x3 kernels, stride 1.
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # Regularisation applied after pooling and after the hidden dense layer.
        self.dropout1 = nn.Dropout(p=0.25)
        self.dropout2 = nn.Dropout(p=0.5)
        # Classifier head producing 10 scores.
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the unnormalised scores of shape (batch, 10) for input ``x``."""
        features = F.relu(self.conv1(x))
        features = F.max_pool2d(F.relu(self.conv2(features)), 2)
        features = self.dropout1(features)

        # Keep the batch dimension and flatten everything else for the dense layers.
        hidden = F.relu(self.fc1(torch.flatten(features, 1)))
        return self.fc2(self.dropout2(hidden))


model_test = MyModel()
model_test


# Choosing Device

In [None]:
tensor = torch.rand(2, 4, 5)
print(tensor.device)

# Use the GPU when one is available, otherwise stay on the CPU. Building a
# torch.device in both cases keeps the type of `device` consistent.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Copy the tensor to the selected device.
tensor = tensor.to(device)
print(tensor.device)

# Running on multiple GPUs in parallel: DataParallel wraps a *module* (not a
# tensor) and splits each input batch across the visible GPUs. Without any
# GPU it simply forwards to the wrapped module.
parallel_model = torch.nn.DataParallel(torch.nn.Linear(5, 10).to(device))


# Create Dataloader

In [None]:
from torch.utils.data import Dataset, DataLoader


class RandomDataset(Dataset):
    """In-memory dataset of ``length`` random vectors with ``dim`` features each."""

    def __init__(self, dim, length):
        """Draw ``length`` samples of size ``dim`` from a standard normal distribution."""
        self.len = length
        self.data = torch.randn(length, dim)

    def __getitem__(self, index):
        """Return an independent copy of the sample stored at ``index``."""
        # clone().detach() is the recommended way to copy an existing tensor;
        # torch.tensor(existing_tensor) emits a UserWarning on every access.
        return self.data[index].clone().detach()

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.len


# 100 samples with 5 features each, served in shuffled batches of 30.
# NOTE(review): num_workers=1 loads batches in a separate worker process; on
# platforms whose multiprocessing start method is "spawn", a Dataset class
# defined inside a notebook may not be importable by the worker. Confirm on
# the target platform, or use num_workers=0.
rand_loader = DataLoader(
    dataset=RandomDataset(5, 100),
    batch_size=30,
    shuffle=True,
    num_workers=1,
)


In [None]:
# Walk through the loader once; every iteration yields one batch tensor.
# With 100 samples and batch_size=30 that is three batches of 30 and a last
# batch holding the remaining 10.
for x_batch in rand_loader:
    data = x_batch
    print(data.shape)


# A Simple Training Workflow using PyTorch (MNIST Classification)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

import time
import matplotlib.pyplot as plt

# Specify the device to use.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using ", device)

# Get the data: MNIST ships with torchvision, so no custom dataset or
# dataloader code has to be written.

# IMPORTANT: the image data must be transformed to tensors. ToTensor()
# converts pixel values between 0-255 to floats between 0-1.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        # Optional standardisation, currently disabled:
        # transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# Downloads the training split into ./data on first use.
train_dataset = datasets.MNIST("data", train=True, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)

# Create the model and send it to the device.
model = MyModel().to(device)
# Define the optimizer. Here we are using Adam with default parameters.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Put the model in training mode so that its dropout layers are active.
# (model.eval() would switch dropout off and is meant for inference.)
model.train()

num_epochs = 10
epoch_loss = []  # mean training loss of every finished epoch
start_time = time.time()
print("\nTRAINING...")
for epoch in range(1, num_epochs + 1):
    print("\nEpoch: ", epoch)
    running_loss = 0.0
    e_time = time.time()
    # Iterate over the entire dataset; batch ids start at 1 so they can be
    # used directly as the divisor of the running mean below.
    for b_id, (x, y) in enumerate(train_loader, 1):
        # IMPORTANT: the data has to live on the same device as the model.
        x, y = x.to(device), y.to(device)
        # Remove old gradients.
        optimizer.zero_grad()

        output = model(x)
        train_loss = F.cross_entropy(output, y)  # cross entropy loss for classification

        # Backpropagation
        train_loss.backward()
        # Gradient update
        optimizer.step()

        # Incremental mean of the batch losses. .item() extracts a plain
        # Python float, so the running value neither tracks gradients nor
        # keeps a reference to the autograd graph of earlier batches.
        running_loss += (train_loss.item() - running_loss) / b_id
    print(
        "Average Epoch Loss: {:.4f}".format(running_loss),
        "Time Taken: {:.2f}s".format(time.time() - e_time),
    )
    epoch_loss.append(running_loss)

print("Training Time: {:.2f}s".format(time.time() - start_time))
# Label the x-axis with the same 1-based epoch numbers that were printed above.
plt.plot(range(1, len(epoch_loss) + 1), epoch_loss)
plt.xlabel("Epochs")
plt.ylabel("Training Loss")
plt.title("MNIST Classification")
plt.show()


In [None]:
import torchvision


def imshow(img):
    """Display a channel-first image tensor with matplotlib.

    Args:
        img: Tensor laid out as (channels, height, width). Values are assumed
            to lie in [0, 1] (e.g. the output of ``transforms.ToTensor``) and
            are rescaled to 0-255 for display.
    """
    # Detach and move to host memory first, so tensors that live on the GPU
    # or track gradients can be converted to NumPy as well.
    scaled = img.detach().cpu() * 255.0  # undo the 0-1 scaling
    npimg = scaled.numpy().astype(np.uint8)
    # matplotlib expects channel-last (height, width, channels) arrays.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# Fetch a single batch. The built-in next() is the supported way to advance
# a DataLoader iterator; current PyTorch iterators have no .next() method.
dataiter = iter(train_loader)
images, labels = next(dataiter)
# Tile the first six images of the batch into one grid image and show it.
imshow(torchvision.utils.make_grid(images[:6]))
