### Import required libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import random
import matplotlib.pyplot as plt


In [2]:
# Show the installed PyTorch and torchvision versions, one per line.
for lib in (torch, torchvision):
    print(lib.__version__)

2.5.1+cu121
0.20.1+cu121


### Set up device-agnostic code

In [3]:
# Default to the CPU and switch to the GPU only when CUDA is usable.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

print(device)

cuda


In [4]:
# Same device selection as the previous cell, written as a single conditional
# expression: "cuda" when a CUDA device is available, otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
# Confirm which device string was selected.
print(device)

cuda


In [6]:
# CUDA diagnostics: gather (label, value) pairs first, then print them in order.
cuda_ready = torch.cuda.is_available()
report = [
    ("CUDA Available:", cuda_ready),
    ("CUDA Device Count:", torch.cuda.device_count()),
]
if cuda_ready:
    # Device name and CUDA toolkit version are only queried when a GPU is usable.
    report += [
        ("CUDA Device Name:", torch.cuda.get_device_name(0)),
        ("CUDA Version:", torch.version.cuda),
    ]
report += [
    ("PyTorch Version:", torch.__version__),
    ("Torchvision Version:", torchvision.__version__),
]
for label, value in report:
    print(label, value)

CUDA Available: True
CUDA Device Count: 1
CUDA Device Name: NVIDIA GeForce RTX 4050 Laptop GPU
CUDA Version: 12.1
PyTorch Version: 2.5.1+cu121
Torchvision Version: 0.20.1+cu121


In [7]:
# Seed every random number generator this notebook imports so that
# "Restart Kernel -> Run All" produces the same random draws each time.
SEED = 42
random.seed(SEED)                 # Python's built-in RNG
np.random.seed(SEED)              # NumPy's legacy global RNG
torch.manual_seed(SEED)           # PyTorch's default generator
torch.cuda.manual_seed_all(SEED)  # CUDA generators on every device; ignored when CUDA is unavailable

### Setting the hyperparameters

In [8]:
# Hyperparameters for the data pipeline and (presumably) the vision transformer
# built later in the notebook. Only BATCH_SIZE is consumed in the cells shown here.
BATCH_SIZE = 128  # samples per batch; passed to both DataLoaders
EPOCHS = 10  # presumably the number of passes over the training set — confirm in the training loop
LEARNING_RATE = 0.0003  # presumably the optimizer step size — confirm where the optimizer is created
PATCH_SIZE = 4  # presumably the side length in pixels of each square image patch — confirm in the model
NUM_CLASSES = 10  # presumably the classifier output size (one per CIFAR-10 class) — confirm in the model
IMAGE_SIZE = 32  # presumably the input image side length in pixels — confirm against the dataset/model
EMBED_DIM = 256  # presumably the per-token embedding width — confirm in the model
DEPTH = 6  # presumably the number of stacked transformer blocks — confirm in the model
MLP_DIM = 512  # presumably the hidden width of each block's MLP — confirm in the model
DROP_RATE = 0.1  # presumably the dropout probability — confirm in the model

### Define image transformations

In [9]:
# Preprocessing applied to every image:
#   1. ToTensor converts the image to a float tensor scaled to [0, 1].
#   2. Normalize maps each channel with (x - mean) / std, i.e. to about [-1, 1].
# Centering the inputs around zero helps the model converge faster and keeps
# the numerical computation stable.
# Mean and std are spelled out once per RGB channel rather than passed as a
# bare scalar, so the call does not depend on scalar broadcasting.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

### Getting a dataset

In [10]:
# CIFAR-10 training split, stored under ./data (fetched only if missing),
# with the preprocessing pipeline applied to each image on access.
train_dataset = datasets.CIFAR10(
    root='data',
    train=True,
    download=True,
    transform=transform,
)

Files already downloaded and verified


In [11]:
# CIFAR-10 held-out test split, stored under ./data (fetched only if missing),
# with the same preprocessing pipeline as the training split.
test_dataset = datasets.CIFAR10(
    root='data',
    train=False,
    download=True,
    transform=transform,
)

Files already downloaded and verified


In [12]:
# Display the test dataset summary (size, root location, split, transforms).
test_dataset

Dataset CIFAR10
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=0.5, std=0.5)
           )

In [13]:
# Display the training dataset summary (size, root location, split, transforms).
train_dataset

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=0.5, std=0.5)
           )

In [14]:
# Report the number of samples in each split: training first, then test.
for split in (train_dataset, test_dataset):
    print(len(split))

50000
10000


### Converting the datasets into dataloaders:
Our data is currently in the form of PyTorch `Dataset` objects. A `DataLoader` turns a dataset into batches (mini-batches), which makes training more computationally efficient.

In [15]:
# Wrap both splits in DataLoaders that yield batches of BATCH_SIZE samples.
# Training batches are shuffled; the test loader keeps the dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [16]:
# Show the two loader objects, then how many batches each one yields.
loaders = (train_loader, test_loader)
print(f"DataLoader: {loaders}")
n_train_batches, n_test_batches = map(len, loaders)
print(f"Length of train_loader: {n_train_batches}")
print(f"Length of test_loader: {n_test_batches}")

DataLoader: (<torch.utils.data.dataloader.DataLoader object at 0x0000022249D6C110>, <torch.utils.data.dataloader.DataLoader object at 0x0000022270C883D0>)
Length of train_loader: 391
Length of test_loader: 79


### Building vision transformer model

SyntaxError: expected ':' (3884967341.py, line 3)