In [47]:
import torch
import torch.nn as nn
from torchvision import transforms
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader, random_split

from tqdm import tqdm

In [48]:
# Device configuration GPU/CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

## VGG-19 Model

In [61]:
class VGG16(nn.Module):
    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )



    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [62]:
num_classes = 10
num_epochs = 20
batch_size = 16
learning_rate = 0.005

## Dataset

In [63]:
# Load CIFAR-100 dataset
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2761))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2761))
])

dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Split dataset into train and validation sets (80% train, 20% validation)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [65]:
model = VGG16(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9,weight_decay=0.005)

# Training Original VGG19

In [None]:
total_step = len(train_loader)
model.train()
for epoch in range(num_epochs):

    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # Print every 100 mini-batches
            print(f'Epoch [{epoch + 1}/30], Step [{i + 1}/{len(train_loader)}], Loss: {running_loss / 100:.4f}')
            running_loss = 0.0

    # Validate the model
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    val_accuracy = 100 * correct / total
    print(f'Epoch [{epoch + 1}/30], Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

torch.save(model.state_dict(), "model.h5")

Epoch [1/30], Step [100/2500], Loss: 2.3900
Epoch [1/30], Step [200/2500], Loss: 2.1761
Epoch [1/30], Step [300/2500], Loss: 2.1134
Epoch [1/30], Step [400/2500], Loss: 2.0635
Epoch [1/30], Step [500/2500], Loss: 2.0372
Epoch [1/30], Step [600/2500], Loss: 1.9912
Epoch [1/30], Step [700/2500], Loss: 1.9917
Epoch [1/30], Step [800/2500], Loss: 1.9270
Epoch [1/30], Step [900/2500], Loss: 1.9246
Epoch [1/30], Step [1000/2500], Loss: 1.9010
Epoch [1/30], Step [1100/2500], Loss: 1.8907
Epoch [1/30], Step [1200/2500], Loss: 1.8640
Epoch [1/30], Step [1300/2500], Loss: 1.8447
Epoch [1/30], Step [1400/2500], Loss: 1.9030
Epoch [1/30], Step [1500/2500], Loss: 1.8413
Epoch [1/30], Step [1600/2500], Loss: 1.8235
Epoch [1/30], Step [1700/2500], Loss: 1.8019
Epoch [1/30], Step [1800/2500], Loss: 1.7990
Epoch [1/30], Step [1900/2500], Loss: 1.7392
Epoch [1/30], Step [2000/2500], Loss: 1.7513
Epoch [1/30], Step [2100/2500], Loss: 1.7388
Epoch [1/30], Step [2200/2500], Loss: 1.7123
Epoch [1/30], Step 

In [8]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%') 

Accuracy of the network on the 10000 test images: 76.99 %


# Training Pruned VGG19 (TODO!)

# Training with Knowledge Distillation

In [None]:
def distillation_loss(student_probs, teacher_probs, labels, alpha):
    kl_div_loss = nn.functional.kl_div(student_probs, teacher_probs, reduction="batchmean")
    ce_loss = nn.functional.cross_entropy(student_probs, labels)
    return alpha * kl_div_loss + (1 - alpha) * ce_loss
 
