In [1]:
import torch
import torch.nn as nn
from torchvision import transforms
import torchvision
from torch.utils.data import DataLoader, random_split
import numpy as np
from tqdm import tqdm

In [2]:
# Device configuration GPU/CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
import numpy as np
print(np.__version__)


1.26.4


## VGG-16 Model


In [4]:
class VGG16(nn.Module):
    def __init__(self, num_classes=10):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes)
        )



    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [5]:
# # Hyperparameters
# num_classes = 10
# num_epochs = 30
batch_size = 64
# learning_rate = 0.01

## Dataset

In [6]:
# Load CIFAR-100 dataset
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2761))
])

dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Split dataset into train and validation sets (80% train, 20% validation)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
for inputs, labels in train_loader:
    print(f"Input shape: {inputs.shape}, Labels: {labels.shape}")
    break

Input shape: torch.Size([64, 3, 32, 32]), Labels: torch.Size([64])


In [None]:
# model = VGG16(num_classes=num_classes).to(device)
# criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)

In [19]:
# # Hyperparameters
num_classes = 10
# num_epochs = 30
# batch_size = 64
# learning_rate = 0.01

config_1 = {
    "num_epochs": 50,
    "batch_size": 128,
    "learning_rate": 0.001,
    "optimizer": "Adam",
    "scheduler": {
        "type": "StepLR",
        "step_size": 15,
        "gamma": 0.1
    }
}

config_2 = {
    "num_epochs": 40,
    "batch_size": 64,
    "learning_rate": 0.01,
    "optimizer": "SGD",
    "momentum": 0.9,
    "scheduler": {
        "type": "CosineAnnealingLR",
        "T_max": 40
    }
}

config_3 = {
    "num_epochs": 100,
    "batch_size": 32,
    "learning_rate": 0.0005,
    "optimizer": "Adam",
    "scheduler": {
        "type": "ReduceLROnPlateau",
        "mode": "min",
        "factor": 0.1,
        "patience": 10
    },
    "regularization": {
        "dropout_rate": 0.5
    }
}

def config_test(selected_config):
    print(selected_config["num_epochs"])
    # Atribuindo os hiperparâmetros escolhidos
    num_epochs = selected_config["num_epochs"]
    batch_size = selected_config["batch_size"]
    learning_rate = selected_config["learning_rate"]
    
    model = VGG16(num_classes=num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    
    if selected_config["optimizer"] == "Adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    elif selected_config["optimizer"] == "SGD":
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=selected_config.get("momentum", 0))
    
    # Configurando o scheduler baseado na configuração
    if selected_config["scheduler"]["type"] == "StepLR":
        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer,
            step_size=selected_config["scheduler"]["step_size"],
            gamma=selected_config["scheduler"]["gamma"]
        )
    elif selected_config["scheduler"]["type"] == "CosineAnnealingLR":
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=selected_config["scheduler"]["T_max"]
        )
    elif selected_config["scheduler"]["type"] == "ReduceLROnPlateau":
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode=selected_config["scheduler"]["mode"],
            factor=selected_config["scheduler"]["factor"],
            patience=selected_config["scheduler"]["patience"]
        )


    return num_epochs, batch_size, learning_rate, model, criterion, optimizer, scheduler


# Training Original VGG16

### Test 1

In [21]:
num_epochs, batch_size, learning_rate, model, criterion, optimizer, scheduler = config_test(config_1)
total_step = len(train_loader)
model.train()
for epoch in range(num_epochs):

    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # Print every 100 mini-batches
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {running_loss / 100:.4f}')
            running_loss = 0.0

    # Validate the model
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    val_accuracy = 100 * correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

    scheduler.step()

torch.save(model.state_dict(), "model_t1.h5")

50
Epoch [1/50], Step [100/625], Loss: 2.3864
Epoch [1/50], Step [200/625], Loss: 2.2682
Epoch [1/50], Step [300/625], Loss: 2.1015
Epoch [1/50], Step [400/625], Loss: 2.0011
Epoch [1/50], Step [500/625], Loss: 1.9381
Epoch [1/50], Step [600/625], Loss: 1.8948
Epoch [1/50], Validation Loss: 1.8884, Validation Accuracy: 26.03%
Epoch [2/50], Step [100/625], Loss: 1.9894
Epoch [2/50], Step [200/625], Loss: 1.8809
Epoch [2/50], Step [300/625], Loss: 1.8523
Epoch [2/50], Step [400/625], Loss: 1.8911
Epoch [2/50], Step [500/625], Loss: 1.8555
Epoch [2/50], Step [600/625], Loss: 1.8407
Epoch [2/50], Validation Loss: 1.8853, Validation Accuracy: 27.69%
Epoch [3/50], Step [100/625], Loss: 1.7752
Epoch [3/50], Step [200/625], Loss: 1.8089
Epoch [3/50], Step [300/625], Loss: 1.7971
Epoch [3/50], Step [400/625], Loss: 1.7322
Epoch [3/50], Step [500/625], Loss: 1.6909
Epoch [3/50], Step [600/625], Loss: 1.7109
Epoch [3/50], Validation Loss: 1.6736, Validation Accuracy: 32.48%
Epoch [4/50], Step [10

The code in the cell looks correct and should work without any errors, given that all the necessary variables and modules are already defined and imported in the previous cells. However, to ensure that the code runs smoothly, you should make sure that the `train_loader`, `val_loader`, `model`, `criterion`, `optimizer`, `device`, and `num_epochs` are properly defined and imported.



Made changes.

In [22]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%')

Test Accuracy: 85.49%


### Test 2

In [23]:
num_epochs, batch_size, learning_rate, model, criterion, optimizer, scheduler = config_test(config_2)

total_step = len(train_loader)
model.train()
for epoch in range(num_epochs):

    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # Print every 100 mini-batches
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {running_loss / 100:.4f}')
            running_loss = 0.0

    # Validate the model
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    val_accuracy = 100 * correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

    scheduler.step()

torch.save(model.state_dict(), "model_t2.h5")

40
Epoch [1/40], Step [100/625], Loss: 2.1439
Epoch [1/40], Step [200/625], Loss: 1.8907
Epoch [1/40], Step [300/625], Loss: 1.7771
Epoch [1/40], Step [400/625], Loss: 1.6532
Epoch [1/40], Step [500/625], Loss: 1.5873
Epoch [1/40], Step [600/625], Loss: 1.4504
Epoch [1/40], Validation Loss: 1.6401, Validation Accuracy: 45.38%
Epoch [2/40], Step [100/625], Loss: 2.0406
Epoch [2/40], Step [200/625], Loss: 1.8989
Epoch [2/40], Step [300/625], Loss: 1.7858
Epoch [2/40], Step [400/625], Loss: 1.7573
Epoch [2/40], Step [500/625], Loss: 1.6150
Epoch [2/40], Step [600/625], Loss: 1.5562
Epoch [2/40], Validation Loss: 1.5370, Validation Accuracy: 40.25%
Epoch [3/40], Step [100/625], Loss: 1.4780
Epoch [3/40], Step [200/625], Loss: 1.4468
Epoch [3/40], Step [300/625], Loss: 1.4585
Epoch [3/40], Step [400/625], Loss: 1.3172
Epoch [3/40], Step [500/625], Loss: 1.3088
Epoch [3/40], Step [600/625], Loss: 1.2272
Epoch [3/40], Validation Loss: 1.1982, Validation Accuracy: 58.12%
Epoch [4/40], Step [10

In [24]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%')

Test Accuracy: 88.91%


### Test 3

In [26]:
num_epochs, batch_size, learning_rate, model, criterion, optimizer, scheduler = config_test(config_3)
total_step = len(train_loader)
model.train()
for epoch in range(num_epochs):

    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # Print every 100 mini-batches
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {running_loss / 100:.4f}')
            running_loss = 0.0

    # Validate the model
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader)
    val_accuracy = 100 * correct / total
    print(f'Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

    scheduler.step(val_loss)

torch.save(model.state_dict(), "model_t3.h5")

100
Epoch [1/100], Step [100/625], Loss: 2.1860
Epoch [1/100], Step [200/625], Loss: 2.0093
Epoch [1/100], Step [300/625], Loss: 1.9389
Epoch [1/100], Step [400/625], Loss: 1.9202
Epoch [1/100], Step [500/625], Loss: 1.8860
Epoch [1/100], Step [600/625], Loss: 1.8605
Epoch [1/100], Validation Loss: 1.9942, Validation Accuracy: 27.16%
Epoch [2/100], Step [100/625], Loss: 1.9567
Epoch [2/100], Step [200/625], Loss: 1.8770
Epoch [2/100], Step [300/625], Loss: 1.8985
Epoch [2/100], Step [400/625], Loss: 1.8642
Epoch [2/100], Step [500/625], Loss: 1.8313
Epoch [2/100], Step [600/625], Loss: 1.7995
Epoch [2/100], Validation Loss: 1.8582, Validation Accuracy: 22.86%
Epoch [3/100], Step [100/625], Loss: 1.8098
Epoch [3/100], Step [200/625], Loss: 1.7725
Epoch [3/100], Step [300/625], Loss: 1.7149
Epoch [3/100], Step [400/625], Loss: 1.6572
Epoch [3/100], Step [500/625], Loss: 1.6283
Epoch [3/100], Step [600/625], Loss: 1.5925
Epoch [3/100], Validation Loss: 1.5274, Validation Accuracy: 38.89%


In [27]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%')

Test Accuracy: 88.87%


# Training Pruned VGG16 (TODO!)

# Training with Knowledge Distillation

In [None]:
def train_student(student_model, teacher_model, train_loader, val_loader, num_epochs, soft_target_loss_weight, ce_loss_weight, temperature):
    student_model.train()
    teacher_model.eval()

    optimizer = torch.optim.SGD(student_model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)

    ce_loss = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            # Forward pass through teacher model
            with torch.no_grad():
                teacher_outputs = teacher_model(inputs)
                teacher_probs = nn.functional.softmax(teacher_outputs / temperature, dim=1)

            # Forward pass through student model
            student_outputs = student_model(inputs)
            student_probs = nn.functional.log_softmax(student_outputs / temperature, dim=1)

            # Compute distillation loss
            soft_target_loss = torch.sum(teacher_probs * (teacher_probs.log() - student_probs))/ student_probs.size()[0] * (temperature**2)

            label_loss = ce_loss(student_outputs, labels)

            loss = soft_target_loss_weight * soft_target_loss + ce_loss_weight * label_loss

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 100 == 99:  # Print every 100 mini-batches
                print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {running_loss / 100:.4f}')
                running_loss = 0.0

        # Validate the student model
        student_model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = student_model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_accuracy = 100 * correct / total
        print(f'Epoch [{epoch + 1}/{num_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

        student_model.train()

    torch.save(student_model.state_dict(), "student_model.pth")

In [None]:
teacher = VGG16(num_classes)
teacher.load_state_dict(torch.load("teacher.pth", weights_only=True))
teacher.eval()

student = None

train_student(student, teacher, train_loader, val_loader, num_epochs,0.25, 0.75, 2 )