In [1]:
import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import CIFAR10
import torch.nn as nn
from torchvision.transforms import v2
import multiprocessing
import torch.quantization
import torch.optim as optim

In [2]:
# run variables

seed = 42

In [3]:
# device settings

num_workers = multiprocessing.cpu_count() // 2

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")

Using cuda device


In [4]:
# define datasets and loaders

transform = v2.Compose([
    v2.ToTensor(),  
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = CIFAR10("./../data", train=True, transform=transform, download=True)
test_dataset = CIFAR10("./../data", train=False, transform=transform, download=True)

train_dataset, validation_dataset =  random_split(train_dataset, [0.8, 0.2])

print('train set size:', len(train_dataset))
print('validation set size:', len(validation_dataset))
print('test set size:', len(test_dataset))

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=num_workers)
validation_loader = DataLoader(validation_dataset, batch_size=128, shuffle=True, num_workers=num_workers)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=num_workers)

class_names = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]



Files already downloaded and verified
Files already downloaded and verified
train set size: 40000
validation set size: 10000
test set size: 10000


In [5]:
class BaseNN(nn.Module):
    def __init__(self, num_classes=10):
        super(BaseNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(512, num_classes)
        )

        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.features(x)
        x = x.reshape(x.size(0), -1)
        x = self.classifier(x)
        x = self.dequant(x)
        return x

In [6]:
torch.manual_seed(seed)
model = BaseNN(num_classes=10).to(device)

In [7]:
def train(model, epochs, learning_rate):
    trainingEpoch_loss = []
    validationEpoch_loss = []
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    qat_started = False 

    for epoch in range(epochs):
        model.train()

        if epoch == 2 and not qat_started:
            print("Rozpoczynanie Quantization-Aware Training (QAT)...")
            model.qconfig = torch.quantization.get_default_qat_qconfig("fbgemm")
            torch.quantization.prepare_qat(model, inplace=True)
            qat_started = True

        running_loss = 0.0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            logits = outputs[0] if isinstance(outputs, tuple) else outputs
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        avg_train_loss = running_loss / len(train_loader)
        trainingEpoch_loss.append(avg_train_loss)
        print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_train_loss:.4f}")

        # Ewaluacja
        model.eval()
        validation_loss = 0.0
        with torch.no_grad():
            for inputs, labels in validation_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                logits = outputs[0] if isinstance(outputs, tuple) else outputs
                loss = criterion(logits, labels)

                validation_loss += loss.item()

        avg_val_loss = validation_loss / len(validation_loader)
        validationEpoch_loss.append(avg_val_loss)
        print(f"Epoch {epoch+1}/{epochs}, Validation Loss: {avg_val_loss:.4f}")

    return trainingEpoch_loss, validationEpoch_loss

In [14]:
def test(model):
    correct = 0
    total = 0

    model.eval()  
    
    with torch.no_grad():  
        for inputs, labels in test_loader:
            inputs, labels = inputs.to("cpu"), labels.to("cpu")  

            outputs = model(inputs)

            logits = outputs[0] if isinstance(outputs, tuple) else outputs

            _, predicted = torch.max(logits.data, 1)  

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

In [9]:
trainingEpoch_loss, validationEpoch_loss = train(model, epochs=7, learning_rate=0.0001)

Epoch 1/7, Training Loss: 1.3936
Epoch 1/7, Validation Loss: 1.1366
Epoch 2/7, Training Loss: 0.9849
Epoch 2/7, Validation Loss: 0.9388
Rozpoczynanie Quantization-Aware Training (QAT)...




Epoch 3/7, Training Loss: 0.8559
Epoch 3/7, Validation Loss: 0.8929
Epoch 4/7, Training Loss: 0.7643
Epoch 4/7, Validation Loss: 0.8042
Epoch 5/7, Training Loss: 0.6980
Epoch 5/7, Validation Loss: 0.7770
Epoch 6/7, Training Loss: 0.6425
Epoch 6/7, Validation Loss: 0.7858
Epoch 7/7, Training Loss: 0.5941
Epoch 7/7, Validation Loss: 0.7272


In [10]:
model.to("cpu")
model = torch.quantization.convert(model, inplace=True) 
torch.save(model.state_dict(), "../models/quantized_model_during_training.pt")

In [19]:
base_model = BaseNN(num_classes=10).to("cpu")
base_model.load_state_dict(torch.load("../models/quantized_base_model.pt"))

  base_model.load_state_dict(torch.load("../models/quantized_base_model.pt"))


<All keys matched successfully>

In [20]:
base_model.eval()

base_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')

torch.quantization.prepare(base_model, inplace=True)

with torch.no_grad():
    for i, (inputs, _) in enumerate(train_loader):
        if i >= 10: break 
        base_model(inputs)

after_model = torch.quantization.convert(base_model, inplace=False)



In [21]:
torch.save(after_model.state_dict(), "../models/quantized_model_after_training.pt")

In [22]:
print(f"Base model")
acc_base = test(base_model)

print(f"Quntized model during training")
acc_during = test(model)
print(f"Delta: {acc_during - acc_base}")

print(f"Quntized model after training")
acc_after = test(after_model)
print(f"Delta: {acc_after - acc_base}")

Base model
Test Accuracy: 73.11%
Quntized model during training
Test Accuracy: 74.37%
Delta: 1.2600000000000051
Quntized model after training
Test Accuracy: 72.68%
Delta: -0.4299999999999926


In [23]:
import os

size_MB_during = os.path.getsize("../models/quantized_model_during_training.pt") / (1024 * 1024)
size_MB_base = os.path.getsize("../models/quantized_base_model.pt") / (1024 * 1024)
size_MB_after = os.path.getsize("../models/quantized_model_after_training.pt") / (1024 * 1024)
print(f"Base model size: {size_MB_base:.2f} MB")
print(f"Quantized model during training size: {size_MB_during:.2f} MB")
print(f"Quantized model after training size: {size_MB_after:.2f} MB")

Base model size: 4.54 MB
Quantized model during training size: 1.17 MB
Quantized model after training size: 1.17 MB


In [24]:
import time

base_model.eval()
inputs, labels = next(iter(test_loader))
input = inputs.to("cpu")

start = time.time()
output = base_model(input)
end = time.time()

latency_ms = (end - start) * 1000

print(f"Base model: latency: {latency_ms:.2f} ms per inference")

model.eval()

start = time.time()
output = model(input)
end = time.time()

latency_ms = (end - start) * 1000

print(f"During training: latency: {latency_ms:.2f} ms per inference")

after_model.eval()

start = time.time()
output = after_model(input)
end = time.time()

latency_ms = (end - start) * 1000

print(f"After training: latency: {latency_ms:.2f} ms per inference")

Base model: latency: 344.24 ms per inference
During training: latency: 148.18 ms per inference
After training: latency: 142.37 ms per inference


In [25]:
from codecarbon import EmissionsTracker

tracker = EmissionsTracker()
tracker.start()

output = base_model(input)

emissions = tracker.stop()
print(f"Base model - Estimated CO2 emissions: {emissions} kg")

tracker = EmissionsTracker()
tracker.start()

output = model(input)

emissions = tracker.stop()
print(f"During model - Estimated CO2 emissions: {emissions} kg")

tracker = EmissionsTracker()
tracker.start()

output = after_model(input)

emissions = tracker.stop()
print(f"After model - Estimated CO2 emissions: {emissions} kg")

[codecarbon INFO @ 18:20:29] [setup] RAM Tracking...
[codecarbon INFO @ 18:20:29] [setup] CPU Tracking...
 Windows OS detected: Please install Intel Power Gadget to measure CPU

[codecarbon INFO @ 18:20:31] CPU Model on constant consumption mode: AMD Ryzen 5 2600 Six-Core Processor
[codecarbon INFO @ 18:20:31] [setup] GPU Tracking...
[codecarbon INFO @ 18:20:31] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 18:20:31] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 18:20:31] >>> Tracker's metadata:
[codecarbon INFO @ 18:20:31]   Platform system: Windows-11-10.0.26100-SP0
[codecarbon INFO @ 18:20:31]   Python version: 3.12.3
[codecarbon INFO @ 18:20:31]   CodeCarbon version: 3.0.2
[codecarbon INFO @ 18:20:31]   Available RAM : 15.924 GB
[codecarbon INFO @ 18:20:31]   CPU count: 12 thread(s) in 12 

Base model - Estimated CO2 emissions: 2.7130355684580645e-05 kg


 Windows OS detected: Please install Intel Power Gadget to measure CPU

[codecarbon INFO @ 18:20:36] CPU Model on constant consumption mode: AMD Ryzen 5 2600 Six-Core Processor
[codecarbon INFO @ 18:20:36] [setup] GPU Tracking...
[codecarbon INFO @ 18:20:36] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 18:20:36] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 18:20:36] >>> Tracker's metadata:
[codecarbon INFO @ 18:20:36]   Platform system: Windows-11-10.0.26100-SP0
[codecarbon INFO @ 18:20:36]   Python version: 3.12.3
[codecarbon INFO @ 18:20:36]   CodeCarbon version: 3.0.2
[codecarbon INFO @ 18:20:36]   Available RAM : 15.924 GB
[codecarbon INFO @ 18:20:36]   CPU count: 12 thread(s) in 12 physical CPU(s)
[codecarbon INFO @ 18:20:36]   CPU model: AMD Ryzen 5 2600 Six-Core Processor
[codecarbon 

During model - Estimated CO2 emissions: 1.1106384829562002e-05 kg


 Windows OS detected: Please install Intel Power Gadget to measure CPU

[codecarbon INFO @ 18:20:41] CPU Model on constant consumption mode: AMD Ryzen 5 2600 Six-Core Processor
[codecarbon INFO @ 18:20:41] [setup] GPU Tracking...
[codecarbon INFO @ 18:20:41] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 18:20:41] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 18:20:41] >>> Tracker's metadata:
[codecarbon INFO @ 18:20:41]   Platform system: Windows-11-10.0.26100-SP0
[codecarbon INFO @ 18:20:41]   Python version: 3.12.3
[codecarbon INFO @ 18:20:41]   CodeCarbon version: 3.0.2
[codecarbon INFO @ 18:20:41]   Available RAM : 15.924 GB
[codecarbon INFO @ 18:20:41]   CPU count: 12 thread(s) in 12 physical CPU(s)
[codecarbon INFO @ 18:20:41]   CPU model: AMD Ryzen 5 2600 Six-Core Processor
[codecarbon 

After model - Estimated CO2 emissions: 1.2811165995216807e-05 kg
