# Moduli Import

In [None]:
!pip install torch torchvision


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
import torch.quantization as quant
from torch.ao.quantization import get_default_qconfig
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx
from torch.ao.quantization import QConfigMapping

In [None]:
# Code with dataset loader for VOC12 and Cityscapes (adapted from bodokaiser/piwise code)
# Sept 2017
# Eduardo Romera
#######################

import numpy as np
import os

from PIL import Image

from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Compose, Resize, ToTensor


class Relabel:

    def __init__(self, olabel, nlabel):
        self.olabel = olabel
        self.nlabel = nlabel

    def __call__(self, tensor):
        assert isinstance(tensor, torch.LongTensor) or isinstance(tensor, torch.ByteTensor) , 'tensor needs to be LongTensor'
        tensor[tensor == self.olabel] = self.nlabel
        return tensor


class ToLabel:

    def __call__(self, image):
        return torch.from_numpy(np.array(image)).long()#.unsqueeze(0)


EXTENSIONS = ['.jpg', '.png']

def load_image(file):
    return Image.open(file)

def is_image(filename):
    return any(filename.endswith(ext) for ext in EXTENSIONS)

def is_label(filename):
    return filename.endswith("_labelTrainIds.png")

def image_path(root, basename, extension):
    return os.path.join(root, f'{basename}{extension}')

def image_path_city(root, name):
    return os.path.join(root, f'{name}')

def image_basename(filename):
    return os.path.basename(os.path.splitext(filename)[0])

def get_cityscapes_loader(datadir, batch_size, subset,num_workers=4,size = 512):

    # preprocessign of the input images
    input_transform_cityscapes = Compose([
        Resize(size, Image.BILINEAR),
        ToTensor(),
    ])
    target_transform_cityscapes = Compose([
        Resize(size, Image.NEAREST),
        ToLabel(),
        Relabel(255, 19),   #ignore label to 19
    ])

    return DataLoader(cityscapes(datadir, input_transform_cityscapes, target_transform_cityscapes, subset=subset), num_workers=num_workers, batch_size=batch_size, shuffle=False)



class VOC12(Dataset):

    def __init__(self, root, input_transform=None, target_transform=None):
        self.images_root = os.path.join(root, 'images')
        self.labels_root = os.path.join(root, 'labels')

        self.filenames = [image_basename(f)
            for f in os.listdir(self.labels_root) if is_image(f)]
        self.filenames.sort()

        self.input_transform = input_transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        filename = self.filenames[index]

        with open(image_path(self.images_root, filename, '.jpg'), 'rb') as f:
            image = load_image(f).convert('RGB')
        with open(image_path(self.labels_root, filename, '.png'), 'rb') as f:
            label = load_image(f).convert('P')

        if self.input_transform is not None:
            image = self.input_transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label

    def __len__(self):
        return len(self.filenames)


class cityscapes(Dataset):

    def __init__(self, root, input_transform=None, target_transform=None, subset='val'):

        #unisce la root di cityscapes con la cartella delle immagini e delle label e il corretto subset
        self.images_root = os.path.join(root, 'leftImg8bit/' + subset)
        self.labels_root = os.path.join(root, 'gtFine/' + subset)

        # crea una lista dei path di tutte le immagini (compresi di root)
        self.filenames = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(self.images_root)) for f in fn if is_image(f)]
        self.filenames.sort()

        # crea una lista dei path di tutte le label (compresi di root)
        self.filenamesGt = [os.path.join(dp, f) for dp, dn, fn in os.walk(os.path.expanduser(self.labels_root)) for f in fn if is_label(f)]
        self.filenamesGt.sort()

        self.input_transform = input_transform
        self.target_transform = target_transform

    def __getitem__(self, index):

        filename = self.filenames[index]
        filenameGt = self.filenamesGt[index]

        with open(filename, 'rb') as f:
            image = load_image(f).convert('RGB')
        with open(filenameGt, 'rb') as f:
            label = load_image(f).convert('P')

        if self.input_transform is not None:
            image = self.input_transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label#, filename, filenameGt

    def __len__(self):
        return len(self.filenames)

In [None]:
# ERFNET full network definition for Pytorch
# Sept 2017
# Eduardo Romera
#######################


import torch.nn.init as init
import torch.nn.functional as F


class DownsamplerBlock (nn.Module):
    def __init__(self, ninput, noutput):
        super().__init__()

        self.conv = nn.Conv2d(ninput, noutput-ninput, (3, 3), stride=2, padding=1, bias=True)
        self.pool = nn.MaxPool2d(2, stride=2)
        self.bn = nn.BatchNorm2d(noutput, eps=1e-3)

    def forward(self, input):
        output = torch.cat([self.conv(input), self.pool(input)], 1)
        output = self.bn(output)
        return F.relu(output)


class non_bottleneck_1d (nn.Module):
    def __init__(self, chann, dropprob, dilated):
        super().__init__()

        self.conv3x1_1 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1,0), bias=True)

        self.conv1x3_1 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,1), bias=True)

        self.bn1 = nn.BatchNorm2d(chann, eps=1e-03)

        self.conv3x1_2 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1*dilated,0), bias=True, dilation = (dilated,1))

        self.conv1x3_2 = nn.Conv2d(chann, chann, (1,3), stride=1, padding=(0,1*dilated), bias=True, dilation = (1, dilated))

        self.bn2 = nn.BatchNorm2d(chann, eps=1e-03)

        self.dropout = nn.Dropout2d(dropprob)


    def forward(self, input):

        output = self.conv3x1_1(input)
        output = F.relu(output)
        output = self.conv1x3_1(output)
        output = self.bn1(output)
        output = F.relu(output)

        output = self.conv3x1_2(output)
        output = F.relu(output)
        output = self.conv1x3_2(output)
        output = self.bn2(output)

        if (self.dropout.p != 0):
            output = self.dropout(output)

        return F.relu(output+input)    #+input = identity (residual connection)


class Encoder(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.initial_block = DownsamplerBlock(3,16)

        self.layers = nn.ModuleList()

        self.layers.append(DownsamplerBlock(16,64))

        for x in range(0, 5):    #5 times
           self.layers.append(non_bottleneck_1d(64, 0.1, 1))

        self.layers.append(DownsamplerBlock(64,128))

        for x in range(0, 2):    #2 times
            self.layers.append(non_bottleneck_1d(128, 0.1, 2))
            self.layers.append(non_bottleneck_1d(128, 0.1, 4))
            self.layers.append(non_bottleneck_1d(128, 0.1, 8))
            self.layers.append(non_bottleneck_1d(128, 0.1, 16))

        #only for encoder mode:
        self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True)

    def forward(self, input, predict : bool =False):
        output = self.initial_block(input)

        for layer in self.layers:
            output = layer(output)

        #if predict:
            #output = self.output_conv(output)

        return output


class UpsamplerBlock (nn.Module):
    def __init__(self, ninput, noutput):
        super().__init__()
        self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True)
        self.bn = nn.BatchNorm2d(noutput, eps=1e-3)

    def forward(self, input):
        output = self.conv(input)
        output = self.bn(output)
        return F.relu(output)

class Decoder (nn.Module):
    def __init__(self, num_classes):
        super().__init__()

        self.layers = nn.ModuleList()

        self.layers.append(UpsamplerBlock(128,64))
        self.layers.append(non_bottleneck_1d(64, 0, 1))
        self.layers.append(non_bottleneck_1d(64, 0, 1))

        self.layers.append(UpsamplerBlock(64,16))
        self.layers.append(non_bottleneck_1d(16, 0, 1))
        self.layers.append(non_bottleneck_1d(16, 0, 1))

        self.output_conv = nn.ConvTranspose2d( 16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True)

    def forward(self, input):
        output = input

        for layer in self.layers:
            output = layer(output)

        output = self.output_conv(output)

        return output


class ERFNet(nn.Module):
    def __init__(self, num_classes, encoder=None):  #use encoder to pass pretrained encoder
        super().__init__()

        if (encoder == None):
            self.encoder = Encoder(num_classes)
        else:
            self.encoder = encoder
        self.decoder = Decoder(num_classes)

    def forward(self, input, only_encode=False):
        #if only_encode:
            #return self.encoder.forward(input, predict=True)
        #else:
            output = self.encoder(input)    #predict=False by default
            return self.decoder.forward(output)

In [None]:
def load_my_state_dict(model, state_dict):  #custom function to load model when not all dict elements
        own_state = model.state_dict()
        for name, param in state_dict.items():
            if name not in own_state:
                if name.startswith("module."):
                    own_state[name.split("module.")[-1]].copy_(param)
                else:
                    print(name, " not loaded")
                    continue
            else:
                own_state[name].copy_(param)
        return model
device = "cuda"
datadir = '/content/drive/MyDrive/dataset/Cityscapes'


def load_my_quant_fx_state_dict(filepath,device='cpu',printing=False):
    model = ERFNet(num_classes=20)
    model.eval()
    qconfig_opt = get_default_qconfig("x86")

    qconfig_mapping = QConfigMapping().set_global(qconfig_opt).set_object_type(
                                      torch.nn.ConvTranspose2d, get_default_qconfig("qnnpack")
                                  )  # qconfig_opt is an optional qconfig, either a valid qconfig or None
    dataloader = get_cityscapes_loader(datadir, 1, 'val',num_workers=2,size = 256)
    example_inputs = dataloader.dataset[0][0].unsqueeze(0)
    model = prepare_fx(model, qconfig_mapping, example_inputs)
    if printing:
        print('model.graph: ')
        print(model.graph)
    model = convert_fx(model)
    if printing:
        print('model: ')
        print(model)
    model.load_state_dict(torch.load(filepath))
    if printing:
      print("model loaded successfully")

    return model

# Pruning

In [None]:



model = ERFNet(num_classes=20)

weightspath = '/content/drive/MyDrive/trained_models/erfnet_finetuned.pth' # Usa map_location='cuda' per GPU
model = load_my_state_dict(model, torch.load(weightspath, map_location=lambda storage, loc: storage))


  model = load_my_state_dict(model, torch.load(weightspath, map_location=lambda storage, loc: storage))


In [None]:
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):  # Prune solo i layer Conv2d
        prune.ln_structured(module, name='weight', amount=0.3, n=2,dim=0)  # Rimuovi il 30% dei pesi meno significativi

In [None]:
for name, module in model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        prune.remove(module, 'weight')  # Rimuovi la maschera


In [None]:
import torch.optim as optim
from torch.cuda.amp import GradScaler, autocast

# Definizione della funzione di perdita e ottimizzatore
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
#optimizer = optim.Adam(model.parameters()[-1], lr=1e-4)




device = 'cuda'
datadir = '/content/drive/MyDrive/dataset/Cityscapes'
dataloader = get_cityscapes_loader(datadir, 1, 'train',num_workers=2,size = 256)
#scaler = GradScaler('cuda')

model.to(device)
model.train()
for epoch in range(10):
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass
        #with autocast():
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward pass
        """scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()"""
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")


Epoch 1, Loss: 0.6086463928222656
Epoch 2, Loss: 0.5213028192520142
Epoch 3, Loss: 0.5846103429794312
Epoch 4, Loss: 0.48133596777915955
Epoch 5, Loss: 0.4966106116771698
Epoch 6, Loss: 0.3948286771774292
Epoch 7, Loss: 0.30974632501602173
Epoch 8, Loss: 0.278922438621521
Epoch 9, Loss: 0.25721538066864014
Epoch 10, Loss: 0.2467101812362671


In [None]:
torch.save(model.state_dict(), '/content/drive/MyDrive/trained_models/erfnet_finetuned_pruned_30%.pth')


## Some early evaluation

In [None]:
#iou = eval_iou(model, datadir, cpu=False, num_classes=20, ignoreIndex=19)



---------------------------------------
Took  82.98403930664062 seconds
Per-Class IoU:
[0m94.27[0m Road
[0m76.53[0m sidewalk
[0m87.43[0m building
[0m32.45[0m wall
[0m48.08[0m fence
[0m48.87[0m pole
[0m49.61[0m traffic light
[0m58.99[0m traffic sign
[0m89.24[0m vegetation
[0m46.87[0m terrain
[0m89.74[0m sky
[0m67.76[0m person
[0m42.09[0m rider
[0m89.28[0m car
[0m49.48[0m truck
[0m65.45[0m bus
[0m48.42[0m train
[0m21.83[0m motorcycle
[0m65.28[0m bicycle
MEAN IoU:  [0m61.67[0m %


In [None]:
from torchsummary import summary

Original_model = ERFNet(num_classes=20)
weightspath = '/content/drive/MyDrive/trained_models/erfnet_pretrained.pth'
Original_model = load_my_state_dict(Original_model, torch.load(weightspath, map_location=lambda storage, loc: storage))


Original_model.to(device)
Original_model.eval()
model.to(device)
model.eval()
summary(Original_model, (3,256, 512))  # Input RGB (3 canali, 512x1024)
summary(model, (3,256, 512))


  Original_model = load_my_state_dict(Original_model, torch.load(weightspath, map_location=lambda storage, loc: storage))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 13, 128, 256]             364
         MaxPool2d-2          [-1, 3, 128, 256]               0
       BatchNorm2d-3         [-1, 16, 128, 256]              32
  DownsamplerBlock-4         [-1, 16, 128, 256]               0
            Conv2d-5          [-1, 48, 64, 128]           6,960
         MaxPool2d-6          [-1, 16, 64, 128]               0
       BatchNorm2d-7          [-1, 64, 64, 128]             128
  DownsamplerBlock-8          [-1, 64, 64, 128]               0
            Conv2d-9          [-1, 64, 64, 128]          12,352
           Conv2d-10          [-1, 64, 64, 128]          12,352
      BatchNorm2d-11          [-1, 64, 64, 128]             128
           Conv2d-12          [-1, 64, 64, 128]          12,352
           Conv2d-13          [-1, 64, 64, 128]          12,352
      BatchNorm2d-14          [-1, 64, 

In [None]:
def count_active_parameters(model):
    total_params = 0
    active_params = 0
    for name, param in model.named_parameters():
        if 'weight' in name:
            total_params += param.numel()  # Numero totale di parametri
            active_params += (param != 0).sum().item()  # Conta i pesi non azzerati
    return total_params, active_params


total, active = count_active_parameters(model)
print(f"Total parameters: {total}")
print(f"Active Parameters: {active}")


Totale parametri: 2057279
Parametri attivi dopo il pruning: 1606007


In [None]:
import time

def measure_inference_time(model, inputs):
    model.eval()
    start_time = time.time()
    with torch.no_grad():
        outputs = model(inputs)
    elapsed_time = time.time() - start_time
    return elapsed_time

inputs = torch.randn(1, 3, 512, 1024).to(device)  # Esempio di input
time_erfnet = measure_inference_time(Original_model, inputs)
time_pruned = measure_inference_time(model, inputs)
print(f"ErfNet Inference Time: {time_erfnet:.4f}s")
print(f"Pruned ErfNet Inference Time: {time_pruned:.4f}s")


ErfNet Inference Time: 0.0284s
Pruned ErfNet Inference Time: 0.0079s


In [None]:
!pip install fvcore

Collecting fvcore
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting yacs>=0.1.6 (from fvcore)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting iopath>=0.1.7 (from fvcore)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from iopath>=0.1.7->fvcore)
  Downloading portalocker-3.1.1-py3-none-any.whl.metadata (8.6 kB)
Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Downloading portalocker-3.1.1-py3-none-any.whl (19 kB)
Building wheels for collected packages: fvcore, iopath
  Building wheel for fvcore (setup.py) ... [?25l[?25hdone
  Created wheel for fvcore: filename=fvcore-0.1.5.

In [None]:
!pip install fvcore
from fvcore.nn import FlopCountAnalysis

flops = FlopCountAnalysis(Original_model, inputs)
print(f"ErfNet FLOPs: {flops.total()}")
flops_pruned = FlopCountAnalysis(model, inputs)
print(f"Pruned ErfNet FLOPs: {flops_pruned.total()}")


decoder.layers.1.dropout, decoder.layers.2.dropout, decoder.layers.4.dropout, decoder.layers.5.dropout, encoder.output_conv


ErfNet FLOPs: 26740654080


decoder.layers.1.dropout, decoder.layers.2.dropout, decoder.layers.4.dropout, decoder.layers.5.dropout, encoder.output_conv


Pruned ErfNet FLOPs: 26740654080


In [None]:
# Conta i pesi azzerati in ogni layer
def count_zero_weights(model):
    total_params = 0
    zero_params = 0
    for name, param in model.named_parameters():
        if param.requires_grad:
            total_params += param.numel()
            zero_params += (param == 0).sum().item()
    return total_params, zero_params

# Confronta tra i due modelli
for model_name, model in [("ErfNet", Original_model), ("ErfNet Pruned", model)]:
    total, zero = count_zero_weights(model)
    print(f"{model_name}: {zero}/{total} ({100 * zero / total:.2f}% dei pesi azzerati)")


ErfNet: 1/2066836 (0.00% dei pesi azzerati)
ErfNet Pruned: 451272/2066836 (21.83% dei pesi azzerati)


In [None]:
import os

# Salva i modelli
torch.save(Original_model.state_dict(), 'erfnetUnPruned.pth')
torch.save(model.state_dict(), 'erfnetPruned.pth')

# Misura le dimensioni
size_erfnet = os.path.getsize('erfnetUnPruned.pth') / (1024 ** 2)  # Dimensioni in MB
size_pruned = os.path.getsize('erfnetPruned.pth') / (1024 ** 2)
print(f"ErfNet Size: {size_erfnet:.2f} MB")
print(f"Pruned ErfNet Size: {size_pruned:.2f} MB")


ErfNet Size: 8.02 MB
Pruned ErfNet Size: 8.02 MB


In [None]:
model = ERFNet(num_classes=20)
model.load_state_dict(torch.load('erfnetPruned.pth'))
model = model.to(device)

  model.load_state_dict(torch.load('erfnetPruned.pth'))


# FXGRAPH Quantization

In [None]:
Original_model_path = '/content/drive/MyDrive/trained_models/erfnet_finetuned.pth'


Original_model = ERFNet(num_classes=20)
load_my_state_dict(Original_model,torch.load(Original_model_path))
Original_model.to('cpu')

model_to_quantize = ERFNet(num_classes=20)
load_my_state_dict(model_to_quantize,torch.load(Original_model_path))
model_to_quantize.to('cpu')

dataloader = get_cityscapes_loader(datadir, 1, 'val',num_workers=2,size = 256)


  load_my_state_dict(Original_model,torch.load(Original_model_path))
  load_my_state_dict(model_to_quantize,torch.load(Original_model_path))


In [None]:
model_to_quantize.eval()


ERFNet(
  (encoder): Encoder(
    (initial_block): DownsamplerBlock(
      (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (layers): ModuleList(
      (0): DownsamplerBlock(
        (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1-5): 5 x non_bottleneck_1d(
        (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
        (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
        (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (conv3x1_2): Conv2d(64

In [None]:


qconfig_opt = get_default_qconfig("x86")

qconfig_mapping = QConfigMapping().set_global(qconfig_opt).set_object_type(
                                      torch.nn.ConvTranspose2d, get_default_qconfig("qnnpack")
                                  )  # qconfig_opt is an optional qconfig, either a valid qconfig or None


In [None]:
example_inputs = dataloader.dataset[0][0].unsqueeze(0)
prepared_model = prepare_fx(model_to_quantize, qconfig_mapping, example_inputs)
print(prepared_model.graph)



graph():
    %input_1 : [num_users=1] = placeholder[target=input]
    %activation_post_process_0 : [num_users=2] = call_module[target=activation_post_process_0](args = (%input_1,), kwargs = {})
    %only_encode : [num_users=0] = placeholder[target=only_encode](default=False)
    %encoder_initial_block_conv : [num_users=1] = call_module[target=encoder.initial_block.conv](args = (%activation_post_process_0,), kwargs = {})
    %activation_post_process_1 : [num_users=1] = call_module[target=activation_post_process_1](args = (%encoder_initial_block_conv,), kwargs = {})
    %encoder_initial_block_pool : [num_users=1] = call_module[target=encoder.initial_block.pool](args = (%activation_post_process_0,), kwargs = {})
    %activation_post_process_2 : [num_users=1] = call_module[target=activation_post_process_2](args = (%encoder_initial_block_pool,), kwargs = {})
    %cat : [num_users=1] = call_function[target=torch.cat](args = ([%activation_post_process_1, %activation_post_process_2], 1), kwarg

In [None]:

def calibrate(model, data_loader):
    step = 0
    model.eval()
    with torch.no_grad():
        for image, target in data_loader:
            model(image)

            step += 1
            print(f"Step {step}/{len(data_loader)}")


calibrate(prepared_model, dataloader)

Step 1/500
Step 2/500
Step 3/500
Step 4/500
Step 5/500
Step 6/500
Step 7/500
Step 8/500
Step 9/500
Step 10/500
Step 11/500
Step 12/500
Step 13/500
Step 14/500
Step 15/500
Step 16/500
Step 17/500
Step 18/500
Step 19/500
Step 20/500
Step 21/500
Step 22/500
Step 23/500
Step 24/500
Step 25/500
Step 26/500
Step 27/500
Step 28/500
Step 29/500
Step 30/500
Step 31/500
Step 32/500
Step 33/500
Step 34/500
Step 35/500
Step 36/500
Step 37/500
Step 38/500
Step 39/500
Step 40/500
Step 41/500
Step 42/500
Step 43/500
Step 44/500
Step 45/500
Step 46/500
Step 47/500
Step 48/500
Step 49/500
Step 50/500
Step 51/500
Step 52/500
Step 53/500
Step 54/500
Step 55/500
Step 56/500
Step 57/500
Step 58/500
Step 59/500
Step 60/500
Step 61/500
Step 62/500
Step 63/500
Step 64/500
Step 65/500
Step 66/500
Step 67/500
Step 68/500
Step 69/500
Step 70/500
Step 71/500
Step 72/500
Step 73/500
Step 74/500
Step 75/500
Step 76/500
Step 77/500
Step 78/500
Step 79/500
Step 80/500
Step 81/500
Step 82/500
Step 83/500
Step 84/500
S

In [None]:
quantized_model = convert_fx(prepared_model)
print(quantized_model)

GraphModule(
  (encoder): Module(
    (initial_block): Module(
      (conv): QuantizedConv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), scale=0.2781981825828552, zero_point=67, padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (bn): QuantizedBNReLU2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (layers): Module(
      (0): Module(
        (conv): QuantizedConv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), scale=0.2597421109676361, zero_point=61, padding=(1, 1))
        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (bn): QuantizedBNReLU2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): Module(
        (conv3x1_1): QuantizedConvReLU2d(64, 64, kernel_size=(3, 1), stride=(1, 1), scale=0.03849927335977554, zero_point=0, padding=(1, 0))
        (conv1x3_1): QuantizedConvReLU2d(64, 64, kernel_size=(1, 3), stride=(1, 1),

In [None]:
def load_my_quant_fx_state_dict(filepath,device='cpu',printing=False):
    model = ERFNet(num_classes=20)
    model.eval()
    qconfig_opt = get_default_qconfig("x86")

    qconfig_mapping = QConfigMapping().set_global(qconfig_opt).set_object_type(
                                      torch.nn.ConvTranspose2d, get_default_qconfig("qnnpack")
                                  )  # qconfig_opt is an optional qconfig, either a valid qconfig or None
    dataloader = get_cityscapes_loader(datadir, 1, 'val',num_workers=2,size = 256)
    example_inputs = dataloader.dataset[0][0].unsqueeze(0)
    model = prepare_fx(model, qconfig_mapping, example_inputs)
    if printing:
        print('model.graph: ')
        print(model.graph)
    model = convert_fx(model)
    if printing:
        print('model: ')
        print(model)
    model.load_state_dict(torch.load(filepath))
    if printing:
      print("model loaded successfully")

    return model


In [None]:

fx_graph_mode_model_file_path = 'erfnet_finetuned_quantized_fx_2.pth'

torch.save(quantized_model,'model_'+ fx_graph_mode_model_file_path)
torch.save(quantized_model.state_dict(),  fx_graph_mode_model_file_path)


In [None]:
loaded_quantized_model = load_my_quant_fx_state_dict(fx_graph_mode_model_file_path,printing=False)
#criterion = nn.CrossEntropyLoss()

  model.load_state_dict(torch.load(filepath))


# Evaluation

In [None]:
# Code for evaluating IoU
# Nov 2017
# Eduardo Romera
#######################


class iouEval:

    # the ignoreIndex is the class that will be ignored for the evaluation (20th class in the case of Cityscapes)
    def __init__(self, nClasses, ignoreIndex=19):
        self.nClasses = nClasses
        self.ignoreIndex = ignoreIndex if nClasses>ignoreIndex else -1 #if ignoreIndex is larger than nClasses, consider no ignoreIndex
        self.reset()

    def reset (self):
        classes = self.nClasses if self.ignoreIndex==-1 else self.nClasses-1
        self.tp = torch.zeros(classes).double()
        self.fp = torch.zeros(classes).double()
        self.fn = torch.zeros(classes).double()

    def addBatch(self, x, y):   #x=preds, y=targets
        #sizes should be "batch_size x nClasses x H x W"

        #print ("X size: ", x.size())
        #print ("Y size: ", y.size())

        #print ("X is cuda: ", x.is_cuda)
        #print ("Y is cuda: ", y.is_cuda)

        #print("Nclasses: ", self.nClasses)

        if (x.is_cuda or y.is_cuda):
            x = x.cuda()
            y = y.cuda()

        #if size is "batch_size x 1 x H x W" scatter to onehot
        if (x.size(1) == 1):
            x_onehot = torch.zeros(x.size(0), self.nClasses, x.size(2), x.size(3))
            if x.is_cuda:
                x_onehot = x_onehot.cuda()
            x_onehot.scatter_(1, x, 1).float()
        else:
            x_onehot = x.float()

        if (y.size(1) == 1):
            y_onehot = torch.zeros(y.size(0), self.nClasses, y.size(2), y.size(3))
            if y.is_cuda:
                y_onehot = y_onehot.cuda()
            y_onehot.scatter_(1, y, 1).float()
        else:
            y_onehot = y.float()

        if (self.ignoreIndex != -1):
            ignores = y_onehot[:,self.ignoreIndex].unsqueeze(1)
            x_onehot = x_onehot[:, :self.ignoreIndex]
            y_onehot = y_onehot[:, :self.ignoreIndex]
        else:
            ignores=0

        tpmult = x_onehot * y_onehot    #times prediction and gt coincide is 1
        tp = torch.sum(torch.sum(torch.sum(tpmult, dim=0, keepdim=True), dim=2, keepdim=True), dim=3, keepdim=True).squeeze()
        fpmult = x_onehot * (1-y_onehot-ignores) #times prediction says its that class and gt says its not (subtracting cases when its ignore label!)
        fp = torch.sum(torch.sum(torch.sum(fpmult, dim=0, keepdim=True), dim=2, keepdim=True), dim=3, keepdim=True).squeeze()
        fnmult = (1-x_onehot) * (y_onehot) #times prediction says its not that class and gt says it is
        fn = torch.sum(torch.sum(torch.sum(fnmult, dim=0, keepdim=True), dim=2, keepdim=True), dim=3, keepdim=True).squeeze()

        self.tp += tp.double().cpu()
        self.fp += fp.double().cpu()
        self.fn += fn.double().cpu()

    def getIoU(self):
        num = self.tp
        den = self.tp + self.fp + self.fn + 1e-15
        iou = num / den
        return torch.mean(iou), iou     #returns "iou mean", "iou per class"

# Class for colors
class colors:
    RED       = '\033[31;1m'
    GREEN     = '\033[32;1m'
    YELLOW    = '\033[33;1m'
    BLUE      = '\033[34;1m'
    MAGENTA   = '\033[35;1m'
    CYAN      = '\033[36;1m'
    BOLD      = '\033[1m'
    UNDERLINE = '\033[4m'
    ENDC      = '\033[0m'

# Colored value output if colorized flag is activated.
def getColorEntry(val):
    if not isinstance(val, float):
        return colors.ENDC
    if (val < .20):
        return colors.RED
    elif (val < .40):
        return colors.YELLOW
    elif (val < .60):
        return colors.BLUE
    elif (val < .80):
        return colors.CYAN
    else:
        return colors.GREEN

In [None]:
# Code to calculate IoU (mean and per-class) in a dataset
# Nov 2017
# Eduardo Romera
#######################

import torch
import time
from PIL import Image

from torch.autograd import Variable


# verificare come utilizzare il parametro method

def eval_iou(model, datadir, cpu, num_classes, ignoreIndex=19):

    # load the dataset
    loader = get_cityscapes_loader(datadir, 10, 'val')

    # create the IoU evaluator
    iouEvalVal = iouEval(num_classes, ignoreIndex=ignoreIndex)

    # start the timer used for the prints
    start = time.time()

    if cpu:
        model.to('cpu') # Move model to CPU
        model = model.to(torch.float32) # Move to float32 for CPU
    else:
        model.to('cuda')

    # start the timer used for the prints
    start = time.time()



    for step, (images, labels) in enumerate(loader):

        # if the cpu flag is not set, move the data to the gpu
        if cpu:
            images = images.cpu()
            labels = labels.cpu()
        else:
            images = images.cuda()
            labels = labels.cuda()

        # launch the model with the images as input while disabling gradient computation
        inputs = Variable(images)
        if cpu:
             inputs = inputs[:, :3, :, :]  # Seleziona solo i primi 3 canali
        dev = next(model.parameters()).device
        if dev == 'cpu':
          print(f"Il modello si trova su: {dev}")
          print(f"Il input si trova su: { inputs.device}")
          print(f"Il label si trova su: {labels.device}")


        #model.eval()
        with torch.no_grad():
          model.eval()
          out = model(inputs)



        # get the max logit value for each pixel
        outputs = out.max(1)[1].unsqueeze(1).data
        labels = labels.unsqueeze(1).data

        # add the batch to the IoU evaluator
        iouEvalVal.addBatch(outputs, labels)

        # print the filename of the image
        #filenameSave = filename[0].split("leftImg8bit/")[1]
        #print (step, filenameSave)

        if step in [0, 10, 20, 30, 40, 50, 60, 70, 80, 90]:
           #print_output(out[0, :, :, :], filename[0].split("leftImg8bit/")[1])
           pass

    # get the IoU results
    iouVal, iou_classes = iouEvalVal.getIoU()

    iou_classes_str = []

    for i in range(iou_classes.size(0)):
        iouStr = getColorEntry(iou_classes[i])+'{:0.2f}'.format(iou_classes[i]*100) + '\033[0m'
        iou_classes_str.append(iouStr)

    print("---------------------------------------")
    print("Took ", time.time()-start, "seconds")
    print("=======================================")
    #print("TOTAL IOU: ", iou * 100, "%")
    print("Per-Class IoU:")
    print(iou_classes_str[0], "Road")
    print(iou_classes_str[1], "sidewalk")
    print(iou_classes_str[2], "building")
    print(iou_classes_str[3], "wall")
    print(iou_classes_str[4], "fence")
    print(iou_classes_str[5], "pole")
    print(iou_classes_str[6], "traffic light")
    print(iou_classes_str[7], "traffic sign")
    print(iou_classes_str[8], "vegetation")
    print(iou_classes_str[9], "terrain")
    print(iou_classes_str[10], "sky")
    print(iou_classes_str[11], "person")
    print(iou_classes_str[12], "rider")
    print(iou_classes_str[13], "car")
    print(iou_classes_str[14], "truck")
    print(iou_classes_str[15], "bus")
    print(iou_classes_str[16], "train")
    print(iou_classes_str[17], "motorcycle")
    print(iou_classes_str[18], "bicycle")
    if ignoreIndex == -1:
        print(iou_classes_str[19], "void")
    print("=======================================")
    iouStr = getColorEntry(iouVal)+'{:0.2f}'.format(iouVal*100) + '\033[0m'
    print ("MEAN IoU: ", iouStr, "%")

    return iouVal

In [None]:
#import torch.quantization as quant
#import torch.nn as nn
#import torch.nn.utils.prune as prune
device = 'cpu'

pruned_model = ERFNet(num_classes=20)
pruned_model.load_state_dict(torch.load('/content/drive/MyDrive/trained_models/erfnet_finetuned_pruned_30%.pth',map_location=torch.device('cpu')))
pruned_model = pruned_model.to(device)  # Assign the pruned model to the correct variable

Original_model = ERFNet(num_classes=20)
Original_model= load_my_state_dict(Original_model,torch.load('/content/drive/MyDrive/trained_models/erfnet_finetuned.pth',map_location=torch.device('cpu')))
Original_model = Original_model.to(device) # Assign the original model to the correct variable


# Load the quantized model using the quantized model loading function
#quantized_model = QuantizableERFNet(num_classes=20)
#quantized_model = load_my_quant_state_dict(quantized_model,'quantized_erfnet.pth',d = 'cpu')
#quantized_model = quantized_model.to(device) # Move to CUDA after correct initialization
#loaded_model.to(device)
loaded_quantized_model = load_my_quant_fx_state_dict('/content/drive/MyDrive/trained_models/erfnet_finetuned_pruned_30%_quantized_fx.pth')
loaded_quantized_model.to('cpu')

"""# Force all parameters and buffers to be on the CUDA device
for name, param in quantized_model.named_parameters():
    param.data = param.data.to(device) # Ensure all parameters are on CUDA
for name, buffer in quantized_model.named_buffers():
    buffer.data = buffer.data.to(device) # Ensure all buffers are on CUDA"""

loaded_quantized_nopruned_model = load_my_quant_fx_state_dict('/content/drive/MyDrive/trained_models/Pruning_Quantization/sd_quantized_erfnet_fx.pth')
loaded_quantized_nopruned_model.to('cpu')


inputs = (torch.randn(1, 3, 512, 1024) + torch.ones(1,3,512,1024)).to('cpu')  # Esempio di input




In [None]:
#calculate FLOPS, Memory usage, dimension,

#dimension
# Salva i modelli
torch.save(Original_model.state_dict(), 'EvalDimensionOriginal.pth')
torch.save(pruned_model.state_dict(), 'EvalDimensionPruned.pth')
torch.save(loaded_quantized_model.state_dict(), 'EvalDimensionFinal.pth')

# Misura le dimensioni
size_original = os.path.getsize('EvalDimensionOriginal.pth') / (1024 ** 2)  # Dimensioni in MB
size_pruned = os.path.getsize('EvalDimensionPruned.pth') / (1024 ** 2)   #doesn't take count of the zeros
size_final = os.path.getsize('EvalDimensionFinal.pth') / (1024 ** 2)

print(f"ErfNet Size: {size_original:.2f} MB")
print(f"Pruned ErfNet Size: {size_pruned:.2f} MB")
print(f"Quantized ErfNet Size: {size_final:.2f} MB")

NameError: name 'Original_model' is not defined

In [None]:
#Running  the iou of original and pruned models on cuda
iou_P = eval_iou(pruned_model, datadir, cpu=False, num_classes=20, ignoreIndex=19)
iou_O = eval_iou(Original_model, datadir, cpu=False, num_classes=20, ignoreIndex=19)

#quantized model must run on cpu
#i.e. it's slower because of cpu but actually very fast thinking that unquantized models are basically not runnable on cpu (more than 2 hours for eval_iou)
loaded_quantized_model.to('cpu')
iou_Q = eval_iou(loaded_quantized_model,datadir,cpu=True,num_classes=20,ignoreIndex = 19)





---------------------------------------
Took  84.46612524986267 seconds
Per-Class IoU:
[0m94.25[0m Road
[0m73.43[0m sidewalk
[0m87.09[0m building
[0m37.66[0m wall
[0m47.11[0m fence
[0m43.42[0m pole
[0m36.61[0m traffic light
[0m54.63[0m traffic sign
[0m89.06[0m vegetation
[0m49.82[0m terrain
[0m92.73[0m sky
[0m67.55[0m person
[0m44.43[0m rider
[0m89.58[0m car
[0m60.87[0m truck
[0m63.69[0m bus
[0m43.13[0m train
[0m17.63[0m motorcycle
[0m62.59[0m bicycle
MEAN IoU:  [0m60.80[0m %
---------------------------------------
Took  83.33717799186707 seconds
Per-Class IoU:
[0m97.62[0m Road
[0m81.37[0m sidewalk
[0m90.77[0m building
[0m49.43[0m wall
[0m54.93[0m fence
[0m60.81[0m pole
[0m62.60[0m traffic light
[0m72.31[0m traffic sign
[0m91.35[0m vegetation
[0m60.96[0m terrain
[0m93.38[0m sky
[0m76.11[0m person
[0m53.45[0m rider
[0m92.91[0m car
[0m72.78[0m truck
[0m78.87[0m bus
[0m63.86[0m train
[0m46.40[0m motorcycle
[0m71.

  cat = torch.cat([encoder_initial_block_conv, encoder_initial_block_pool], 1);  encoder_initial_block_conv = encoder_initial_block_pool = None
  cat_1 = torch.cat([encoder_layers_0_conv, encoder_layers_0_pool], 1);  encoder_layers_0_conv = encoder_layers_0_pool = None
  cat_2 = torch.cat([encoder_layers_6_conv, encoder_layers_6_pool], 1);  encoder_layers_6_conv = encoder_layers_6_pool = None


---------------------------------------
Took  1069.0812327861786 seconds
Per-Class IoU:
[0m95.51[0m Road
[0m69.81[0m sidewalk
[0m85.48[0m building
[0m21.60[0m wall
[0m41.14[0m fence
[0m51.15[0m pole
[0m49.54[0m traffic light
[0m61.50[0m traffic sign
[0m84.75[0m vegetation
[0m46.26[0m terrain
[0m91.29[0m sky
[0m67.86[0m person
[0m43.37[0m rider
[0m87.48[0m car
[0m38.68[0m truck
[0m52.45[0m bus
[0m39.85[0m train
[0m21.50[0m motorcycle
[0m60.40[0m bicycle
MEAN IoU:  [0m58.40[0m %


In [None]:

# I suggest you to not run this cell

Original_model.to('cpu')
iou_O2 = eval_iou(Original_model,datadir,cpu=True,num_classes=20,ignoreIndex = 19)



In [None]:
def count_active_parameters(model):
    total_params = 0
    active_params = 0
    for name, param in model.named_parameters():
        if 'weight' in name:
            total_params += param.numel()  # Numero totale di parametri
            active_params += (param != 0).sum().item()  # Conta i pesi non azzerati
    return total_params, active_params

def count_active_parameters_fx_quantized(model):
    total_params = 0
    active_params = 0

    for name, module in model.named_modules():
        # Controlla se il modulo ha un peso quantizzato (ad esempio nnq.Linear, nnq.Conv2d, ecc.)
        if hasattr(module, 'weight'):
            try:
                # Recupera il tensore dei pesi dequantizzato
                weight = module.weight()
                total_params += weight.numel()
                active_params += (weight != 0).sum().item()
            except Exception:
                # Se weight non è chiamabile (es: non è un quantized module), salta
                pass

    return total_params, active_params


print('Original model: ')
total_o, active_o = count_active_parameters(Original_model)
print(f"Totale parametri: {total_o}")
print(f"Parametri attivi: {active_o}  -> {active_o/(total_o+1) * 100}%")

print('Pruned model: ')
total_p, active_p = count_active_parameters(pruned_model)
print(f"Totale parametri: {total_p}")
print(f"Parametri attivi: {active_p} -> {active_p/(total_p+1) * 100}%")

print('Quantized model: ')
total_q, active_q = count_active_parameters_fx_quantized(loaded_quantized_model)
print(f"Totale parametri: {total_q}")
print(f"Parametri attivi: {active_q}  -> {active_q/(total_q+1) * 100}%")




print('Quantized but not pruned model: ')
total_qnp, active_qnp = count_active_parameters_fx_quantized(loaded_quantized_nopruned_model)
print(f"Totale parametri: {total_qnp}")
print(f"Parametri attivi: {active_qnp}  -> {active_qnp/(total_qnp+1) * 100}%")





Original model: 
Totale parametri: 2057279
Parametri attivi: 2057278  -> 99.99990278425884%
Pruned model: 
Totale parametri: 2057279
Parametri attivi: 1598197 -> 77.68495294758128%
Quantized model: 
Totale parametri: 2051423
Parametri attivi: 1286816  -> 62.72793922660552%
Quantized but not pruned model: 
Totale parametri: 2051423
Parametri attivi: 1342613  -> 65.44785475845072%


In [None]:
!pip install fvcore
!pip install torchprofile

Collecting fvcore
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting yacs>=0.1.6 (from fvcore)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting iopath>=0.1.7 (from fvcore)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from iopath>=0.1.7->fvcore)
  Downloading portalocker-3.1.1-py3-none-any.whl.metadata (8.6 kB)
Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Downloading portalocker-3.1.1-py3-none-any.whl (19 kB)
Building wheels for collected packages: fvcore, iopath
  Building wheel for fvcore (setup.py) ... [?25l[?25hdone
  Created wheel for fvcore: filename=fvcore-0.1.5.

In [None]:

from fvcore.nn import FlopCountAnalysis

flops = FlopCountAnalysis(Original_model.to('cpu'), inputs)
print(f"ErfNet FLOPs: {flops.total()}")
flops_pruned = FlopCountAnalysis(pruned_model.to('cpu'), inputs)
print(f"Pruned ErfNet FLOPs: {flops_pruned.total()}")

decoder.layers.1.dropout, decoder.layers.2.dropout, decoder.layers.4.dropout, decoder.layers.5.dropout, encoder.output_conv


ErfNet FLOPs: 26740654080


decoder.layers.1.dropout, decoder.layers.2.dropout, decoder.layers.4.dropout, decoder.layers.5.dropout, encoder.output_conv


Pruned ErfNet FLOPs: 26740654080


# Prove di Quantizzazione (Scartato)





## prove 1,2,3


PROVA 1


In [None]:
import torch.quantization as quant
import torch.nn as nn

# 1. Load your pruned model:
model = ERFNet(num_classes=20)
model.load_state_dict(torch.load('/content/drive/MyDrive/trained_models/erfnet_pruning025.pth'))
device = "cuda"
datadir = '/content/drive/MyDrive/dataset/Cityscapes'

model = model.to(device)

# 2. Define the custom function to exclude ConvTranspose2d:
def exclude_conv_transpose_qconfig(model):
    for name, submodule in model.named_modules():
        if isinstance(submodule, torch.nn.ConvTranspose2d):
            submodule.qconfig = None  # Disable quantization for ConvTranspose2d
        else:
            # Enable per-tensor quantization with signed int8 (qint8)
            submodule.qconfig = quant.QConfig(
                activation=quant.HistogramObserver.with_args(reduce_range=False),
                weight=quant.MinMaxObserver.with_args(
                    dtype=torch.qint8,  # Changed to torch.qint8
                    qscheme=torch.per_tensor_symmetric,  # Changed to per_tensor_symmetric
                )
            )

# 3. Apply custom qconfig to the model
exclude_conv_transpose_qconfig(model)

# 4. Prepare your model for static quantization
model.eval()
torch.quantization.prepare(model, inplace=True)

# 5. Calibrate the model
calibrazione_loader = get_cityscapes_loader(datadir, 1, 'val',num_workers=2,size = 256)
with torch.no_grad():
    for inputs, _ in calibrazione_loader:
        model(inputs.to(device))  # Pass some batches to calibrate scale and offset

# 6. Convert the model
torch.quantization.convert(model, inplace=True)

# 7. Save the quantized model
torch.save(model.state_dict(), 'quantized_pruned_erfnet.pth')

  model.load_state_dict(torch.load('/content/drive/MyDrive/trained_models/erfnet_pruning025.pth'))


In [None]:
import torch.quantization as quant
import torch.nn as nn

class QuantizedERFNet(nn.Module):
    def __init__(self, num_classes=20):
        super(QuantizedERFNet, self).__init__()
        self.num_classes = num_classes
        self.model = ERFNet(num_classes)  # Instantiate the original model

        # This is the exact code used to prepare the model for quantization in the first place
        def exclude_conv_transpose_qconfig(model):
            for name, submodule in model.named_modules():
                if isinstance(submodule, torch.nn.ConvTranspose2d):
                    submodule.qconfig = None  # Disable quantization for ConvTranspose2d
                else:
                    # Enable per-tensor quantization with signed int8 (qint8)
                    submodule.qconfig = quant.QConfig(
                        activation=quant.HistogramObserver.with_args(reduce_range=False),
                        weight=quant.MinMaxObserver.with_args(
                            dtype=torch.qint8,  # Changed to torch.qint8
                            qscheme=torch.per_tensor_symmetric,  # Changed to per_tensor_symmetric
                        )
                    )
        # 3. Apply custom qconfig to the model
        exclude_conv_transpose_qconfig(self.model)

        # 4. Prepare your model for static quantization
        self.model.eval()
        torch.quantization.prepare(self.model, inplace=True)

        # 5. Calibrate the model IS NOT NEEDED HERE BECAUSE WE ARE ONLY LOADING THE MODEL, NOT RE-TRAINING
        """calibrazione_loader = get_cityscapes_loader(datadir, 1, 'val',num_workers=2,size = 256)
        with torch.no_grad():
            for inputs, _ in calibrazione_loader:
                model(inputs.to(device))  # Pass some batches to calibrate scale and offset"""

        # 6. Convert the model
        torch.quantization.convert(self.model, inplace=True)


    def forward(self, input, only_encode=False):
        return self.model(input) # Now this should run after the model has been correctly initialized

    """def load_state_dict(self,state_dict):
        return self.model.load_state_dict(state_dict)"""







In [None]:
def load_my_quant_state_dict(QuantModel, state_dict ='quantized_pruned_erfnet.pth' ):
  QuantModel = QuantizedERFNet(num_classes=20)
  QuantModel.model.load_state_dict(torch.load(state_dict))
  QuantModel = QuantModel.to(device)
  return QuantModel

PROVA 2

In [None]:
import torch.quantization as quant
import torch.nn as nn
import torch.nn.utils.prune as prune

# 1. Instantiate your model:
quantized_model = ERFNet(num_classes=20)

# 2. Load pre-trained weights for pruning (not quantized ones):
#weightspath = '/content/drive/MyDrive/trained_models/erfnet_pretrained.pth'
weightspath_pruned = 'erfnetPruned.pth'
quantized_model = load_my_state_dict(quantized_model, torch.load(weightspath_pruned, map_location=lambda storage, loc: storage))

"""# 3. Apply pruning:
for name, module in quantized_model.named_modules():
    if isinstance(module, torch.nn.Conv2d):  # Prune only Conv2d layers
        prune.l1_unstructured(module, name='weight', amount=0.25) """

"""# 4. Remove the pruning mask:
for name, module in quantized_model.named_modules():
    if isinstance(module, torch.nn.Conv2d):
        prune.remove(module, 'weight')  """

# 5. Perform dynamic quantization:
quantized_model = quant.quantize_dynamic(
    quantized_model, {nn.Conv2d, nn.Linear}, dtype=torch.qint8
)

  quantized_model = load_my_state_dict(quantized_model, torch.load(weightspath_pruned, map_location=lambda storage, loc: storage))


PROVA 3

In [None]:
model = ERFNet(num_classes=20)
model.load_state_dict(torch.load('erfnetPruned.pth'))
model = model.to(device)

  model.load_state_dict(torch.load('erfnetPruned.pth'))


In [None]:
model.train()



model.qconfig = quant.QConfig(
    activation=quant.HistogramObserver.with_args(reduce_range=False),  # Activation quantization remains unchanged
    weight=quant.MinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_tensor_symmetric)  # Force per-tensor for weights
)

torch.quantization.prepare_qat(model, inplace=True)


ERFNet(
  (encoder): Encoder(
    (initial_block): DownsamplerBlock(
      (conv): Conv2d(
        3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)
        (weight_fake_quant): MinMaxObserver(min_val=inf, max_val=-inf)
        (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
      )
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (bn): BatchNorm2d(
        16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
        (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
      )
    )
    (layers): ModuleList(
      (0): DownsamplerBlock(
        (conv): Conv2d(
          16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)
          (weight_fake_quant): MinMaxObserver(min_val=inf, max_val=-inf)
          (activation_post_process): HistogramObserver(min_val=inf, max_val=-inf)
        )
        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

In [None]:
train_loader = get_cityscapes_loader(datadir, 1, 'val',num_workers=2,size = 256)

for epoch in range(5):  # Numero di epoche per il fine-tuning
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()


In [None]:
#model.eval()
torch.quantization.convert(model, inplace=True)

NotImplementedError: Could not run 'quantized::conv_transpose2d_prepack' with arguments from the 'QuantizedCUDA' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv_transpose2d_prepack' is only available for these backends: [Meta, QuantizedCPU, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradMPS, AutogradXPU, AutogradHPU, AutogradLazy, AutogradMeta, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

Meta: registered at ../aten/src/ATen/core/MetaFallbackKernel.cpp:23 [backend fallback]
QuantizedCPU: registered at ../aten/src/ATen/native/quantized/cpu/qconv_prepack.cpp:828 [kernel]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:153 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]
Functionalize: registered at ../aten/src/ATen/FunctionalizeFallbackKernel.cpp:349 [backend fallback]
Named: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at ../aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at ../aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at ../aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:96 [backend fallback]
AutogradOther: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:63 [backend fallback]
AutogradCPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:67 [backend fallback]
AutogradCUDA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:75 [backend fallback]
AutogradXLA: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:79 [backend fallback]
AutogradMPS: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:87 [backend fallback]
AutogradXPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:71 [backend fallback]
AutogradHPU: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:100 [backend fallback]
AutogradLazy: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:83 [backend fallback]
AutogradMeta: registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:91 [backend fallback]
Tracer: registered at ../torch/csrc/autograd/TraceTypeManual.cpp:294 [backend fallback]
AutocastCPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:321 [backend fallback]
AutocastXPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:463 [backend fallback]
AutocastMPS: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:209 [backend fallback]
AutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:165 [backend fallback]
FuncTorchBatched: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at ../aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:207 [backend fallback]
PythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:161 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]
PreDispatch: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:165 [backend fallback]
PythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:157 [backend fallback]


## Prova 4

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.quantization as quant


class QDownsamplerBlock(nn.Module):
    def __init__(self, ninput, noutput):
        super().__init__()
        self.quant = quant.QuantStub()  # Quantizzazione all'ingresso del blocco
        self.conv = nn.Conv2d(ninput, noutput - ninput, (3, 3), stride=2, padding=1, bias=True)
        self.pool = nn.MaxPool2d(2, stride=2)
        self.bn = nn.BatchNorm2d(noutput, eps=1e-3)
        self.dequant = quant.DeQuantStub()  # Dequantizzazione alla fine del blocco

    def forward(self, input):
        input = self.quant(input)
        conv_out = self.conv(input)
        pool_out = self.pool(input)
        output = torch.cat([conv_out, pool_out], 1)
        output = self.bn(output)
        output = F.relu(output)
        output = self.dequant(output)
        return output


class Qnon_bottleneck_1d(nn.Module):
    def __init__(self, chann, dropprob, dilated):
        super().__init__()
        self.conv3x1_1 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1, 0), bias=True)
        self.conv1x3_1 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1), bias=True)
        self.bn1 = nn.BatchNorm2d(chann, eps=1e-03)
        self.conv3x1_2 = nn.Conv2d(chann, chann, (3, 1), stride=1, padding=(1 * dilated, 0), bias=True, dilation=(dilated, 1))
        self.conv1x3_2 = nn.Conv2d(chann, chann, (1, 3), stride=1, padding=(0, 1 * dilated), bias=True, dilation=(1, dilated))
        self.bn2 = nn.BatchNorm2d(chann, eps=1e-03)
        self.dropout = nn.Dropout2d(dropprob)

    def forward(self, input):
        output = self.conv3x1_1(input)
        output = F.relu(output)
        output = self.conv1x3_1(output)
        output = self.bn1(output)
        output = F.relu(output)
        output = self.conv3x1_2(output)
        output = F.relu(output)
        output = self.conv1x3_2(output)
        output = self.bn2(output)
        if self.dropout.p != 0:
            output = self.dropout(output)
        return F.relu(output + input)  # Residual connection


class QEncoder(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.quant = quant.QuantStub()
        self.initial_block = DownsamplerBlock(3, 16)
        self.layers = nn.ModuleList()
        self.layers.append(DownsamplerBlock(16, 64))
        for x in range(5):
            self.layers.append(non_bottleneck_1d(64, 0.1, 1))
        self.layers.append(DownsamplerBlock(64, 128))
        for x in range(2):
            self.layers.append(non_bottleneck_1d(128, 0.1, 2))
            self.layers.append(non_bottleneck_1d(128, 0.1, 4))
            self.layers.append(non_bottleneck_1d(128, 0.1, 8))
            self.layers.append(non_bottleneck_1d(128, 0.1, 16))
        self.output_conv = nn.Conv2d(128, num_classes, 1, stride=1, padding=0, bias=True)
        self.dequant = quant.DeQuantStub()

    def forward(self, input, predict: bool = False):
        input = self.quant(input)
        output = self.initial_block(input)
        for layer in self.layers:
            output = layer(output)
        if predict:
            output = self.output_conv(output)
        output = self.dequant(output)
        return output


class QUpsamplerBlock(nn.Module):
    def __init__(self, ninput, noutput):
        super().__init__()
        self.quant = torch.quantization.QuantStub()  # Stub per quantizzare
        self.conv = nn.ConvTranspose2d(ninput, noutput, 3, stride=2, padding=1, output_padding=1, bias=True)
        self.bn = nn.BatchNorm2d(noutput, eps=1e-3)
        self.dequant = torch.quantization.DeQuantStub()  # Stub per de-quantizzare

    def forward(self, input):
        input = self.quant(input)
        output = self.conv(input)
        output = self.bn(output)
        output = F.relu(output)
        return self.dequant(output)



class QDecoder(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.layers = nn.ModuleList()
        self.layers.append(UpsamplerBlock(128, 64))
        self.layers.append(non_bottleneck_1d(64, 0, 1))
        self.layers.append(non_bottleneck_1d(64, 0, 1))
        self.layers.append(UpsamplerBlock(64, 16))
        self.layers.append(non_bottleneck_1d(16, 0, 1))
        self.layers.append(non_bottleneck_1d(16, 0, 1))
        self.output_conv = nn.ConvTranspose2d(16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True)

    def forward(self, input):
        output = input
        for layer in self.layers:
            output = layer(output)
        output = self.output_conv(output)
        return output


class QERFNet(nn.Module):
    def __init__(self, num_classes, encoder=None):
        super().__init__()
        self.encoder = encoder if encoder is not None else Encoder(num_classes)
        self.decoder = Decoder(num_classes)

    def forward(self, input, only_encode=False):
        if only_encode:
            return self.encoder.forward(input, predict=True)
        else:
            output = self.encoder(input)
            return self.decoder.forward(output)


In [None]:
device = 'cuda'

model = QERFNet(num_classes=20)
load_my_state_dict(model,torch.load('/content/drive/MyDrive/trained_models/erfnet_pretrained.pth',map_location = torch.device('cpu')))
model = model.to(device)


  load_my_state_dict(model,torch.load('/content/drive/MyDrive/trained_models/erfnet_pretrained.pth',map_location = torch.device('cpu')))


In [None]:
from torch.quantization import default_qconfig, default_observer, prepare, convert


model.qconfig = torch.quantization.QConfig(
    activation=default_observer,  # Osservatore per le attivazioni
    weight=torch.quantization.default_weight_observer  # Osservatore per i pesi (per tensor)
)
model.eval()
calibration_dataloader = get_cityscapes_loader(datadir, 1, 'val',num_workers=2,size = 256)
# Prepara il modello per la quantizzazione statica
prepared_model = torch.quantization.prepare(model)
for images, _ in calibration_dataloader:  # Usa un DataLoader con immagini di calibrazione
    prepared_model(images.to(device))


In [None]:
quantized_model = torch.quantization.convert(prepared_model)

In [None]:
def load_my_quant_state_dict(model, device ='cuda',num_classes=20,state_dict_name='quantized_erfnet.pth',map_location = torch.device('cuda')):
    # Inizializza il modello
    model = QERFNet(num_classes=num_classes)

    model.qconfig = torch.quantization.QConfig(
    activation=default_observer,  # Osservatore per le attivazioni
    weight=torch.quantization.default_weight_observer  # Osservatore per i pesi (per tensor)
    )
    model.eval()

    model = torch.quantization.prepare(model)
    model = torch.quantization.convert(model)

    model.load_state_dict(torch.load(state_dict_name))
    model = model.to(device)
    return model




In [None]:
torch.save(quantized_model.state_dict(), "/content/drive/MyDrive/trained_models/quantized_model_PTQ_cpu.pth")

# Carica il modello quantizzato
loaded_model = QERFNet(num_classes=20)
load_my_quant_state_dict(loaded_model,state_dict_name="/content/drive/MyDrive/trained_models/quantized_model_PTQ_cpu.pth")

  model.load_state_dict(torch.load(state_dict_name))


QERFNet(
  (encoder): Encoder(
    (initial_block): DownsamplerBlock(
      (conv): QuantizedConv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), scale=0.2823339104652405, zero_point=68, padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (bn): QuantizedBatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (layers): ModuleList(
      (0): DownsamplerBlock(
        (conv): QuantizedConv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), scale=0.32048964500427246, zero_point=77, padding=(1, 1))
        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (bn): QuantizedBatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): non_bottleneck_1d(
        (conv3x1_1): QuantizedConv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), scale=0.263015478849411, zero_point=83, padding=(1, 0))
        (conv1x3_1): QuantizedConv2d(64, 64, kern

In [None]:
# prompt: calcola dimensioni di: loaded,quantized,original

import os

# Assuming 'erfnetUnPruned.pth', 'erfnetPruned.pth', and 'quantized_pruned_erfnet.pth' exist
# in the current working directory or a specified path.

def get_file_size_mb(file_path):
    """Calculates and returns the file size in MB."""
    try:
        size_bytes = os.path.getsize(file_path)
        size_mb = size_bytes / (1024 ** 2)
        return size_mb
    except FileNotFoundError:
        return "File not found"

original_model_size = get_file_size_mb('/content/drive/MyDrive/trained_models/erfnet_pretrained.pth')
pruned_model_size = get_file_size_mb('/content/drive/MyDrive/trained_models/erfnet_pruning025.pth')
quantized_model_size = get_file_size_mb('/content/drive/MyDrive/trained_models/quantized_model_PTQ_cpu.pth')


print(f"Original model size: {original_model_size:.2f} MB")
print(f"Pruned model size: {pruned_model_size:.2f} MB")
print(f"Quantized model size: {quantized_model_size:.2f} MB")

Original model size: 7.94 MB
Pruned model size: 8.03 MB
Quantized model size: 2.22 MB


## FXGRAPH

In [None]:
Original_model = ERFNet(num_classes=20)
load_my_state_dict(Original_model,torch.load('/content/drive/MyDrive/trained_models/erfnet_pruned_30.pth'))
Original_model.to('cpu')

model_to_quantize = ERFNet(num_classes=20)
load_my_state_dict(model_to_quantize,torch.load('/content/drive/MyDrive/trained_models/erfnet_structured_pruning03.pth'))
model_to_quantize.to('cpu')

dataloader = get_cityscapes_loader(datadir, 1, 'val',num_workers=2,size = 256)


  load_my_state_dict(Original_model,torch.load('/content/drive/MyDrive/trained_models/erfnet_structured_pruning03.pth'))
  load_my_state_dict(model_to_quantize,torch.load('/content/drive/MyDrive/trained_models/erfnet_structured_pruning03.pth'))


In [None]:
model_to_quantize.eval()


ERFNet(
  (encoder): Encoder(
    (initial_block): DownsamplerBlock(
      (conv): Conv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (layers): ModuleList(
      (0): DownsamplerBlock(
        (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1-5): 5 x non_bottleneck_1d(
        (conv3x1_1): Conv2d(64, 64, kernel_size=(3, 1), stride=(1, 1), padding=(1, 0))
        (conv1x3_1): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
        (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (conv3x1_2): Conv2d(64

In [None]:


qconfig_opt = get_default_qconfig("x86")

qconfig_mapping = QConfigMapping().set_global(qconfig_opt).set_object_type(
                                      torch.nn.ConvTranspose2d, get_default_qconfig("qnnpack")
                                  )  # qconfig_opt is an optional qconfig, either a valid qconfig or None


In [None]:

example_inputs = dataloader.dataset[0][0].unsqueeze(0)


In [None]:
example_inputs = dataloader.dataset[0][0].unsqueeze(0)
prepared_model = prepare_fx(model_to_quantize, qconfig_mapping, example_inputs)
print(prepared_model.graph)



graph():
    %input_1 : [num_users=1] = placeholder[target=input]
    %activation_post_process_0 : [num_users=2] = call_module[target=activation_post_process_0](args = (%input_1,), kwargs = {})
    %only_encode : [num_users=0] = placeholder[target=only_encode](default=False)
    %encoder_initial_block_conv : [num_users=1] = call_module[target=encoder.initial_block.conv](args = (%activation_post_process_0,), kwargs = {})
    %activation_post_process_1 : [num_users=1] = call_module[target=activation_post_process_1](args = (%encoder_initial_block_conv,), kwargs = {})
    %encoder_initial_block_pool : [num_users=1] = call_module[target=encoder.initial_block.pool](args = (%activation_post_process_0,), kwargs = {})
    %activation_post_process_2 : [num_users=1] = call_module[target=activation_post_process_2](args = (%encoder_initial_block_pool,), kwargs = {})
    %cat : [num_users=1] = call_function[target=torch.cat](args = ([%activation_post_process_1, %activation_post_process_2], 1), kwarg

In [None]:
def calibrate(model, data_loader):
    model.eval()
    with torch.no_grad():
        for image, target in data_loader:
            model(image)
calibrate(prepared_model, dataloader)

In [None]:
quantized_model = convert_fx(prepared_model)
print(quantized_model)

GraphModule(
  (encoder): Module(
    (initial_block): Module(
      (conv): QuantizedConv2d(3, 13, kernel_size=(3, 3), stride=(2, 2), scale=0.27467986941337585, zero_point=68, padding=(1, 1))
      (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (bn): QuantizedBNReLU2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (layers): Module(
      (0): Module(
        (conv): QuantizedConv2d(16, 48, kernel_size=(3, 3), stride=(2, 2), scale=0.19964741170406342, zero_point=62, padding=(1, 1))
        (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
        (bn): QuantizedBNReLU2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): Module(
        (conv3x1_1): QuantizedConvReLU2d(64, 64, kernel_size=(3, 1), stride=(1, 1), scale=0.024806007742881775, zero_point=0, padding=(1, 0))
        (conv1x3_1): QuantizedConvReLU2d(64, 64, kernel_size=(1, 3), stride=(1, 

In [None]:
def print_size_of_model(model):
    if isinstance(model, torch.jit.RecursiveScriptModule):
        torch.jit.save(model, "temp.p")
    else:
        torch.jit.save(torch.jit.script(model), "temp.p")
    print("Size (MB):", os.path.getsize("temp.p")/1e6)
    os.remove("temp.p")
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Reshape the output and target to 2D tensors
        output = output.reshape(batch_size, -1)  # Flatten spatial dimensions using reshape()
        target = target.reshape(batch_size, -1)  # Flatten spatial dimensions using reshape()

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

def evaluate(model, criterion, data_loader):
    model.eval()
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    cnt = 0
    with torch.no_grad():
        for image, target in data_loader:
            output = model(image)
            loss = criterion(output, target)
            cnt += 1
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            top1.update(acc1[0], image.size(0))
            top5.update(acc5[0], image.size(0))
    print('')

    return top1, top5

In [None]:
def load_my_quant_fx_state_dict(filepath,device='cpu',print=False):
    model = ERFNet(num_classes=20)
    model.eval()
    qconfig_opt = get_default_qconfig("x86")

    qconfig_mapping = QConfigMapping().set_global(qconfig_opt).set_object_type(
                                      torch.nn.ConvTranspose2d, get_default_qconfig("qnnpack")
                                  )  # qconfig_opt is an optional qconfig, either a valid qconfig or None
    dataloader = get_cityscapes_loader(datadir, 1, 'val',num_workers=2,size = 256)
    example_inputs = dataloader.dataset[0][0].unsqueeze(0)
    model = prepare_fx(model, qconfig_mapping, example_inputs)
    if print:
        print(model.graph)
    model = convert_fx(model)
    if print:
        print(model)
    model.load_state_dict(torch.load(filepath))
    if print:
      print("model loaded successfully")

    return model


In [None]:

fx_graph_mode_model_file_path = 'quantized_pruned_erfnet_fx.pth'

torch.save(quantized_model, fx_graph_mode_model_file_path)
torch.save(quantized_model.state_dict(), 'sd_'+ fx_graph_mode_model_file_path)


In [None]:
loaded_quantized_model = load_my_quant_fx_state_dict('sd_'+ fx_graph_mode_model_file_path,print=False)
criterion = nn.CrossEntropyLoss()

  model.load_state_dict(torch.load(filepath))
  device=storage.device,


In [None]:
top1, top5 = evaluate(loaded_quantized_model, criterion, dataloader)
print("[after serialization/deserialization] Evaluation accuracy on test dataset: %2.2f, %2.2f"%(top1.avg, top5.avg))

  cat = torch.cat([encoder_initial_block_conv, encoder_initial_block_pool], 1);  encoder_initial_block_conv = encoder_initial_block_pool = None
  cat_1 = torch.cat([encoder_layers_0_conv, encoder_layers_0_pool], 1);  encoder_layers_0_conv = encoder_layers_0_pool = None
  cat_2 = torch.cat([encoder_layers_6_conv, encoder_layers_6_pool], 1);  encoder_layers_6_conv = encoder_layers_6_pool = None


RuntimeError: The expanded size of the tensor (1) must match the existing size (131072) at non-singleton dimension 1.  Target sizes: [5, 1].  Tensor sizes: [1, 131072]

## QAT

In [None]:
from torch.quantization import prepare_qat, get_default_qat_qconfig, default_weight_observer,QConfig, default_observer

device = 'cuda'

qat_qconfig = QConfig(
    activation=default_observer,  # Per gli attivatori
    weight=default_weight_observer  # Per i pesi (usa per_tensor_affine)
)

# Assegna la configurazione al modello
model.qconfig = qat_qconfig

torch.backends.quantized.engine = 'fbgemm'  # Cambia con 'fbgemm' per CPU


model.train()
qat_model = prepare_qat(model)

calibration_dataloader  = get_cityscapes_loader(datadir, 1, 'val',num_workers=2,size = 256)

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(qat_model.parameters(), lr=1e-4)

for epoch in range(10):
    for images, labels in calibration_dataloader:
        optimizer.zero_grad()
        outputs = qat_model(images.to(device))
        loss = criterion(outputs.to(device), labels.to(device))
        loss.backward()
        optimizer.step()
qat_model.eval()
quantized_model = torch.quantization.convert(qat_model)


NameError: name 'model' is not defined

In [None]:
import torch
import torch.nn as nn
import torch.quantization as quant

# Definizione del modello quantizzato
class QERFNet(nn.Module):
    def __init__(self, num_classes):
        super(QERFNet, self).__init__()
        self.quant = quant.QuantStub()  # Stub per quantizzare l'ingresso
        self.encoder = QEncoder(num_classes)
        self.decoder = QDecoder(num_classes)
        self.dequant = quant.DeQuantStub()  # Stub per de-quantizzare l'uscita

    def forward(self, x, only_encode=False):
        x = self.quant(x)
        if only_encode:
            x = self.encoder(x, predict=True)
        else:
            x = self.encoder(x)
            x = self.decoder(x)
        return self.dequant(x)


class QEncoder(nn.Module):
    def __init__(self, num_classes):
        super(QEncoder, self).__init__()
        self.initial_block = DownsamplerBlock(3, 16)
        self.layers = nn.ModuleList([DownsamplerBlock(16, 64)] + [non_bottleneck_1d(64, 0.1, 1) for _ in range(5)])
        self.layers.append(DownsamplerBlock(64, 128))
        self.layers.extend([non_bottleneck_1d(128, 0.1, 2)] * 2)

    def forward(self, x, predict=False):
        x = self.initial_block(x)
        for layer in self.layers:
            x = layer(x)
        return x


class QDecoder(nn.Module):
    def __init__(self, num_classes):
        super(QDecoder, self).__init__()
        self.layers = nn.ModuleList([
            UpsamplerBlock(128, 64),
            non_bottleneck_1d(64, 0, 1),
            non_bottleneck_1d(64, 0, 1),
            UpsamplerBlock(64, 16),
            non_bottleneck_1d(16, 0, 1),
            non_bottleneck_1d(16, 0, 1)
        ])
        self.output_conv = nn.ConvTranspose2d(16, num_classes, 2, stride=2, padding=0, output_padding=0, bias=True)

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        x = self.output_conv(x)
        return x

device = 'cpu'
# Caricamento della rete pre-addestrata
pretrained_model = ERFNet(num_classes=20)
load_my_state_dict(pretrained_model,torch.load('/content/drive/MyDrive/trained_models/erfnet_pretrained.pth'))


# Creazione del modello quantizzato
quantized_model = QERFNet(num_classes=20)

# Trasferimento dei pesi dal modello originale a quello quantizzato
def transfer_weights(pretrained_model, quantized_model):
    pretrained_state_dict = pretrained_model.state_dict()
    quantized_state_dict = quantized_model.state_dict()

    for name, param in pretrained_state_dict.items():
        if name in quantized_state_dict:
            quantized_state_dict[name].data.copy_(param.data)

    quantized_model.load_state_dict(quantized_state_dict)

# Trasferisci i pesi
transfer_weights(pretrained_model, quantized_model)

# Configura il modello per il backend QNNPACK
torch.backends.quantized.engine = 'qnnpack'
quantized_model.qconfig = torch.quantization.get_default_qconfig('qnnpack')

# Preparazione e conversione del modello
torch.quantization.prepare(quantized_model, inplace=True)

# Simula input per calibrare le attivazioni
dummy_input = torch.randn(1, 3, 224, 224)  # Cambia le dimensioni secondo il tuo dataset
quantized_model(dummy_input)

# Converti il modello a int8
torch.quantization.convert(quantized_model, inplace=True)

# Salva il modello quantizzato
torch.save(quantized_model.state_dict(), "q_erfnet_qnnpack.pth")

# Verifica del modello quantizzato
print("Quantized model ready!")


  load_my_state_dict(pretrained_model,torch.load('/content/drive/MyDrive/trained_models/erfnet_pretrained.pth'))


Quantized model ready!


In [None]:
QModel = QERFNet(num_classes=20)
load_my_state_dict(QModel,torch.load('/content/drive/MyDrive/trained_models/erfnet_pretrained.pth'))



# Caricamento della rete pre-addestrata
pretrained_model = ERFNet(num_classes=20)
load_my_state_dict(pretrained_model,torch.load('/content/drive/MyDrive/trained_models/erfnet_pretrained.pth'))


# Creazione del modello quantizzato
quantized_model = QERFNet(num_classes=20)

# Trasferimento dei pesi dal modello originale a quello quantizzato
def transfer_weights(pretrained_model, quantized_model):
    pretrained_state_dict = pretrained_model.state_dict()
    quantized_state_dict = quantized_model.state_dict()
    c =0
    for name, param in quantized_state_dict.items():
        # Salta i nuovi stub quant e dequant
        if "quant" in name or "dequant" in name:
          print(c)
          c+=1
          continue


        # Assicurati che il nome corrisponda
        if name in quantized_state_dict:
            quantized_state_dict[name].data.copy_(param.data)

    quantized_model.load_state_dict(quantized_state_dict)

# Trasferisci i pesi
transfer_weights(pretrained_model, quantized_model)

# Configura il modello per la quantizzazione
torch.backends.quantized.engine = 'fbgemm'
quantized_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')

# Prepara e converti il modello
#torch.quantization.prepare(quantized_model, inplace=True)
torch.quantization.convert(quantized_model, inplace=True)

# Salva il modello quantizzato
torch.save(quantized_model.state_dict(), "q_erfnet.pth")
#print the layers of the model

  load_my_state_dict(QModel,torch.load('/content/drive/MyDrive/trained_models/erfnet_pretrained.pth'))
  load_my_state_dict(pretrained_model,torch.load('/content/drive/MyDrive/trained_models/erfnet_pretrained.pth'))


## Prova 5

In [None]:
import torch.nn.utils.fusion as fuse
class QuantizableERFNet(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.quant = quant.QuantStub()  # Punto di quantizzazione
        self.encoder = Encoder(num_classes)
        self.decoder = Decoder(num_classes)
        self.dequant = quant.DeQuantStub()  # Punto di dequantizzazione

    def forward(self, x, only_encode=False):
        x = self.quant(x)  # Quantizzazione input
        if only_encode:
            x = self.encoder(x, predict=True)
        else:
            x = self.encoder(x)
            x = self.decoder(x)
        x = self.dequant(x)  # Dequantizzazione output
        return x

def fuse_conv_bn_relu_skip_maxpool(model):
    # Keep track of fused modules to avoid redundant fusions
    fused_modules = set()
    for name, module in model.named_children():
        # Ricorsione per i sotto-moduli
        if isinstance(module, nn.Sequential):
            # Only fuse if the module hasn't been fused before
            if tuple(module._modules.keys()) not in fused_modules:
                fuse_conv_bn_relu_skip_maxpool(module)
                fused_modules.add(tuple(module._modules.keys()))  # Add fused module to the set

        elif isinstance(module, DownsamplerBlock):
            # Salta i blocchi con MaxPool2d
            print(f"Skipping fusion for {name} due to MaxPool2d in DownsamplerBlock")
            continue
        else:
            # Fusione di BatchNorm2d e ReLU
            for submodule_name, submodule in module.named_children():
                if isinstance(submodule, nn.BatchNorm2d):
                    # Trova il successivo ReLU, se esiste
                    parent_module_name = name
                    module_index = list(module._modules.keys()).index(submodule_name)

                    # Controlla se il prossimo layer è ReLU
                    if module_index + 1 < len(module._modules):
                        next_name = list(module._modules.keys())[module_index + 1]
                        next_module = module._modules[next_name]
                        if isinstance(next_module, nn.ReLU):
                            print(f"Fusing BatchNorm2d and ReLU in {name}: {submodule_name} and {next_name}")
                            fused = fuse.fuse_bn_relu(submodule, next_module)
                            setattr(module, submodule_name, fused)  # Sostituisci BatchNorm2d
                            delattr(module, next_name)  # Rimuovi il ReLU

def fix_batchnorm_after_pruning(model):
    for name, module in model.named_modules():
        if isinstance(module, nn.BatchNorm2d):
            corresponding_conv_name = name.replace("bn", "conv")  # Trova la convoluzione associata
            try:
                corresponding_conv = dict(model.named_modules())[corresponding_conv_name]
                if isinstance(corresponding_conv, nn.Conv2d):
                    out_channels = corresponding_conv.out_channels
                    if module.num_features != out_channels:
                        print(f"Fixing {name}: BatchNorm features {module.num_features} -> {out_channels}")
                        # Ricrea BatchNorm con i canali corretti
                        new_bn = nn.BatchNorm2d(out_channels, eps=module.eps, momentum=module.momentum)
                        setattr(model, name.split('.')[-1], new_bn)  # Sostituisci BatchNorm nel modello
            except KeyError:
                print(f"Skipping {name}: No corresponding convolution found")

# Escludi ConvTranspose2d dalla quantizzazione
def exclude_conv_transpose_qconfig(model):
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.ConvTranspose2d):
            module.qconfig = None  # Disabilita la quantizzazione per ConvTranspose2d

def load_my_quant_state_dict(model, state_dict_name='quantized_erfnet.pth', d = 'cuda'):
    # Inizializza il modello
    model = QuantizableERFNet(num_classes=20)

    # Carica i pesi prunati
    if d == 'cuda':
      model_weights = torch.load(state_dict_name, map_location=torch.device('cuda') )
    else:
      model_weights = torch.load(state_dict_name, map_location=torch.device('cpu'))
    #model.load_state_dict(model_weights, strict=False)

    # Applica la fusione
    fuse_conv_bn_relu_skip_maxpool(model)

    # Verifica il risultato
    print("Fusione completata.")

    model.qconfig = quant.QConfig(
        activation=quant.HistogramObserver.with_args(reduce_range=False),
        weight=quant.MinMaxObserver.with_args(
            dtype=torch.qint8,  # Changed to torch.qint8
            qscheme=torch.per_tensor_symmetric,  # Changed to per_tensor_symmetric
        )
    )
    model = model.to(device)
    exclude_conv_transpose_qconfig(model)

    torch.quantization.prepare(model, inplace=True)

    # Conversione
    torch.quantization.convert(model, inplace=True)

    # Load the weights into the new quantized model
    # This must be done AFTER the model has been prepared and converted,
    # otherwise the quantized modules will not be correctly updated with
    # the loaded weights
    model.load_state_dict(model_weights)

    # Set quantization parameters for further model utilization
    model.qconfig = quant.QConfig(
        activation=quant.HistogramObserver.with_args(reduce_range=False),
        weight=quant.MinMaxObserver.with_args(
            dtype=torch.qint8,  # Changed to torch.qint8
            qscheme=torch.per_tensor_symmetric,  # Changed to per_tensor_symmetric
        )
    )

    return model

In [None]:
exclude_conv_transpose_qconfig(model)



Fusione completata.


  model_weights = torch.load('/content/drive/MyDrive/trained_models/erfnet_pruning025.pth', map_location=device)


In [None]:
# Inizializza il modello
model = QuantizableERFNet(num_classes=20)

# Carica i pesi prunati
model_weights = torch.load('/content/drive/MyDrive/trained_models/erfnet_pruning025.pth', map_location=device)
model.load_state_dict(model_weights, strict=False)

# Applica la fusione
fuse_conv_bn_relu_skip_maxpool(model)

# Verifica il risultato
print("Fusione completata.")


calibrazione_loader = get_cityscapes_loader(datadir, 1, 'val',num_workers=2,size = 256)
model.to(device)


model.qconfig = quant.QConfig(
    activation=quant.HistogramObserver.with_args(reduce_range=False),
    weight=quant.MinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_tensor_symmetric)
)

exclude_conv_transpose_qconfig(model)


torch.quantization.prepare(model, inplace=True)



# Calibrazione
model.eval()
with torch.no_grad():
    for inputs, _ in calibrazione_loader:
        model(inputs.to(device))

# Conversione
torch.quantization.convert(model, inplace=True)

# Salva il modello quantizzato
torch.save(model.state_dict(), 'quantized_erfnet.pth')




In [None]:
model.load_state_dict(torch.load('quantized_erfnet.pth'))
model = model.to(device)

  model.load_state_dict(torch.load('quantized_erfnet.pth'))


In [None]:
"""def load_my_quant_state_dict(model, state_dict_name='quantized_erfnet.pth'):
    # Inizializza il modello
    model = QuantizableERFNet(num_classes=20)

    # Carica i pesi prunati
    model_weights = torch.load(state_dict_name, map_location=device)
    model.load_state_dict(model_weights, strict=False)

    # Applica la fusione
    fuse_conv_bn_relu_skip_maxpool(model)

    # Verifica il risultato
    print("Fusione completata.")

    model.qconfig = quant.QConfig(
        activation=quant.HistogramObserver.with_args(reduce_range=False),
        weight=quant.MinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_tensor_symmetric)
    )

    exclude_conv_transpose_qconfig(model)


    torch.quantization.prepare(model, inplace=True)



    # Calibrazione
    model.eval()
    #with torch.no_grad():
        #for inputs, _ in calibrazione_loader:
         #   model(inputs.to(device))

    # Conversione
    torch.quantization.convert(model, inplace=True)

    # Salva il modello quantizzato
    model.load_state_dict(torch.load(state_dict_name))
    model = model.to(device)
    return model"""

In [None]:
QMOdel = QuantizableERFNet(num_classes=20)
QMOdel = load_my_quant_state_dict(QMOdel,state_dict_name='quantized_erfnet.pth')
QMOdel = QMOdel.to(device)




  model_weights = torch.load(state_dict_name, map_location = device)


Fusione completata.


  model.load_state_dict(torch.load(state_dict_name))
