# In [None]:
import torch
import torch.quantization
from clasificadorB1 import *

# Autoencoder checkpoint files. These are absolute paths on the original
# author's Windows machine, so they must be adjusted before running elsewhere.
# NOTE(review): the "-70"/"-90" suffixes presumably identify two training
# variants -- confirm. Only the "70" path is referenced in the code shown here.
autoencoder_checkpoint_path70 = r'C:\Users\tian_\Desktop\lightning_logs\unet_autoencoderB2-70.pth'
autoencoder_checkpoint_path90 = r'C:\Users\tian_\Desktop\lightning_logs\unet_autoencoderB2-90.pth'

def load_model(path):
    """Build an ``AutoencoderVGG16`` and populate it from the state dict at *path*.

    The network is constructed from the names ``data_dir``, ``num_classes`` and
    ``autoencoder_checkpoint_path70`` resolved at module scope (the first two
    are not defined in this section of the file).

    Args:
        path: File containing a state dict written with ``torch.save``.

    Returns:
        The constructed model with the loaded weights.
    """
    model = AutoencoderVGG16(data_dir, num_classes, autoencoder_checkpoint_path70)
    # Deserialize onto the CPU: a checkpoint saved from a CUDA device would
    # otherwise fail to load on a machine without a GPU, and the fbgemm
    # quantization flow used later in this script is CPU-based.
    # NOTE: torch.load unpickles the file; only load trusted checkpoints.
    state_dict = torch.load(path, map_location='cpu')
    model.load_state_dict(state_dict)
    return model

# Load the three checkpoints, in order, into model1..model3.
model1, model2, model3 = (
    load_model(checkpoint_file)
    for checkpoint_file in ('model1.pth', 'model2.pth', 'model3.pth')
)

# Helper that applies post-training static quantization to a model.
def quantize_model(model, calibration_loader=None):
    """Return a statically quantized copy of *model*, leaving *model* untouched.

    The copy is switched to eval mode, fused through its own ``fuse_model()``
    method, prepared with the default ``'fbgemm'`` qconfig (which inserts
    observers), optionally calibrated, and finally converted.

    Args:
        model: A float module exposing a ``fuse_model()`` method.
        calibration_loader: Optional iterable of ``(inputs, labels)`` batches.
            Each ``inputs`` is run through the prepared model so the observers
            can record activation statistics before conversion. When omitted,
            conversion uses uncalibrated observers and a warning is emitted.

    Returns:
        A new quantized module; the module passed in is not modified.
    """
    import copy
    import warnings

    # Work on a copy: quantizing in place would make the "original" and the
    # "quantized" model the very same object, so any later comparison between
    # the two would silently measure the quantized network twice.
    quantized = copy.deepcopy(model)
    quantized.eval()
    quantized.fuse_model()
    quantized.qconfig = torch.quantization.get_default_qconfig('fbgemm')
    torch.quantization.prepare(quantized, inplace=True)

    if calibration_loader is None:
        warnings.warn(
            "quantize_model called without calibration data; the observers "
            "have seen no activations, so quantization parameters are not "
            "calibrated and accuracy may degrade.",
            stacklevel=2,
        )
    else:
        # Calibration pass: forward only, no gradients required.
        with torch.no_grad():
            for inputs, _ in calibration_loader:
                quantized(inputs)

    torch.quantization.convert(quantized, inplace=True)
    return quantized

# Quantize the three models in order.
# NOTE(review): no calibration batches are run through the models before
# conversion at this call site -- confirm whether that is intended.
quantized_model1, quantized_model2, quantized_model3 = map(
    quantize_model, (model1, model2, model3)
)

# Persist each quantized state dict next to its float counterpart.
for quantized_net, output_file in (
    (quantized_model1, 'quantized_model1.pth'),
    (quantized_model2, 'quantized_model2.pth'),
    (quantized_model3, 'quantized_model3.pth'),
):
    torch.save(quantized_net.state_dict(), output_file)


### Step 3: Compare latencies, sizes, and performance

import time

def measure_latency(model, dataloader):
    """Return the mean wall-clock duration, in seconds, of one forward pass.

    The model is put in eval mode and called once per batch under
    ``torch.no_grad()``. Only the ``model(inputs)`` call is timed; fetching the
    batch from *dataloader* is excluded.

    Args:
        model: Callable module exposing ``eval()``.
        dataloader: Iterable yielding ``(inputs, labels)`` pairs; the labels
            are ignored.

    Returns:
        Average time per batch in seconds.

    Raises:
        ValueError: If *dataloader* yields no batches.
    """
    model.eval()
    elapsed_total = 0.0
    batch_count = 0
    with torch.no_grad():
        for inputs, _ in dataloader:
            # perf_counter is monotonic and high-resolution, unlike time.time,
            # which can jump when the system clock is adjusted.
            started = time.perf_counter()
            model(inputs)
            elapsed_total += time.perf_counter() - started
            batch_count += 1
    if batch_count == 0:
        raise ValueError("measure_latency requires a non-empty dataloader")
    return elapsed_total / batch_count

def measure_size(model_path):
    """Return the size in bytes of the file at *model_path*.

    Args:
        model_path: Path to an existing file (e.g. a saved state dict).

    Returns:
        The file size in bytes.

    Raises:
        OSError: If the file does not exist or is inaccessible.
    """
    # Imported here because this file never imports ``os`` explicitly; relying
    # on a star import to leak it into the namespace is fragile.
    import os

    return os.path.getsize(model_path)

def measure_performance(model, dataloader):
    """Return the classification accuracy of *model* over *dataloader*, in percent.

    The model is put in eval mode and run under ``torch.no_grad()``. For every
    batch the predicted class is the index of the largest value along
    dimension 1 of the model output, which is compared element-wise with the
    labels.

    Args:
        model: Callable module exposing ``eval()``.
        dataloader: Iterable yielding ``(inputs, labels)`` pairs.

    Returns:
        ``100 * correct / total`` as a float.

    Raises:
        ValueError: If *dataloader* yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("measure_performance requires a non-empty dataloader")
    return 100 * correct / total

# Measure latency, on-disk size and accuracy of a model.
def evaluate_model(model, dataloader, path):
    """Collect the three comparison metrics for one model.

    Args:
        model: Model passed to ``measure_latency`` and ``measure_performance``.
        dataloader: Batches used for both the latency and the accuracy pass.
        path: File whose size is reported by ``measure_size``.

    Returns:
        A ``(latency, size, accuracy)`` tuple, computed in that order.
    """
    return (
        measure_latency(model, dataloader),
        measure_size(path),
        measure_performance(model, dataloader),
    )

# Assumes the dataloaders 'dataloader_train' and 'dataloader_test' already exist.
# Metrics for the float models, each paired with its checkpoint file.
original_metrics = [
    evaluate_model(candidate, dataloader_test, checkpoint_file)
    for candidate, checkpoint_file in (
        (model1, 'model1.pth'),
        (model2, 'model2.pth'),
        (model3, 'model3.pth'),
    )
]

# Metrics for the quantized models, each paired with its saved state dict.
quantized_metrics = [
    evaluate_model(candidate, dataloader_test, checkpoint_file)
    for candidate, checkpoint_file in (
        (quantized_model1, 'quantized_model1.pth'),
        (quantized_model2, 'quantized_model2.pth'),
        (quantized_model3, 'quantized_model3.pth'),
    )
]

# Report each model's float and quantized metrics on consecutive lines.
# Every metrics entry is a (latency, size, accuracy) tuple.
paired_metrics = zip(original_metrics, quantized_metrics)
for i, (orig, quant) in enumerate(paired_metrics):
    print(f"Model {i+1} Original - Latency: {orig[0]:.4f} s, Size: {orig[1]} bytes, Accuracy: {orig[2]:.2f}%")
    print(f"Model {i+1} Quantized - Latency: {quant[0]:.4f} s, Size: {quant[1]} bytes, Accuracy: {quant[2]:.2f}%")
