In [2]:
import numpy as np
import pandas as pd
import  matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets 
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import time
from thop import profile
from netcal.metrics import ECE

In [3]:
seed = np.random.seed(42)

In [35]:
import os
import shutil
import random
import urllib.request
import tarfile
import zipfile
import scipy.io
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from torch.utils.data import Subset
from collections import defaultdict

def get_num_classes(dataset):
    while isinstance(dataset, torch.utils.data.Subset):
        dataset = dataset.dataset

    if hasattr(dataset, "classes"):
        return len(dataset.classes)
    else:
        raise AttributeError("O dataset base não possui atributo 'classes'.")


def stratified_subsample(dataset, samples_per_class=50, seed=42):
    random.seed(seed)
    targets = [label for _, label in dataset.samples]
    class_indices = defaultdict(list)
    for idx, label in enumerate(targets):
        class_indices[label].append(idx)

    selected_indices = []
    for label, indices in class_indices.items():
        n = min(samples_per_class, len(indices))
        selected_indices.extend(random.sample(indices, n))

    return Subset(dataset, selected_indices)

def download_and_extract_tgz(url, download_path, extract_path):
    if not os.path.exists(download_path):
        print(f"Baixando em {url} ")
        urllib.request.urlretrieve(url, download_path)
    else:
        print(f"{download_path} já existe")

    if not os.path.exists(os.path.join(extract_path, "jpg")):
        print("Extraindo...")
        with tarfile.open(download_path, "r:gz") as tar:
            tar.extractall(path=extract_path)
        print(f"Extração concluída em {extract_path}")
    else:
        print("Pasta 'jpg' já existe, pulando extração.")

def split_dataset(dataset, val_frac=0.1, seed=42):
    n_val = int(len(dataset) * val_frac)
    n_train = len(dataset) - n_val
    torch.manual_seed(seed)
    return random_split(dataset, [n_train, n_val])

def prepare_flowers102(data_root="data", seed=42):
    import numpy as np

    random.seed(seed)
    np.random.seed(seed)

    flowers_path = os.path.join(data_root, "flowers-102")
    os.makedirs(flowers_path, exist_ok=True)

    url_flowers = "https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz"
    tgz_path = os.path.join(flowers_path, "102flowers.tgz")
    img_folder = os.path.join(flowers_path, "jpg")

    if not os.path.exists(img_folder):
        download_and_extract_tgz(url_flowers, tgz_path, flowers_path)

    url_labels = "https://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat"
    labels_path = os.path.join(flowers_path, "imagelabels.mat")
    if not os.path.exists(labels_path):
        print("Baixando labels...")
        urllib.request.urlretrieve(url_labels, labels_path)

    labels = scipy.io.loadmat(labels_path)['labels'][0]
    all_images = sorted(os.listdir(img_folder))

    class_to_images = {i: [] for i in range(1, 103)}
    for img_name in all_images:
        idx = int(img_name[6:11]) - 1
        label = int(labels[idx])
        class_to_images[label].append(img_name)

    for split_name in ["train", "val", "test"]:
        split_dir = os.path.join(flowers_path, split_name)
        os.makedirs(split_dir, exist_ok=True)

    for cls, imgs in class_to_images.items():
        random.shuffle(imgs)
        n_total = len(imgs)
        n_train = int(0.7 * n_total)
        n_val = int(0.15 * n_total)
        n_test = n_total - n_train - n_val

        splits = {
            "train": imgs[:n_train],
            "val": imgs[n_train:n_train + n_val],
            "test": imgs[n_train + n_val:]
        }

        for split_name, img_list in splits.items():
            label_dir = os.path.join(flowers_path, split_name, str(cls))
            os.makedirs(label_dir, exist_ok=True)
            for img_name in img_list:
                src = os.path.join(img_folder, img_name)
                dst = os.path.join(label_dir, img_name)
                if not os.path.exists(dst):
                    shutil.copy(src, dst)

    return (
        os.path.join(flowers_path, "train"),
        os.path.join(flowers_path, "val"),
        os.path.join(flowers_path, "test")
    )



def load_datasets(batch_size=64, seed=42):
    data_root = "data"
    os.makedirs(data_root, exist_ok=True)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    transform_train = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
        normalize
    ])

    transform_test = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize
    ])

    train_dir, val_dir, test_dir = prepare_flowers102(data_root, seed)
    train_flowers = datasets.ImageFolder(train_dir, transform=transform_train)
    val_flowers   = datasets.ImageFolder(val_dir, transform=transform_test)
    test_flowers  = datasets.ImageFolder(test_dir, transform=transform_test)

    tiny_path = os.path.join(data_root, "tiny-imagenet-200")
    if not os.path.exists(tiny_path):
        url_tiny = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
        zip_path = os.path.join(data_root, "tiny-imagenet-200.zip")
        urllib.request.urlretrieve(url_tiny, zip_path)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(data_root)
        os.remove(zip_path)

    train_tiny_full = datasets.ImageFolder(os.path.join(tiny_path, 'train'), transform=transform_train)
    val_tiny_full = datasets.ImageFolder(os.path.join(tiny_path, 'val'), transform=transform_test)

    train_tiny_full = stratified_subsample(train_tiny_full, samples_per_class=150, seed=seed)
    val_tiny_full = stratified_subsample(val_tiny_full, samples_per_class=30, seed=seed)

    train_tiny, val_extra = split_dataset(train_tiny_full, val_frac=0.1, seed=seed)

    test_tiny = val_tiny_full


    train_cifar_full = datasets.CIFAR100(root=data_root, train=True, transform=transform_train, download=True)
    train_cifar, val_cifar = split_dataset(train_cifar_full, val_frac=0.1, seed=seed)
    test_cifar = datasets.CIFAR100(root=data_root, train=False, transform=transform_test, download=True)

    train_loaders = {
        
        "Tiny-ImageNet": DataLoader(train_tiny, batch_size=batch_size, shuffle=True, num_workers=2),
        "CIFAR-100": DataLoader(train_cifar, batch_size=batch_size, shuffle=True, num_workers=2),
        "Flowers-102": DataLoader(train_flowers, batch_size=batch_size, shuffle=True, num_workers=2),
    }

    val_loaders = {
        
        "Tiny-ImageNet": DataLoader(val_extra, batch_size=batch_size, shuffle=False, num_workers=2),
        "CIFAR-100": DataLoader(val_cifar, batch_size=batch_size, shuffle=False, num_workers=2),
        "Flowers-102": DataLoader(val_flowers, batch_size=batch_size, shuffle=False, num_workers=2),
    }

    test_loaders = {
        
        "Tiny-ImageNet": DataLoader(test_tiny, batch_size=batch_size, shuffle=False, num_workers=2),
        "CIFAR-100": DataLoader(test_cifar, batch_size=batch_size, shuffle=False, num_workers=2),
        "Flowers-102": DataLoader(test_flowers, batch_size=batch_size, shuffle=False, num_workers=2),
    }

    return train_loaders, val_loaders, test_loaders


if __name__ == "__main__":
    train_loaders, val_loaders, test_loaders = load_datasets()
    print(" Todos os datasets estão prontos com train, val e test")





 Todos os datasets estão prontos com train, val e test


In [21]:
import torch
import torch.nn as nn

class DepthwiseSeparableConv(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, groups=in_channels, bias=False)
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class SEBlock(nn.Module):
    def __init__(self, c, r=16):
        super(SEBlock, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(c, c // r, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(c // r, c, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y


class FireX(nn.Module):
    def __init__(self, in_channels, squeeze_channels, expand1x1_channels, expand3x3_channels, use_se=False, dw_expand=False):
        super(FireX, self).__init__()
        self.use_se = use_se
        self.squeeze = nn.Sequential(
            nn.Conv2d(in_channels, squeeze_channels, kernel_size=1),
            nn.BatchNorm2d(squeeze_channels),
            nn.ReLU(inplace=True)
        )
        if dw_expand:
            self.expand1x1 = DepthwiseSeparableConv(squeeze_channels, expand1x1_channels, kernel_size=1, padding=0)
            self.expand3x3 = DepthwiseSeparableConv(squeeze_channels, expand3x3_channels, kernel_size=3, padding=1)
        else:
            self.expand1x1 = nn.Sequential(
                nn.Conv2d(squeeze_channels, expand1x1_channels, kernel_size=1),
                nn.BatchNorm2d(expand1x1_channels),
                nn.ReLU(inplace=True)
            )
            self.expand3x3 = nn.Sequential(
                nn.Conv2d(squeeze_channels, expand3x3_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(expand3x3_channels),
                nn.ReLU(inplace=True)
            )
        out_channels = expand1x1_channels + expand3x3_channels
        self.se = SEBlock(out_channels) if use_se else nn.Identity()

    def forward(self, x):
        x = self.squeeze(x)
        x1 = self.expand1x1(x)
        x3 = self.expand3x3(x)
        h = min(x1.shape[2], x3.shape[2])
        w = min(x1.shape[3], x3.shape[3])
        x1 = x1[:, :, :h, :w]
        x3 = x3[:, :, :h, :w]
        out = torch.cat([x1, x3], dim=1)
        return self.se(out)


class SqueezeNetAutoral(nn.Module):
    def __init__(self, num_classes=100, use_se=True, dw_expand=False, small=True):
        super(SqueezeNetAutoral, self).__init__()
        if small:
            self.features = nn.Sequential(
                nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(32),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(3, stride=2, padding=1),
                FireX(32, 8, 32, 32, use_se=use_se, dw_expand=dw_expand),
                FireX(64, 16, 48, 48, use_se=use_se, dw_expand=dw_expand),
                nn.MaxPool2d(3, stride=2, padding=1),
                FireX(96, 24, 64, 64, use_se=use_se, dw_expand=dw_expand),
                FireX(128, 24, 64, 64, use_se=use_se, dw_expand=dw_expand),
                nn.MaxPool2d(3, stride=2, padding=1),
                FireX(128, 32, 96, 96, use_se=use_se, dw_expand=dw_expand),
                FireX(192, 48, 128, 128, use_se=use_se, dw_expand=dw_expand)
            )
            final_planes = 256
        else:
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(64),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(3, stride=2, padding=1),
                FireX(64, 16, 64, 64, use_se=use_se, dw_expand=dw_expand),
                FireX(128, 16, 64, 64, use_se=use_se, dw_expand=dw_expand),
                nn.MaxPool2d(3, stride=2, padding=1),
                FireX(128, 32, 128, 128, use_se=use_se, dw_expand=dw_expand),
                FireX(256, 32, 128, 128, use_se=use_se, dw_expand=dw_expand),
                nn.MaxPool2d(3, stride=2, padding=1),
                FireX(256, 48, 192, 192, use_se=use_se, dw_expand=dw_expand),
                FireX(384, 64, 256, 256, use_se=use_se, dw_expand=dw_expand)
            )
            final_planes = 512
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Dropout(p=0.5),
            nn.Linear(final_planes, num_classes)
        )
        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if getattr(m, "bias", None) is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                if getattr(m, "bias", None) is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if getattr(m, "bias", None) is not None:
                    nn.init.constant_(m.bias, 0)



In [7]:
import torch
import torch.nn.functional as F
import time
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from thop import profile
import psutil
import GPUtil

class Metrics:

    @staticmethod
    def compute_ece(probs, labels, n_bins=15):
        confidences = np.max(probs, axis=1)       
        predictions = np.argmax(probs, axis=1)    
        accuracies = predictions == labels

        bins = np.linspace(0.0, 1.0, n_bins + 1)
        bin_indices = np.digitize(confidences, bins) - 1

        ece = 0.0
        for i in range(n_bins):
            mask = bin_indices == i
            if np.any(mask):
                bin_acc = np.mean(accuracies[mask])
                bin_conf = np.mean(confidences[mask])
                ece += np.abs(bin_acc - bin_conf) * np.mean(mask)
        return ece

    @staticmethod
    def evaluate_model(model, dataloader, device='cuda'):
        model.to(device) 
        model.eval()
        all_preds, all_labels, all_probs = [], [], []
        total_loss = 0.0
        criterion = torch.nn.CrossEntropyLoss()

        with torch.no_grad():  
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                total_loss += loss.item() * inputs.size(0)

                probs = F.softmax(outputs, dim=1).cpu().numpy()
                preds = np.argmax(probs, axis=1)

                all_probs.append(probs)
                all_preds.append(preds)
                all_labels.append(labels.cpu().numpy())

        all_probs = np.concatenate(all_probs)
        all_preds = np.concatenate(all_preds)
        all_labels = np.concatenate(all_labels)

        acc = accuracy_score(all_labels, all_preds)
        prec = precision_score(all_labels, all_preds, average='macro', zero_division=0)
        rec = recall_score(all_labels, all_preds, average='macro', zero_division=0)
        f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)
        cm = confusion_matrix(all_labels, all_preds)
        ece = Metrics.compute_ece(all_probs, all_labels)

        return {
            "Acurácia": acc,
            "Precisão": prec,
            "Recall": rec,
            "F1": f1,
            "Matriz_Confusão": cm,
            "ECE": ece,
            "Loss_Média": total_loss / len(dataloader.dataset)
        }

    @staticmethod
    def get_model_complexity(model, input_size=(1, 3, 224, 224)):
        input_tensor = torch.randn(*input_size)
        device = next(model.parameters()).device
        input_tensor = input_tensor.to(device)
        flops, params = profile(model, inputs=(input_tensor,), verbose=False)
        return {"FLOPs": flops, "Parâmetros": params}

    @staticmethod
    def benchmark_model(model, device='cuda', input_size=(1, 3, 224, 224), runs=50):
        model.to(device)
        model.eval()
        x = torch.randn(input_size).to(device)

        for _ in range(10):
            _ = model(x)

        torch.cuda.synchronize() if device == 'cuda' else None
        start_time = time.time()

        for _ in range(runs):
            _ = model(x)

        torch.cuda.synchronize() if device == 'cuda' else None
        elapsed = (time.time() - start_time) / runs

        if device == 'cuda':
            GPU = GPUtil.getGPUs()[0]
            mem_used = GPU.memoryUsed
        else:
            mem_used = psutil.virtual_memory().used / (1024 ** 3)

        return {"Latência (s)": elapsed, "Memória (MB)": mem_used}

    @staticmethod
    def comparar_modelos(modelos, dataloader, device='cuda'):
        resultados = {}
        for nome, modelo in modelos.items():
            modelo.to(device)
            metricas = Metrics.evaluate_model(modelo, dataloader, device)
            complexidade = Metrics.get_model_complexity(modelo)
            desempenho = Metrics.benchmark_model(modelo, device)
            resultados[nome] = {**metricas, **complexidade, **desempenho}
        return resultados


In [8]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

def top_confused_classes(cm, class_names, top_k=10):
    cm_errors = cm.copy()
    np.fill_diagonal(cm_errors, 0)

    flat_indices = np.argsort(cm_errors.flatten())[-top_k:]
    rows, cols = np.unravel_index(flat_indices, cm_errors.shape)

    selected_classes = np.unique(np.concatenate([rows, cols]))
    selected_cm = cm[np.ix_(selected_classes, selected_classes)]
    selected_class_names = [class_names[i] for i in selected_classes]

    return selected_cm, selected_class_names

def plot_top_confusion(cm, class_names, top_k=10, model_name="Modelo"):
    selected_cm, selected_class_names = top_confused_classes(cm, class_names, top_k)
    
    plt.figure(figsize=(8, 6))
    sns.heatmap(selected_cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=selected_class_names, yticklabels=selected_class_names)
    plt.xlabel("Predito")
    plt.ylabel("Real")
    plt.title(f"Matriz de Confusão - Top {top_k} Confusões - {model_name}")
    plt.tight_layout()
    plt.show()

def plot_metric_comparison(resultados, metric_name):
    nomes = list(resultados.keys())
    valores = [resultados[m][metric_name] for m in nomes]

    plt.figure(figsize=(8, 5))
    plt.bar(nomes, valores, color=['#0077b6', '#00b4d8', '#90e0ef'])
    plt.title(f"Comparação de {metric_name}")
    plt.ylabel(metric_name)
    plt.grid(axis='y', linestyle='--', alpha=0.6)
    plt.show()

In [14]:
from torchvision import models
import torch, torch.nn as nn, torch.optim as optim
import gc, time
import copy

def initialize_weights(model):
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            if m.weight is not None:
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            if m.weight is not None:
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            if m.weight is not None:
                nn.init.constant_(m.weight, 1)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)


def train_model(model, train_loader, val_loader, device='cuda',
                optimizer_name="AdamW", lr=0.001, momentum=0.9, weight_decay=1e-4,
                scheduler_type="cosine", epochs=50, patience=10, initialize=False):

    model.to(device)
    criterion = nn.CrossEntropyLoss()

    if optimizer_name.lower() == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
    elif optimizer_name.lower() == "adamw":
        optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    scheduler = (
        optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
        if scheduler_type.lower() == "cosine"
        else optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
    )

    if initialize:
        initialize_weights(model)

    best_val_acc, best_epoch = 0.0, 0
    best_model_wts, early_counter = None, 0
    history = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": []}

    for epoch in range(epochs):
        model.train()
        total_loss, correct, total = 0.0, 0, 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item() * inputs.size(0)
            correct += (outputs.argmax(1) == labels).sum().item()
            total += labels.size(0)

        train_loss, train_acc = total_loss / total, correct / total

        model.eval()
        val_loss, val_correct, val_total = 0.0, 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                val_correct += (outputs.argmax(1) == labels).sum().item()
                val_total += labels.size(0)

        val_loss, val_acc = val_loss / val_total, val_correct / val_total
        scheduler.step()

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["train_acc"].append(train_acc)
        history["val_acc"].append(val_acc)

        print(f"Epoch [{epoch+1}/{epochs}] | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

        if val_acc > best_val_acc:
            best_val_acc, best_epoch = val_acc, epoch + 1
            best_model_wts = copy.deepcopy(model.state_dict())
            early_counter = 0
        else:
            early_counter += 1
            if early_counter >= patience:
                print(f"Early stopping na época {epoch+1}")
                break

    if best_model_wts is not None:
        model.load_state_dict(best_model_wts)
        print(f" Melhor modelo: época {best_epoch}, Val Acc = {best_val_acc:.4f}")

    return model, history



In [None]:
def get_classes_from_loader(loader):
    dataset = loader.dataset
    while hasattr(dataset, "dataset"):
        dataset = dataset.dataset
    return dataset.classes

def run_experimento(modelos, train_loader, val_loader, device, epochs=50, patience=10):
    resultados_treino = {}
    modelos_treinados = {}
    for nome, modelo in modelos.items():
        print(f"\n Treinando {nome}...\n")
        modelo_treinado, historico = train_model(
            modelo,
            train_loader,
            val_loader,
            device=device,
            optimizer_name="AdamW",
            lr=1e-4,
            epochs=epochs,
            patience=patience
        )
        resultados_treino[nome] = historico
        modelos_treinados[nome] = modelo_treinado
        torch.cuda.empty_cache()
        gc.collect()
    return resultados_treino, modelos_treinados

if __name__ == "__main__":
    import torch
    from torchvision import models
    import torch.nn as nn
    import matplotlib.pyplot as plt
    import gc

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Usando dispositivo: {device}")

    train_loaders, val_loaders, test_loaders = load_datasets()
    resultados_totais = {}

    def plot_metric_comparison(resultados, metric_name):
        nomes = list(resultados.keys())
        valores = [resultados[m][metric_name] for m in nomes]
        plt.figure(figsize=(8, 5))
        plt.bar(nomes, valores, color=['#0077b6', '#00b4d8', '#90e0ef'])
        plt.title(f"Comparação de {metric_name}")
        plt.ylabel(metric_name)
        plt.grid(axis='y', linestyle='--', alpha=0.6)
        plt.show()

    for dataset_name in train_loaders.keys():
        print(f"\n===== Treinando e avaliando dataset: {dataset_name} =====")
        train_loader = train_loaders[dataset_name]
        val_loader = val_loaders[dataset_name]
        test_loader = test_loaders[dataset_name]
        num_classes = get_num_classes(train_loader.dataset)

        modelos = {
            "SqueezeNet1_0": models.squeezenet1_0(weights=None),
            "SqueezeNet_Autoral": SqueezeNetAutoral(num_classes=num_classes, use_se=True, dw_expand=True, small=True),
            "SqueezeNet1_1": models.squeezenet1_1(weights=None)
        }

        # Ajusta os classifiers e inicializa pesos
        for nome, modelo in modelos.items():
            if "Autoral" not in nome:
                # Ajusta primeira camada para imagens pequenas
                modelo.features[0] = nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1)
                
                # Ajusta o classifier para o número de classes
                modelo.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1))
                modelo.num_classes = num_classes
                
                # Inicializa pesos
                initialize_weights(modelo)

            modelo.to(device)


        # Define epochs dependendo do dataset
        if dataset_name in ["CIFAR-100", "Tiny-ImageNet"]:
            epochs = 50
        else:
            epochs = 100  

        print(f"\n=== Iniciando treinamento por {epochs} épocas ===")
        resultados_treino, modelos_treinados = run_experimento(
            modelos=modelos,
            train_loader=train_loader,
            val_loader=val_loader,
            device=device,
            epochs=epochs,
            patience=12
        )

        for model_name in modelos.keys():
            print(f"\n--- Avaliando modelo: {model_name} ---")
            model_treinado = modelos_treinados[model_name]
            metricas = Metrics.evaluate_model(model_treinado, test_loader, device=device)
            complexidade = Metrics.get_model_complexity(model_treinado)
            desempenho = Metrics.benchmark_model(model_treinado, device=device)
            resultados_totais[(dataset_name, model_name)] = {
                "Teste": metricas,
                "Treino": resultados_treino[model_name],
                "Complexidade": complexidade,
                "Desempenho": desempenho
            }
            for k, v in metricas.items():
                print(f"{k}: {v:.4f}" if isinstance(v, float) else f"{k}: {v}")
            plot_top_confusion(
                metricas['Matriz_Confusão'],
                class_names=get_classes_from_loader(train_loader),
                top_k=10,
                model_name=model_name
            )

        resultados_acuracia = {name: resultados_totais[(dataset_name, name)]["Teste"] for name in modelos.keys()}
        plot_metric_comparison(resultados_acuracia, "Acurácia")

        gc.collect()
        torch.cuda.empty_cache()








Usando dispositivo: cuda

===== Treinando e avaliando dataset: Tiny-ImageNet =====

=== Iniciando treinamento por 50 épocas ===

 Treinando SqueezeNet1_0...

Epoch [1/50] | Train Acc: 0.0101 | Val Acc: 0.0113
Epoch [2/50] | Train Acc: 0.0127 | Val Acc: 0.0217
Epoch [3/50] | Train Acc: 0.0177 | Val Acc: 0.0200
Epoch [4/50] | Train Acc: 0.0221 | Val Acc: 0.0213
Epoch [5/50] | Train Acc: 0.0245 | Val Acc: 0.0293
Epoch [6/50] | Train Acc: 0.0274 | Val Acc: 0.0293
Epoch [7/50] | Train Acc: 0.0321 | Val Acc: 0.0300
Epoch [8/50] | Train Acc: 0.0354 | Val Acc: 0.0400
Epoch [9/50] | Train Acc: 0.0426 | Val Acc: 0.0480


In [1]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))
print(torch.version.cuda)


True
NVIDIA GeForce RTX 5070 Ti
12.8


In [39]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()
