In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader
import torch.nn as nn

In [3]:
# Seed every RNG this notebook relies on. NumPy alone is not enough: dataset
# splitting, DataLoader shuffling and weight initialisation all draw from
# torch's global generator.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
class Datasets:
    """Namespace of factory functions that build datasets and their DataLoaders.

    Every ``loader_*`` function is a static method, so it works both as
    ``Datasets.loader_cifar100()`` and when called on an instance. Each one
    returns ``(trainset, testset, trainloader, testloader)``.
    """

    @staticmethod
    def _to_rgb(img):
        """Return ``img`` converted to 3-channel RGB via its ``convert`` method."""
        return img.convert("RGB")

    @staticmethod
    def _make_loaders(trainset, testset, batch_size, num_workers):
        """Wrap a train/test dataset pair in DataLoaders.

        Only the training loader shuffles; the test loader keeps dataset order.

        Returns:
            tuple: ``(trainloader, testloader)``.
        """
        trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
        testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
        return trainloader, testloader

    @staticmethod
    def loader_cifar100(batch_size=64, num_workers=2):
        """Download CIFAR-100 into ``./data`` and build its loaders.

        Images keep their native resolution (no resize is applied) and are
        normalised with fixed per-channel mean/std constants.

        Args:
            batch_size: Samples per batch for both loaders.
            num_workers: Worker processes used by each DataLoader.

        Returns:
            tuple: ``(trainset, testset, trainloader, testloader)``.
        """
        transform = T.Compose([
            T.ToTensor(),
            T.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
        ])

        trainset = torchvision.datasets.CIFAR100(root="./data", train=True, download=True, transform=transform)
        testset = torchvision.datasets.CIFAR100(root="./data", train=False, download=True, transform=transform)

        trainloader, testloader = Datasets._make_loaders(trainset, testset, batch_size, num_workers)

        print(f"[CIFAR100] Treino: {len(trainset)} | Teste: {len(testset)}")
        return trainset, testset, trainloader, testloader

    @staticmethod
    def loader_food101(batch_size=64, num_workers=2):
        """Download Food-101 into ``./data`` and build its loaders.

        Images are resized to 224x224 and normalised with fixed per-channel
        mean/std constants.

        Args:
            batch_size: Samples per batch for both loaders.
            num_workers: Worker processes used by each DataLoader.

        Returns:
            tuple: ``(trainset, testset, trainloader, testloader)``.
        """
        transform = T.Compose([
            T.Resize((224, 224)),
            T.ToTensor(),
            T.Normalize((0.545, 0.436, 0.342), (0.294, 0.275, 0.281)),
        ])

        trainset = torchvision.datasets.Food101(root="./data", split="train", download=True, transform=transform)
        testset = torchvision.datasets.Food101(root="./data", split="test", download=True, transform=transform)

        trainloader, testloader = Datasets._make_loaders(trainset, testset, batch_size, num_workers)

        print(f"[Food-101] Treino: {len(trainset)} | Teste: {len(testset)}")
        return trainset, testset, trainloader, testloader

    @staticmethod
    def loader_caltech256(batch_size=64, num_workers=2, seed=42):
        """Download Caltech-256 into ``./data``, split it 80/20 and build loaders.

        The dataset ships without an official train/test split, so one is drawn
        here with a dedicated, seeded generator: the same ``seed`` always yields
        the same train/test membership, independent of the global torch RNG.

        Args:
            batch_size: Samples per batch for both loaders.
            num_workers: Worker processes used by each DataLoader.
            seed: Seed for the generator that draws the 80/20 split.

        Returns:
            tuple: ``(trainset, testset, trainloader, testloader)`` where the two
            datasets are ``Subset`` views produced by ``random_split``.
        """
        transform = T.Compose([
            # Caltech-256 contains some grayscale files and the torchvision
            # dataset does not convert them; without this step the 3-channel
            # Normalize (and batch collation) fails on those images.
            T.Lambda(Datasets._to_rgb),
            T.Resize((224, 224)),
            T.ToTensor(),
            T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])

        dataset = torchvision.datasets.Caltech256(root="./data", download=True, transform=transform)

        train_size = int(0.8 * len(dataset))
        test_size = len(dataset) - train_size
        split_rng = torch.Generator().manual_seed(seed)
        trainset, testset = torch.utils.data.random_split(
            dataset, [train_size, test_size], generator=split_rng
        )

        trainloader, testloader = Datasets._make_loaders(trainset, testset, batch_size, num_workers)

        print(f"[Caltech-256] Treino: {len(trainset)} | Teste: {len(testset)}")
        return trainset, testset, trainloader, testloader

In [None]:
# Build datasets and loaders for the three benchmarks. Each factory returns
# (train dataset, test dataset, train loader, test loader), in that order.

# CIFAR-100
(
    cifar_train,
    cifar_test,
    cifar_trainloader,
    cifar_testloader,
) = Datasets.loader_cifar100()

# Food-101
(
    food_train,
    food_test,
    food_trainloader,
    food_testloader,
) = Datasets.loader_food101()

# Caltech-256
(
    caltech_train,
    caltech_test,
    caltech_trainloader,
    caltech_testloader,
) = Datasets.loader_caltech256()

Algoritmo escolhido - VGG: compare sua variante com vgg11, vgg13, vgg16, vgg19 (com ou sem _bn).

Protocolo de treino: definir épocas, otimizador, LR schedule e augmentation.

Usar VGG como feature extractor e Transformer como classificador:

Blocos de convolução 3×3 empilhados (geralmente 2 ou 3 convs antes de um max pooling).

Camadas totalmente conectadas no final (ou um classificador simples).

Arquitetura “profunda e simples” (sem atalhos ou estruturas complexas).

In [None]:
import torch
import torch.nn as nn

class VGG_Autoral(nn.Module):
    """VGG-style CNN with batch normalisation and a custom, narrower filter plan.

    The feature extractor is five blocks, each made of two 3x3 convolutions
    (Conv -> BatchNorm -> ReLU) followed by a 2x2 max-pool, with channel widths
    32 -> 64 -> 128 -> 256 -> 256. An adaptive average pool then fixes the
    feature map at 7x7 so the fully connected classifier works for any input
    resolution that survives the five poolings (for example 32x32 as well as
    224x224). For 224x224 inputs the feature map is already 7x7, so the
    adaptive pool is the identity there.

    Args:
        num_classes: Size of the final logits vector.
        dropout: Dropout probability used twice in the classifier.
        hidden_dim: Width of the two hidden fully connected layers.
    """

    def __init__(self, num_classes=1000, dropout=0.5, hidden_dim=4096):
        super().__init__()

        # (in_channels, out_channels) for each of the five conv blocks.
        channel_plan = [(3, 32), (32, 64), (64, 128), (128, 256), (256, 256)]

        # Layers are appended in the same order as a hand-written Sequential,
        # so state_dict keys keep the form ``features.<index>.*``.
        layers = []
        for in_channels, out_channels in channel_plan:
            layers.extend(self._conv_block(in_channels, out_channels))
        self.features = nn.Sequential(*layers)

        # Parameter-free; decouples the classifier's input size from the image size.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        self.classifier = nn.Sequential(
            nn.Linear(256 * 7 * 7, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, num_classes),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return the layers of one block: two Conv-BN-ReLU stages plus a max-pool."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]

    def forward(self, x):
        """Compute class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.classifier(x)

# Example: instantiate the network with a 1000-class head and print the model.
model = VGG_Autoral(num_classes=1000)
print(model)


Número de filtros próprio (ex.: 32 → 64 → 128 → 256 → 256)

Batch Normalization após cada conv

Dropout ajustado no classificador