In [1]:
import numpy as np
import pandas as pd
import  matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets 
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import time
from thop import profile
from netcal.metrics import ECE

In [2]:
# np.random.seed() returns None, so the seed value is kept in its own variable
# and passed to the seeding call rather than storing the call's result.
seed = 42
np.random.seed(seed)

In [None]:
import os
import shutil
import random
import urllib.request
import tarfile
import zipfile
import scipy.io
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

def download_and_extract_tgz(url, download_path, extract_path):
    """Download a gzip-compressed tar archive and unpack it, skipping finished steps.

    Args:
        url: Location of the ``.tgz`` archive.
        download_path: Local file name for the downloaded archive. When this
            file already exists the download is skipped.
        extract_path: Directory the archive is unpacked into. Extraction is
            skipped when ``<extract_path>/jpg`` already exists.

    Raises:
        Whatever ``urllib.request.urlretrieve`` or ``tarfile`` raise on a
        failed download or a corrupt archive.
    """
    if not os.path.exists(download_path):
        print(f"Baixando de {url}...")
        # Download under a temporary name and rename only on success, so an
        # interrupted transfer never leaves a truncated file at download_path
        # that a later run would mistake for a complete archive.
        partial_path = f"{download_path}.part"
        try:
            urllib.request.urlretrieve(url, partial_path)
            os.replace(partial_path, download_path)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)
    else:
        print(f"{download_path} já existe, pulando download.")

    if not os.path.exists(os.path.join(extract_path, "jpg")):
        print("Extraindo...")
        with tarfile.open(download_path, "r:gz") as tar:
            # Use the "data" extraction filter when the interpreter provides it:
            # it rejects members that would be written outside extract_path.
            if hasattr(tarfile, "data_filter"):
                tar.extractall(path=extract_path, filter="data")
            else:
                tar.extractall(path=extract_path)
        print(f"Extração concluída em {extract_path}")
    else:
        print("Pasta 'jpg' já existe, pulando extração.")

def split_dataset(dataset, val_frac=0.1, seed=42):
    """Randomly partition ``dataset`` into a larger and a held-out part.

    Args:
        dataset: Any object accepted by ``torch.utils.data.random_split``.
        val_frac: Fraction of the samples placed in the second (held-out) part;
            the count is truncated towards zero.
        seed: Value passed to ``torch.manual_seed`` before splitting. Note that
            this reseeds torch's global RNG as a side effect.

    Returns:
        The two subsets produced by ``random_split``: the remaining samples
        first, the held-out samples second.
    """
    held_out = int(len(dataset) * val_frac)
    remaining = len(dataset) - held_out
    lengths = [remaining, held_out]
    # random_split draws its permutation from the global generator seeded here.
    torch.manual_seed(seed)
    return random_split(dataset, lengths)

def _flowers_image_number(img_name):
    """Return the 1-based number encoded in ``image_NNNNN.jpg``, or None for other names."""
    stem, ext = os.path.splitext(img_name)
    if ext.lower() != ".jpg" or not stem.startswith("image_"):
        return None
    digits = stem[len("image_"):]
    if not (digits.isascii() and digits.isdigit()):
        return None
    return int(digits)


def prepare_flowers102(data_root="data", seed=42, train_frac=0.7, val_frac=0.15):
    """Download Flowers-102 and lay it out as ``train``/``val``/``test`` class folders.

    The images and ``imagelabels.mat`` are fetched into
    ``<data_root>/flowers-102`` when missing. Images are then grouped by label,
    shuffled per class with a generator seeded by ``seed`` and divided so that
    every class contributes ``train_frac`` / ``val_frac`` / remainder of its
    images to the three splits. A plain cut of the sorted file list (the
    previous behaviour) ignored ``seed`` and did not guarantee that each class
    appears in each split.

    Each split folder is deleted and rebuilt on every call so that copies from
    an earlier, different split cannot leak between train, val and test.

    Args:
        data_root: Directory that holds (or will hold) the ``flowers-102`` folder.
        seed: Seed for the per-class shuffle; also passed to ``random.seed``.
        train_frac: Fraction of each class copied to ``train``.
        val_frac: Fraction of each class copied to ``val``; the rest goes to ``test``.

    Returns:
        Tuple ``(train_dir, val_dir, test_dir)`` of folder paths, each holding
        one sub-folder per label (named ``"1"`` .. ``"102"``).

    Raises:
        ValueError: If the fractions are negative or sum to more than 1.
        IndexError: If an image number has no entry in the label file.
    """
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1:
        raise ValueError("train_frac and val_frac must be non-negative and sum to at most 1")

    # Kept for callers that rely on the global RNG being seeded here; the
    # shuffling below uses its own generator so the global state stays as seeded.
    random.seed(seed)
    rng = random.Random(seed)

    flowers_path = os.path.join(data_root, "flowers-102")
    os.makedirs(flowers_path, exist_ok=True)

    # Download images
    url_flowers = "https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz"
    tgz_path = os.path.join(flowers_path, "102flowers.tgz")
    img_folder = os.path.join(flowers_path, "jpg")

    if not os.path.exists(img_folder):
        download_and_extract_tgz(url_flowers, tgz_path, flowers_path)

    url_labels = "https://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat"
    labels_path = os.path.join(flowers_path, "imagelabels.mat")
    if not os.path.exists(labels_path):
        print("Baixando labels...")
        urllib.request.urlretrieve(url_labels, labels_path)

    labels = scipy.io.loadmat(labels_path)['labels'][0]  # 1-D array, entry i-1 is the label of image i

    # Group image files by label, ignoring anything that is not image_NNNNN.jpg
    # (e.g. thumbnails or OS metadata files dropped into the folder).
    images_by_label = {}
    for img_name in sorted(os.listdir(img_folder)):
        number = _flowers_image_number(img_name)
        if number is None:
            continue
        if not 1 <= number <= len(labels):
            raise IndexError(f"{img_name} has no entry in {labels_path}")
        images_by_label.setdefault(int(labels[number - 1]), []).append(img_name)

    # Stratified split: shuffle and cut each class separately.
    splits = {"train": [], "val": [], "test": []}
    for label in sorted(images_by_label):
        class_images = images_by_label[label]
        rng.shuffle(class_images)
        n_train = int(train_frac * len(class_images))
        n_val = int(val_frac * len(class_images))
        splits["train"].extend((label, name) for name in class_images[:n_train])
        splits["val"].extend((label, name) for name in class_images[n_train:n_train + n_val])
        splits["test"].extend((label, name) for name in class_images[n_train + n_val:])

    for split_name, members in splits.items():
        split_dir = os.path.join(flowers_path, split_name)
        # Start from an empty folder so files of a previous split do not linger.
        if os.path.isdir(split_dir):
            shutil.rmtree(split_dir)
        os.makedirs(split_dir)
        for label, img_name in members:
            label_dir = os.path.join(split_dir, str(label))
            os.makedirs(label_dir, exist_ok=True)
            shutil.copy(os.path.join(img_folder, img_name), os.path.join(label_dir, img_name))

    return os.path.join(flowers_path, "train"), os.path.join(flowers_path, "val"), os.path.join(flowers_path, "test")


def _organize_tiny_imagenet_val(val_dir):
    """Move Tiny-ImageNet validation images into one sub-folder per class.

    Acts only when ``val_dir`` contains both an ``images`` folder and a
    ``val_annotations.txt`` file (the layout of the official archive, where
    each annotation line starts with ``<file name> <class id>``). With that
    flat layout ``ImageFolder`` would see a single class called ``images`` and
    label every sample 0. Safe to call repeatedly and after an interruption.
    """
    images_dir = os.path.join(val_dir, "images")
    annotations_path = os.path.join(val_dir, "val_annotations.txt")
    if not (os.path.isdir(images_dir) and os.path.isfile(annotations_path)):
        return

    with open(annotations_path, "r") as fh:
        for line in fh:
            fields = line.split()
            if len(fields) < 2:
                continue
            img_name, class_id = fields[0], fields[1]
            src = os.path.join(images_dir, img_name)
            if not os.path.isfile(src):
                continue  # already moved by an earlier (possibly interrupted) run
            class_dir = os.path.join(val_dir, class_id)
            os.makedirs(class_dir, exist_ok=True)
            shutil.move(src, os.path.join(class_dir, img_name))

    # Drop the now-empty folder so it is not picked up as a class.
    if not os.listdir(images_dir):
        os.rmdir(images_dir)


def _split_train_val(train_view, eval_view, val_frac, seed):
    """Split one dataset into train/val parts that use different transforms.

    ``train_view`` and ``eval_view`` must index the same samples in the same
    order and differ only in their transform. The indices are drawn once on
    ``train_view``; the held-out indices are then re-wrapped over ``eval_view``
    so validation samples are not randomly augmented.
    """
    train_part, held_out = split_dataset(train_view, val_frac=val_frac, seed=seed)
    return train_part, torch.utils.data.Subset(eval_view, held_out.indices)


def load_datasets(batch_size=64, seed=42, num_workers=2):
    """Build train/val/test DataLoaders for Flowers-102, Tiny-ImageNet and CIFAR-100.

    Missing data is downloaded into ``data/``. Training sets use random
    augmentation; validation and test sets use the deterministic
    resize + centre-crop pipeline.

    Splits:
        * Flowers-102: the folders produced by ``prepare_flowers102``.
        * Tiny-ImageNet: 90% / 10% of the ``train`` folder for train / val, and
          one half of the labelled ``val`` folder as the test set.
        * CIFAR-100: 90% / 10% of the official training set for train / val,
          and the official test set.

    Args:
        batch_size: Batch size of every loader.
        seed: Seed forwarded to ``prepare_flowers102`` and ``split_dataset``.
        num_workers: Worker processes per loader.

    Returns:
        ``(train_loaders, val_loaders, test_loaders)`` - three dicts keyed by
        ``"Flowers-102"``, ``"Tiny-ImageNet"`` and ``"CIFAR-100"``.
    """
    data_root = "data"
    os.makedirs(data_root, exist_ok=True)

    transform_train = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        transforms.RandomRotation(15),
        transforms.ToTensor(),
    ])

    transform_test = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])

    # ---- Flowers-102 -------------------------------------------------------
    train_dir, val_dir, test_dir = prepare_flowers102(data_root, seed)
    train_flowers = datasets.ImageFolder(train_dir, transform=transform_train)
    val_flowers   = datasets.ImageFolder(val_dir, transform=transform_test)
    test_flowers  = datasets.ImageFolder(test_dir, transform=transform_test)

    # ---- Tiny-ImageNet -----------------------------------------------------
    tiny_path = os.path.join(data_root, "tiny-imagenet-200")
    if not os.path.exists(tiny_path):
        url_tiny = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
        zip_path = os.path.join(data_root, "tiny-imagenet-200.zip")
        urllib.request.urlretrieve(url_tiny, zip_path)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(data_root)
        os.remove(zip_path)

    tiny_train_dir = os.path.join(tiny_path, 'train')
    tiny_val_dir = os.path.join(tiny_path, 'val')
    # Give the validation folder a class-per-folder layout so ImageFolder
    # assigns real labels instead of a single dummy class.
    _organize_tiny_imagenet_val(tiny_val_dir)

    # Only the second half of the labelled val folder is used (as test set);
    # the validation set itself is carved out of the training folder below.
    _unused_half, test_tiny = split_dataset(
        datasets.ImageFolder(tiny_val_dir, transform=transform_test), val_frac=0.5, seed=seed
    )
    train_tiny, val_tiny = _split_train_val(
        datasets.ImageFolder(tiny_train_dir, transform=transform_train),
        datasets.ImageFolder(tiny_train_dir, transform=transform_test),
        val_frac=0.1,
        seed=seed,
    )

    # ---- CIFAR-100 ---------------------------------------------------------
    train_cifar_full = datasets.CIFAR100(root=data_root, train=True, transform=transform_train, download=True)
    # Same samples, evaluation transform; the files are already on disk here.
    eval_cifar_full = datasets.CIFAR100(root=data_root, train=True, transform=transform_test, download=False)
    train_cifar, val_cifar = _split_train_val(train_cifar_full, eval_cifar_full, val_frac=0.1, seed=seed)
    test_cifar = datasets.CIFAR100(root=data_root, train=False, transform=transform_test, download=True)

    def make_loader(dataset, shuffle):
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    train_loaders = {
        "Flowers-102": make_loader(train_flowers, shuffle=True),
        "Tiny-ImageNet": make_loader(train_tiny, shuffle=True),
        "CIFAR-100": make_loader(train_cifar, shuffle=True),
    }

    val_loaders = {
        "Flowers-102": make_loader(val_flowers, shuffle=False),
        "Tiny-ImageNet": make_loader(val_tiny, shuffle=False),
        "CIFAR-100": make_loader(val_cifar, shuffle=False),
    }

    test_loaders = {
        "Flowers-102": make_loader(test_flowers, shuffle=False),
        "Tiny-ImageNet": make_loader(test_tiny, shuffle=False),
        "CIFAR-100": make_loader(test_cifar, shuffle=False),
    }

    return train_loaders, val_loaders, test_loaders
if __name__ == "__main__":
    # Build every loader once with the default batch size and seed; the three
    # dictionaries are bound at module level for use further down.
    (
        train_loaders,
        val_loaders,
        test_loaders,
    ) = load_datasets()
    print("Todos os datasets estão prontos com train, val e test!")




Baixando de https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz...
Extraindo...
Extração concluída em data\flowers-102
Baixando labels...


100%|██████████| 169M/169M [00:13<00:00, 12.6MB/s] 


Todos os datasets estão prontos com train, val e test!
