In [None]:
import numpy as np 
import pandas as pd 
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, Subset
from sklearn.model_selection import train_test_split

# Sobre os dados

O WikiArt Art Movements/Styles é um conjunto de dados amplamente utilizado em pesquisas de classificação de estilos artísticos e aprendizado de representação visual.
Ele foi construído a partir do site WikiArt.org, que reúne obras digitalizadas de centenas de artistas de diversas épocas, estilos e movimentos artísticos.

Sobre os conjuntos selecionados:

- Japanese Art 

    - Traços planos, uso de linhas limpas, pouca profundidade, tons pastéis, e forte presença de natureza e figuras estilizadas.

- Baroque 

    - Pinturas escuras, com forte contraste de luz e sombra (chiaroscuro), cenas dramáticas, expressões intensas e composição teatral.

- Art Nouveau 

    - Linhas curvas, formas orgânicas, motivos florais e cores suaves.

- Primitivism (Naïve Art)

    - Formas simplificadas, ausência de perspectiva correta, cores vibrantes e figuras "infantis".

- Renaissance (Western)

    - Equilíbrio, perspectiva linear, realismo anatômico, temas religiosos ou mitológicos.

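A escolha dos cinco estilos citados baseia-se em critérios de separabilidade visual, otimizando a aprendizagem do modelo e reduzindo ambiguidades. Nos experimentos abaixo, porém, apenas quatro deles são usados (Art Nouveau, Baroque, Japanese Art e Primitivism): Renaissance é excluído no filtro de classes.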


Cada categoria apresenta:

- Paleta de cores distinta,

- Nível de detalhamento próprio,

- Estrutura composicional diferenciada,

- Referenciais culturais únicos.

Isso reduz o overlap entre classes, facilitando a convergência do modelo e a avaliação da acurácia real de classificação.

# Carregamento

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Base transform (no augmentation): resize to 224x224 and convert to a tensor.
# NOTE: `base_transform` is redefined further down with an extra Normalize step;
# this first version has no normalization.
base_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
# Disabled: one-off loading of the full dataset, used to inspect the classes
# and to compute the per-channel statistics reported below.
#dir = "/kaggle/input/pinturas-df/database"
#dataset = datasets.ImageFolder(dir, transform=base_transform)

#print("Classes:", dataset.classes)
#print("Total de imagens:", len(dataset))

In [None]:
# from collections import Counter

# class_counts = Counter([label for _, label in dataset])
# for cls, count in zip(dataset.classes, class_counts.values()):
    # print(f"{cls}: {count} imagens")

Art_Nouveau: 3035 imagens

Baroque: 5312 imagens

Japanese_Art: 2235 imagens

Primitivism: 1324 imagens

Renaissance: 6192 imagens

- Calculando a média e o desvio padrão para normalização das imagens

In [None]:

#from tqdm import tqdm

#loader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=4)

#mean = torch.zeros(3)
#std = torch.zeros(3)

#print("Calculando média e desvio padrão...")
#for images, _ in tqdm(loader):
    # images.shape = [batch, channels, height, width]
    #batch_samples = images.size(0)
    #images = images.view(batch_samples, images.size(1), -1)  # [batch, channels, pixels]
    #mean += images.mean(2).sum(0)
    #std += images.std(2).sum(0)

#mean /= len(dataset)
#std /= len(dataset)

#print(f"\nMédia por canal: {mean}")
#print(f"Desvio padrão por canal: {std}")


> Média e desvio padrão dos canais:

> mean=[0.4997, 0.4385, 0.3752]
> 
> std=[0.2115, 0.1954, 0.1748]

## Realizando processamento e augmentation

In [None]:
# NOTE: `dir` shadows the Python builtin; the name is kept because other cells
# may refer to it.
dir = "/kaggle/input/pinturas-df/database"
base_dataset = datasets.ImageFolder(dir)

classes_desejadas = ['Art_Nouveau', 'Baroque', 'Japanese_Art', 'Primitivism']

# Label ids that ImageFolder assigned to the wanted classes.
ids_originais = [base_dataset.class_to_idx[cls] for cls in classes_desejadas]

# The samples keep their ORIGINAL label ids (nothing below rewrites them), so
# replacing `classes` / `class_to_idx` is only valid when the wanted classes
# already own the ids 0..k-1 in this exact order. Fail loudly otherwise instead
# of silently training on mislabeled data.
if ids_originais != list(range(len(classes_desejadas))):
    raise ValueError(
        "As classes desejadas não ocupam os índices 0..k-1 do ImageFolder "
        f"(índices originais: {ids_originais}); os rótulos precisariam ser remapeados."
    )

# Filter on the stored targets: iterating the dataset itself would decode every
# image from disk just to read its label.
ids_desejados = set(ids_originais)
indices_filtrados = [
    i for i, label in enumerate(base_dataset.targets)
    if label in ids_desejados
]

# Subset restricted to the wanted classes
dataset = Subset(base_dataset, indices_filtrados)

# Update the class list and the name -> id mapping seen by downstream cells
dataset.dataset.classes = classes_desejadas
dataset.dataset.class_to_idx = {cls: i for i, cls in enumerate(classes_desejadas)}

print("Classes escolhidas:", dataset.dataset.classes)
print("Total de imagens após filtro:", len(dataset))

In [None]:
# Per-channel statistics used by every Normalize step in this notebook.
NORM_MEAN = [0.4997, 0.4385, 0.3752]
NORM_STD = [0.2115, 0.1954, 0.1748]
IMG_SIZE = (224, 224)


def _augmented_transform():
    """Build the augmentation pipeline shared by Art_Nouveau, Japanese_Art and Primitivism.

    A fresh Compose is returned on every call so each class keeps its own
    transform instance, as in the original hand-written dictionary.
    """
    return transforms.Compose([
        transforms.Resize(IMG_SIZE),
        transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),
        transforms.RandomHorizontalFlip(p=0.4),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.1, contrast=0.1),
        transforms.RandomPerspective(distortion_scale=0.05, p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
        # RandomErasing operates on tensors, so it must come after ToTensor.
        transforms.RandomErasing(p=0.3),
    ])


# Class name -> training transform. Baroque only gets resize, normalization and
# random erasing (no geometric or color augmentation).
augmentations = {
    "Art_Nouveau": _augmented_transform(),
    "Japanese_Art": _augmented_transform(),
    "Primitivism": _augmented_transform(),
    "Baroque": transforms.Compose([
        transforms.Resize(IMG_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
        transforms.RandomErasing(p=0.3),
    ]),
}

# Deterministic transform: fallback for classes without an entry above and the
# transform used for evaluation.
base_transform = transforms.Compose([
    transforms.Resize(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

In [None]:
class AugmentationClasse():
    """Map-style dataset view that applies a transform chosen by class name.

    Parameters
    ----------
    dataset : indexable returning ``(image, label)`` pairs; it must expose
        ``dataset.dataset.class_to_idx`` (class name -> label id).
    indices : positions of ``dataset`` that belong to this view.
    augmentations : dict mapping class name -> transform callable.
    base_transform : transform used when a class has no entry in ``augmentations``.
    """

    def __init__(self, dataset, indices, augmentations, base_transform):
        self.dataset = dataset
        self.indices = indices
        self.augmentations = augmentations
        self.base_transform = base_transform
        self.class_to_idx = dataset.dataset.class_to_idx
        # Reverse lookup: label id -> class name.
        self.idx_to_class = dict(
            (label_id, name) for name, label_id in self.class_to_idx.items()
        )

    def __len__(self):
        """Number of samples in this view."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the ``idx``-th sample of the view."""
        sample, target = self.dataset[self.indices[idx]]
        name = self.idx_to_class[target]
        chosen = self.augmentations.get(name, self.base_transform)
        return chosen(sample), target

In [None]:
# Label of every sample kept by the class filter, in Subset order; used only
# to stratify the split.
labels = [base_dataset.imgs[base_idx][1] for base_idx in indices_filtrados]

# Stratified 80/20 split over positions inside the filtered Subset
posicoes = list(range(len(dataset)))
train_idx, test_idx = train_test_split(
    posicoes,
    stratify=labels,
    test_size=0.2,
    random_state=42,
)

# Training view gets the per-class augmentations; the test view gets an empty
# mapping so every class falls back to the deterministic base transform.
train_dataset = AugmentationClasse(dataset, train_idx, augmentations, base_transform)
test_dataset = AugmentationClasse(dataset, test_idx, {}, base_transform)

test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32, num_workers=4)

In [None]:
from collections import Counter
import matplotlib.pyplot as plt

def plot_class_distribution(dataset, title):
    """Plot a bar chart with the number of samples per class.

    Parameters
    ----------
    dataset : iterable of ``(image, label)`` pairs exposing ``idx_to_class``
        (label id -> class name). Note that iterating it runs the dataset's
        ``__getitem__`` for every sample.
    title : str
        Title of the figure.
    """
    class_counts = Counter(label for _, label in dataset)

    idx_to_class = dataset.idx_to_class
    # Look names and counts up with the SAME sorted ids so each bar is labeled
    # with its own class even when the ids are not the contiguous range 0..n-1.
    class_ids = sorted(idx_to_class.keys())
    class_names = [idx_to_class[i] for i in class_ids]
    counts = [class_counts[i] for i in class_ids]

    plt.figure(figsize=(8, 4))
    plt.bar(class_names, counts, color="skyblue")
    plt.title(title)
    plt.ylabel("Quantidade de imagens")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


# print("Distribuição no treino:")
# plot_class_distribution(train_dataset, "Distribuição Estratificada (Treino)")

# print("Distribuição no teste:")
# plot_class_distribution(test_dataset, "Distribuição Estratificada (Teste)")

## Criando oversample para as classes minoritárias

In [None]:
from torch.utils.data import WeightedRandomSampler

# `train_idx` holds positions inside the filtered Subset, while `imgs` belongs
# to the underlying ImageFolder: translate through `dataset.indices` first.
# Indexing `imgs` with the Subset position is only right when the filter
# happens to keep a prefix of the base dataset.
train_labels = [dataset.dataset.imgs[dataset.indices[i]][1] for i in train_idx]
class_counts = Counter(train_labels)

# Weight inversely proportional to the class frequency
num_samples = len(train_labels)
class_weights = {cls: num_samples / count for cls, count in class_counts.items()}

# One weight per training sample, aligned with the order of `train_idx`
sample_weights = [class_weights[label] for label in train_labels]

sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),  # same number of draws as the training set size
    replacement=True  # allows repetition (oversampling)
)

# `sampler` and `shuffle` are mutually exclusive in DataLoader; the sampler
# already randomizes the order.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    sampler=sampler,
    num_workers=4
)

In [None]:
import matplotlib.pyplot as plt
import torch

def denormalize(img_tensor, mean, std):
    """Undo a channel-wise normalization, i.e. compute ``img * std + mean``.

    Parameters
    ----------
    img_tensor : torch.Tensor
        Normalized image ``(C, H, W)`` or batch ``(B, C, H, W)``.
    mean, std : sequence of float
        One value per channel, the same values that were given to Normalize.

    Returns
    -------
    torch.Tensor
        Tensor with the same shape as ``img_tensor``.
    """
    # Build the statistics on the image's device and let the channel count
    # follow the input, so GPU tensors and non-RGB images work as well.
    mean = torch.tensor(mean, device=img_tensor.device).view(-1, 1, 1)
    std = torch.tensor(std, device=img_tensor.device).view(-1, 1, 1)
    return img_tensor * std + mean

# Per-channel statistics; same values passed to transforms.Normalize in the
# augmentation pipelines, so denormalize() can invert that step.
mean = [0.4997, 0.4385, 0.3752]
std = [0.2115, 0.1954, 0.1748]

# Number of images shown by the (currently disabled) preview below
num_imgs = 5

# plt.figure(figsize=(20, 10))
# for i in range(num_imgs):
    #img, label = train_dataset[i]  # obtém uma imagem augmentada e o rótulo
    #class_name = train_dataset.idx_to_class[label]

    # Desnormaliza e converte para formato (H, W, C)
    #img_denorm = denormalize(img, mean, std).permute(1, 2, 0).clamp(0, 1)

    #plt.subplot(1, num_imgs, i + 1)
    #plt.imshow(img_denorm)
    #plt.title(f"{class_name}", fontsize=12)
    #plt.axis("off")

#plt.suptitle("Visualização das Aumentações por Classe", fontsize=14)
#plt.tight_layout()
#plt.show()


In [None]:
# Draw one full pass from the weighted sampler and keep the first N positions
# to check how balanced the oversampled stream is.
num_samples_to_simulate = 2000  # number of draws to inspect
simulated_indices = list(sampler)[:num_samples_to_simulate]

# Sampler outputs are positions in the training view; map them to labels
simulated_labels = []
for drawn in simulated_indices:
    simulated_labels.append(train_labels[drawn])

# How many times each class was drawn
simulated_counts = Counter(simulated_labels)

# Bar chart of the simulated class distribution
bar_names = [dataset.dataset.classes[label_id] for label_id in simulated_counts.keys()]
plt.figure(figsize=(10, 5))
plt.bar(bar_names, simulated_counts.values(), color="skyblue")
plt.title("Distribuição simulada após WeightedRandomSampler (oversampling aplicado)")
plt.xlabel("Classes")
plt.ylabel("Número de amostras simuladas")
plt.xticks(rotation=45)
plt.show()

# Recuperando o vetor latente através das características extraídas pela CNN

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class PinturasCNN(nn.Module):
    """Small CNN classifier that also exposes a latent feature vector.

    Layout: conv stem (3 -> 32) -> three double-conv blocks (64, 128, 192), each
    halving the resolution -> global average pooling -> linear latent layer ->
    dropout -> linear classifier.

    Parameters
    ----------
    num_classes : int
        Number of output logits.
    latent_dim : int
        Size of the latent vector.
    dropout_p : float
        Dropout probability applied between the latent vector and the classifier.
    """

    def __init__(self, num_classes, latent_dim=128, dropout_p=0.6):
        super().__init__()

        # Stem: first convolution at full resolution
        self.stem = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
        )

        # Convolutional stages; each one ends with a 2x2 max-pool
        self.block1 = self._conv_block(32, 64)
        self.block2 = self._conv_block(64, 128)
        self.block3 = self._conv_block(128, 192)

        # Global 1x1 average pooling keeps the latent head small
        self.gap = nn.AdaptiveAvgPool2d((1, 1))

        # Latent projection and classification head
        self.fc_latent = nn.Linear(192, latent_dim)
        self.bn_latent = nn.BatchNorm1d(latent_dim)
        self.dropout = nn.Dropout(dropout_p)
        self.fc_out = nn.Linear(latent_dim, num_classes)

        # Kaiming init for convolutions, Xavier for linear layers
        self._init_weights()

    def _conv_block(self, in_ch, out_ch):
        """Return two 3x3 conv + BatchNorm + ReLU units followed by a 2x2 max-pool."""
        layers = []
        channels = in_ch
        for _ in range(2):
            layers.extend([
                nn.Conv2d(channels, out_ch, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
            ])
            channels = out_ch
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def _init_weights(self):
        """Re-initialize every Conv2d (Kaiming normal) and Linear (Xavier uniform) weight."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, nonlinearity='relu')
            elif isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)

    def forward(self, x, return_latent=False):
        """Return ``(logits, latent)`` for a batch of images.

        ``return_latent`` is accepted for API symmetry but has no effect: both
        values are always returned, and callers unpack two values.
        """
        features = self.stem(x)
        for stage in (self.block1, self.block2, self.block3):
            features = stage(features)
        pooled = torch.flatten(self.gap(features), 1)

        # Latent vector: linear -> BatchNorm -> ReLU (hence non-negative)
        latent = F.relu(self.bn_latent(self.fc_latent(pooled)))

        logits = self.fc_out(self.dropout(latent))
        return logits, latent


## Treinamento

In [None]:
import torch.optim as optim

# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Number of classes kept after the class filter.
num_classes = len(dataset.dataset.classes)
latent_dim = 256
learning_rate = 3e-4
epochs = 100
# Epochs without validation-loss improvement before training stops.
early_stop_patience = 10
checkpoint_path = "/kaggle/working/melhor_modelo.pth"

# Custom CNN trained from scratch (replaced later by the transfer-learning model).
model = PinturasCNN(num_classes=num_classes, latent_dim=latent_dim).to(device)

In [None]:
# Disabled training loop for PinturasCNN, kept as a string literal so the cell
# is a no-op. Remove the surrounding triple quotes to re-run the training.
# CosineAnnealingLR is stepped without a metric: only ReduceLROnPlateau takes
# the validation loss in `step()`.
'''
from torch.cuda.amp import autocast, GradScaler
import torch
import numpy as np
from tqdm import tqdm

criterion = nn.CrossEntropyLoss(label_smoothing=0.1) 
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=15,         # número de épocas até um ciclo completo
    eta_min=1e-5      # LR mínimo
)
scaler = torch.amp.GradScaler('cuda' if torch.cuda.is_available() else 'cpu')  # combina float16 e float32 pra acelerar o treinamento
device_type = 'cuda' if torch.cuda.is_available() else 'cpu'

best_val_loss = np.inf
patience_counter = 0

for epoch in range(1, epochs + 1):
    model.train()
    train_loss, correct, total = 0.0, 0, 0

    for imgs, labels in tqdm(train_loader, desc=f"Treino Época {epoch}/{epochs}"):
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()

        with torch.amp.autocast(device_type=device_type):
            logits, _ = model(imgs)
            loss = criterion(logits, labels)

        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)  # evita exploding grads
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item() * imgs.size(0)
        preds = logits.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    train_loss /= total
    train_acc = correct / total

    # validacao
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0
    with torch.no_grad(), torch.amp.autocast(device_type=device_type):
        for imgs, labels in tqdm(test_loader, desc="Validação"):
            imgs, labels = imgs.to(device), labels.to(device)
            logits, _ = model(imgs)
            loss = criterion(logits, labels)

            val_loss += loss.item() * imgs.size(0)
            preds = logits.argmax(1)
            val_correct += (preds == labels).sum().item()
            val_total += labels.size(0)

    val_loss /= val_total
    val_acc = val_correct / val_total
    scheduler.step()

    print(f"Época {epoch}: Loss Treino={train_loss:.4f}, Acc Treino={train_acc:.3f}, Loss Val={val_loss:.4f}, Acc Val={val_acc:.3f}")

    if val_loss < best_val_loss - 1e-4:
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), checkpoint_path)
        print("Melhor modelo salvo!")
    else:
        patience_counter += 1
        if patience_counter >= early_stop_patience:
            print("Early stopping acionado.")
            break

print("Treino finalizado com sucesso.")
'''

> Melhores resultados obtidos:

> Época 25:
> - Loss Treino=0.6583, Acc Treino=0.846
> - Loss Val=0.9219, Acc Val=0.690 

# Transfer Learning com ImageNet

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

class PinturasCNN_TL(nn.Module):
    """ResNet18 (ImageNet weights) feature extractor with a latent head and classifier.

    Parameters
    ----------
    num_classes : int
        Number of output logits.
    latent_dim : int
        Size of the latent vector produced before the classifier.
    dropout_p : float
        Dropout probability applied to the latent vector before classification.
    freeze_backbone : bool
        When True, the backbone parameters get ``requires_grad = False``.
    """

    def __init__(self, num_classes, latent_dim=256, dropout_p=0.5, freeze_backbone=True):
        super().__init__()

        resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

        # Keep everything up to (and including) the average pool; drop the fc layer.
        feature_layers = list(resnet.children())[:-1]
        self.backbone = nn.Sequential(*feature_layers)
        backbone_out_dim = resnet.fc.in_features

        # NOTE(review): this only stops gradient updates. The backbone's
        # BatchNorm layers presumably still update their running statistics
        # whenever the model is in train() mode; confirm that this is intended.
        if freeze_backbone:
            for weight in self.backbone.parameters():
                weight.requires_grad = False

        # Latent projection
        self.fc_latent = nn.Linear(backbone_out_dim, latent_dim)
        self.bn_latent = nn.BatchNorm1d(latent_dim)
        self.dropout = nn.Dropout(dropout_p)

        # Classifier
        self.fc_out = nn.Linear(latent_dim, num_classes)

        nn.init.kaiming_normal_(self.fc_latent.weight, nonlinearity='relu')
        nn.init.xavier_uniform_(self.fc_out.weight)

    def forward(self, x, return_latent=False):
        """Return ``(logits, latent)`` for a batch of images.

        ``return_latent`` has no effect: both values are returned either way,
        and callers unpack two values.
        """
        # Feature extraction, then flatten to (batch, features)
        features = torch.flatten(self.backbone(x), 1)

        # Latent vector: linear -> BatchNorm -> ReLU
        latent = F.relu(self.bn_latent(self.fc_latent(features)))

        # Classification on the dropped-out latent vector
        logits = self.fc_out(self.dropout(latent))
        return logits, latent


## Treinamento

In [None]:
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler
import numpy as np
from tqdm import tqdm

# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = len(dataset.dataset.classes)
latent_dim = 256
learning_rate = 3e-4
epochs = 150
# Epochs without validation-loss improvement before training stops.
early_stop_patience = 10
checkpoint_path = "/kaggle/working/melhor_modelo_tl.pth"

# Instantiate the transfer-learning model with a frozen backbone
model = PinturasCNN_TL(num_classes=num_classes, latent_dim=latent_dim, freeze_backbone=True).to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=5e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-5)
print(device)
# GradScaler expects the device *type* as a string ("cuda" / "cpu"), matching
# what the first training loop passes; hand it `device.type`, not the object.
scaler = torch.amp.GradScaler(device.type)

# Early-stopping state
best_val_loss = np.inf
patience_counter = 0

In [None]:
# Disabled training loop for PinturasCNN_TL, kept as a string literal so the
# cell is a no-op. Remove the surrounding triple quotes to re-run the training.
# autocast follows `device.type` so the loop also runs on the CPU fallback
# selected in the configuration cell.
'''
for epoch in range(1, epochs + 1):
    model.train()
    train_loss, correct, total = 0.0, 0, 0

    for imgs, labels in tqdm(train_loader, desc=f"Treino Época {epoch}/{epochs}"):
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()

        with torch.amp.autocast(device_type=device.type):
            logits, _ = model(imgs)
            loss = criterion(logits, labels)

        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item() * imgs.size(0)
        preds = logits.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    train_loss /= total
    train_acc = correct / total

    # validacao
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0
    with torch.no_grad(), torch.amp.autocast(device_type=device.type):
        for imgs, labels in tqdm(test_loader, desc="Validação"):
            imgs, labels = imgs.to(device), labels.to(device)
            logits, _ = model(imgs)
            loss = criterion(logits, labels)

            val_loss += loss.item() * imgs.size(0)
            preds = logits.argmax(1)
            val_correct += (preds == labels).sum().item()
            val_total += labels.size(0)

    val_loss /= val_total
    val_acc = val_correct / val_total
    scheduler.step()

    print(f"Época {epoch:03d}: "
          f"Loss Treino={train_loss:.4f}, Acc Treino={train_acc:.3f}, "
          f"Loss Val={val_loss:.4f}, Acc Val={val_acc:.3f}")

    if val_loss < best_val_loss - 1e-4:
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), checkpoint_path)
        print("Melhor modelo salvo!")
    else:
        patience_counter += 1
        if patience_counter >= early_stop_patience:
            print("Early stopping acionado.")
            break

print("Treinamento finalizado com sucesso!")
'''

Melhor modelo:

- Loss Treino=0.6288, Acc Treino=0.871 

- Loss Val=0.7206, Acc Val=0.808

# Carregando o modelo para extração do vetor latente 

In [None]:
# Load the best transfer-learning checkpoint. `map_location` lets a checkpoint
# saved on a GPU be restored on a CPU-only machine (same as the later reload).
caminho = '/kaggle/input/modelo-tl/pytorch/default/1/melhor_modelo_tl.pth'
model.load_state_dict(torch.load(caminho, map_location=device))
model.eval()

latents_list = []
labels_list = []

# Collect the latent vector and the label of every test sample
with torch.no_grad():
    for imgs, labels in tqdm(test_loader, desc="Extraindo vetores latentes"):
        imgs = imgs.to(device)
        _, latents = model(imgs, return_latent=True)
        latents_list.append(latents.cpu())
        labels_list.append(labels)

latents_all = torch.cat(latents_list)
labels_all = torch.cat(labels_list)

print("Latents extraídos:", latents_all.shape)

In [None]:
from sklearn.cluster import MiniBatchKMeans
from sklearn.metrics import silhouette_score, normalized_mutual_info_score, adjusted_rand_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, normalize
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# L2-normalize each latent vector before clustering
X = normalize(latents_all.numpy())
y = labels_all.numpy()

# Reduce the dimensionality to stabilize KMeans
pca = PCA(n_components=100, random_state=42)
X_pca = pca.fit_transform(X)

# One cluster per class
n_clusters = num_classes

# MiniBatch KMeans on the PCA-reduced latents
kmeans = MiniBatchKMeans(
    n_clusters=n_clusters,
    batch_size=256,
    random_state=42,
    n_init='auto'
)
kmeans.fit(X_pca)

clusters = kmeans.predict(X_pca)

# Clustering quality metric
silhouette_avg = silhouette_score(X_pca, clusters)
print(f"Coeficiente de Silhouette Global: {silhouette_avg:.3f}")

# For each true class, count how many distinct clusters its samples fall into
df_eval = pd.DataFrame({"classe": y, "cluster": clusters})
compactness_by_class = df_eval.groupby("classe")["cluster"].nunique()
print("\nNúmero de clusters distintos por classe:\n", compactness_by_class)

# Second PCA down to 2 components, for visualization only
pca_2d = PCA(n_components=2)
X_2d = pca_2d.fit_transform(X_pca)

fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Left panel is colored by KMeans cluster, right panel by true class name.
panels = [
    (clusters, 'tab10', "Clusters aprendidos pelo KMeans (PCA 2D)", "Cluster"),
    ([dataset.dataset.classes[i] for i in y], 'Set2', "Classes originais (PCA 2D)", "Classe"),
]
for ax, (hue_values, palette_name, panel_title, legend_title) in zip(axes, panels):
    sns.scatterplot(
        ax=ax,
        x=X_2d[:, 0],
        y=X_2d[:, 1],
        hue=hue_values,
        palette=palette_name,
        s=30,
        alpha=0.8,
        linewidth=0
    )
    ax.set_title(panel_title, fontsize=13)
    ax.set_xlabel("Componente Principal 1")
    ax.set_ylabel("Componente Principal 2")
    ax.legend(title=legend_title, bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

# Recortando os clusters para identificar padrões 

In [None]:
from torch.utils.data import Subset, DataLoader
from torchvision import transforms, datasets

# Class whose images are sub-clustered below
classe_alvo = "Baroque"

# Deterministic inference transform: resize, tensor, normalization
transform_infer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4997, 0.4385, 0.3752],
                         std=[0.2115, 0.1954, 0.1748]),
])

# Fresh ImageFolder over the same root, so it has its own class mapping
dataset_infer = datasets.ImageFolder(
    root=dataset.dataset.root,
    transform=transform_infer
)

# Positions of every sample whose stored label is the target class
idx_classe = dataset_infer.class_to_idx[classe_alvo]
indices_classe = [
    pos for pos, sample in enumerate(dataset_infer.samples)
    if sample[1] == idx_classe
]

subset_classe = Subset(dataset_infer, indices_classe)

# shuffle=False keeps the batches aligned with `indices_classe`
subset_loader = DataLoader(
    subset_classe,
    batch_size=32,
    num_workers=4,
    shuffle=False
)

print(f"{classe_alvo}: {len(subset_classe)} imagens selecionadas para extração de embeddings.")

In [None]:
import torch
import numpy as np
from tqdm import tqdm

# Restore the trained transfer-learning weights and switch to inference mode
model.load_state_dict(torch.load("/kaggle/input/modelo-tl/pytorch/default/1/melhor_modelo_tl.pth", map_location=device))
model.eval()

# Latent embeddings of every image of the target class, in loader order
lotes_latentes = []
with torch.no_grad():
    for imgs, _ in tqdm(subset_loader, desc=f"Extraindo latentes de {classe_alvo}"):
        imgs = imgs.to(device)
        _, latent = model(imgs, return_latent=True)
        lotes_latentes.append(latent.cpu().numpy())

# Stack the per-batch arrays into a single (n_images, latent_dim) array
latents = np.concatenate(lotes_latentes, axis=0)
print("Latentes extraidos:", latents.shape)

In [None]:
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, normalize
from scipy.spatial.distance import cdist
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Standardize each latent dimension, then L2-normalize each vector.
# A dedicated name is used so this StandardScaler does not overwrite the
# global `scaler` (the AMP GradScaler used by the training loops).
latent_scaler = StandardScaler()
latents_norm = latent_scaler.fit_transform(latents)
latents_norm = normalize(latents_norm)

k = 4  # number of sub-clusters; adjust as needed
# `n_init` is deliberately left at the library default: changing it could
# renumber the clusters that the DCGAN selection cell refers to by id.
kmeans = KMeans(n_clusters=k, random_state=42)
clusters = kmeans.fit_predict(latents_norm)
centroids = kmeans.cluster_centers_

print("Distribuição dos subclusters:")
unique, counts = np.unique(clusters, return_counts=True)
for c, n in zip(unique, counts):
    print(f"Cluster {c}: {n} imagens")

# 2-D projection for plotting; centroids go through the same fitted PCA
pca = PCA(n_components=2)
latents_2d = pca.fit_transform(latents_norm)
centroids_2d = pca.transform(centroids)

plt.figure(figsize=(9, 7))
sns.scatterplot(
    x=latents_2d[:, 0], y=latents_2d[:, 1],
    hue=clusters, palette="tab10", s=35, alpha=0.8
)
plt.scatter(
    centroids_2d[:, 0], centroids_2d[:, 1],
    c='black', s=150, marker='X', edgecolor='white', linewidth=1.5,
    label='Centróides'
)
# `cx` / `cy` instead of `x` / `y`, so the label array `y` is not clobbered
for i, (cx, cy) in enumerate(centroids_2d):
    plt.text(cx+0.02, cy+0.02, f"C{i}", fontsize=10, fontweight="bold", color="black")

plt.title(f"Distribuição dos subgrupos e centróides – {classe_alvo}")
plt.xlabel("PCA 1")
plt.ylabel("PCA 2")
plt.legend(title="Cluster", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

In [None]:
# Pairwise Euclidean distances between the KMeans centroids
dist_matrix = cdist(centroids, centroids, metric='euclidean')

print("\nMatriz de distâncias entre centróides:")
print(np.round(dist_matrix, 3))

# Same "C<i>" tick labels on both heatmap axes
rotulos_centroides = [f"C{i}" for i in range(k)]

plt.figure(figsize=(7, 5))
sns.heatmap(
    dist_matrix,
    annot=True,
    fmt=".3f",
    cmap="Blues",
    xticklabels=rotulos_centroides,
    yticklabels=list(rotulos_centroides),
)
plt.title("Distâncias Euclidianas entre Centróides")
plt.xlabel("Cluster")
plt.ylabel("Cluster")
plt.tight_layout()
plt.show()

# Selecionando os dados para DCGAN

In [None]:
import os
from PIL import Image
from torchvision.utils import save_image

# Sub-cluster ids (from the KMeans run above) whose images feed the DCGAN
clusters_desejados = [0, 1]

# Positions inside `subset_classe` whose cluster id is one of the wanted ones
indices_clusters_desejados = []
for posicao, cluster_id in enumerate(clusters):
    if cluster_id in clusters_desejados:
        indices_clusters_desejados.append(posicao)

print(f"Total de imagens selecionadas: {len(indices_clusters_desejados)}")

subset_clusters = torch.utils.data.Subset(subset_classe, indices_clusters_desejados)

In [None]:
import torch
import numpy as np

# Output folder for the selected images
save_dir = "/kaggle/working/subset_baroque/"
os.makedirs(save_dir, exist_ok=True)

print(f"Salvando imagens em: {save_dir}")

# Normalization statistics shaped (3, 1, 1) so they broadcast over (C, H, W)
mean = torch.tensor([0.4997, 0.4385, 0.3752]).view(3, 1, 1)
std = torch.tensor([0.2115, 0.1954, 0.1748]).view(3, 1, 1)

for i, (img, _) in enumerate(subset_clusters):
    # Undo the normalization and clip to [0, 1] before writing the file
    img = (img.clone().cpu() * std + mean).clamp(0, 1)
    save_path = os.path.join(save_dir, f"img_{i:04d}.jpg")
    save_image(img, save_path)

print(f"{len(subset_clusters)} imagens salvas em {save_dir}")

# Construindo gerador DCGAN

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Resolution of the GAN images and mini-batch size
image_size = 128
batch_size = 64

# Light augmentation; the final Normalize maps pixel values from [0, 1] to
# [-1, 1], the output range of the generator's Tanh.
transform_gan = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

dataset_gan = datasets.ImageFolder(
    root="/kaggle/input/subset-dcgan",
    transform=transform_gan,
)

dataloader_gan = DataLoader(
    dataset_gan,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
)
print(f"Dataset carregado com {len(dataset_gan)} imagens.")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.utils import make_grid, save_image
import os

# Device name as a plain string (rebinds the torch.device object used earlier).
device = "cuda" if torch.cuda.is_available() else "cpu"

vl = 128     # size of the latent (noise) vector
nc = 3       # number of image channels (RGB)
lr = 0.0002  # learning rate for both Adam optimizers
beta1 = 0.5  # first Adam beta for both optimizers
num_epochs = 2000

class Generator(nn.Module):
    """DCGAN generator: maps a ``(B, z_dim, 1, 1)`` noise tensor to a 128x128 image.

    The first transposed convolution expands 1x1 to 4x4; each following one
    doubles the resolution (4 -> 8 -> 16 -> 32 -> 64 -> 128). Tanh bounds the
    output to [-1, 1].
    """

    def __init__(self, z_dim=128, img_channels=3):
        super().__init__()
        widths = (1024, 512, 256, 128, 64)

        # 1x1 -> 4x4 projection of the noise vector
        layers = [
            nn.ConvTranspose2d(z_dim, widths[0], 4, 1, 0, bias=False),
            nn.BatchNorm2d(widths[0]),
            nn.ReLU(True),
        ]
        # Upsampling stages: kernel 4, stride 2, padding 1 doubles the resolution
        for c_in, c_out in zip(widths, widths[1:]):
            layers += [
                nn.ConvTranspose2d(c_in, c_out, 4, 2, 1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]
        # Final stage to image channels (-> 128x128), without BatchNorm
        layers += [
            nn.ConvTranspose2d(widths[-1], img_channels, 4, 2, 1, bias=False),
            nn.Tanh(),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, z):
        """Generate images from the noise batch ``z``."""
        return self.net(z)

class Discriminator(nn.Module):
    """DCGAN discriminator with spectral normalization on every convolution.

    Five stride-2 convolutions reduce a 128x128 image to 4x4
    (128 -> 64 -> 32 -> 16 -> 8 -> 4); a final 4x4 convolution yields one
    logit per image. No sigmoid is applied here.
    """

    def __init__(self, img_channels=3):
        super().__init__()
        widths = (img_channels, 64, 128, 256, 512, 1024)

        layers = []
        # Downsampling stages: kernel 4, stride 2, padding 1 halves the resolution
        for c_in, c_out in zip(widths, widths[1:]):
            layers.append(nn.utils.spectral_norm(nn.Conv2d(c_in, c_out, 4, 2, 1, bias=False)))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
        # 4x4 -> 1x1 logit
        layers.append(nn.utils.spectral_norm(nn.Conv2d(widths[-1], 1, 4, 1, 0, bias=False)))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return a flat tensor with one raw logit per input image."""
        scores = self.net(x)
        return scores.view(-1)

def weights_init(m):
    """DCGAN weight initialization, meant to be used with ``module.apply``.

    Convolution layers (including transposed ones) get weights drawn from
    N(0, 0.02); BatchNorm layers get scale from N(1, 0.02) and zero bias.
    Other module types are left untouched.
    """
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        # With nn.utils.spectral_norm the learnable parameter is `weight_orig`;
        # `weight` is a derived tensor recomputed from it on every forward, so
        # initializing `weight` alone would be discarded.
        target = getattr(m, 'weight_orig', None)
        if target is None:
            target = m.weight
        nn.init.normal_(target.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        # BatchNorm built with affine=False has no weight/bias to initialize.
        if m.weight is not None:
            nn.init.normal_(m.weight.data, 1.0, 0.02)
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0)

# Build both networks on the selected device.
netG = Generator(z_dim=vl, img_channels=nc).to(device)
netD = Discriminator(img_channels=nc).to(device)

# Apply the custom initialization to every sub-module of both networks.
netG.apply(weights_init)
netD.apply(weights_init)

# The discriminator outputs raw logits, so the sigmoid is folded into the loss.
criterion = nn.BCEWithLogitsLoss()

optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

# Fixed noise batch, reused every epoch so the sample grids are comparable.
fixed_noise = torch.randn(64, vl, 1, 1, device=device)

In [None]:
import matplotlib.pyplot as plt

# Folder that receives one grid of generated samples per epoch.
save_dir = "/kaggle/working/img_geradas/"
os.makedirs(save_dir, exist_ok=True)

for epoch in range(num_epochs):
    for i, (imgs, _) in enumerate(dataloader_gan):
        real_imgs = imgs.to(device)
        b_size = real_imgs.size(0)

        # One-sided label smoothing for stability: real targets are 0.9, fake targets 0.0
        real_labels = torch.full((b_size,), 0.9, device=device)
        fake_labels = torch.full((b_size,), 0.0, device=device)

        # --- Discriminator step: real batch plus detached fake batch ---
        optimizerD.zero_grad()

        output_real = netD(real_imgs)
        loss_real = criterion(output_real, real_labels)

        noise = torch.randn(b_size, vl, 1, 1, device=device)
        fake_imgs = netG(noise)
        # detach() keeps the discriminator loss from back-propagating into the generator
        output_fake = netD(fake_imgs.detach())
        loss_fake = criterion(output_fake, fake_labels)

        loss_D = loss_real + loss_fake
        loss_D.backward()
        optimizerD.step()

        # --- Generator step: make the updated discriminator label fakes as real ---
        optimizerG.zero_grad()
        output = netD(fake_imgs)
        loss_G = criterion(output, real_labels)
        loss_G.backward()
        optimizerG.step()

    # End of epoch: render the fixed noise batch to track progress.
    with torch.no_grad():
        fake = netG(fixed_noise).detach().cpu()
    grid = make_grid(fake, padding=2, normalize=True)
    save_image(grid, os.path.join(save_dir, f"epoch_{epoch+1:03d}.png"))
    
    plt.figure(figsize=(6,6))
    plt.axis("off")
    plt.title(f"Imagens Geradas - Época {epoch+1}")
    plt.imshow(grid.permute(1, 2, 0).numpy())
    plt.show()
    
    # The losses printed here are those of the last batch of the epoch only.
    print(f"[{epoch+1}/{num_epochs}] Loss D: {loss_D:.4f}, Loss G: {loss_G:.4f}")

print("Treinamento concluído e imagens salvas em:", save_dir)