In [None]:
!pip install opendatasets

In [None]:
import os
import shutil

# Relative local path to the dataset folder (no /content prefix: this runs outside Google Colab).
datapath = "./garbage_classification"

# Class folders to keep; every other sub-directory of `datapath` is deleted below.
util_dirs = ['cardboard', 'paper', 'metal', 'white-glass', 'plastic', 'brown-glass', 'green-glass']

# WARNING: destructive step - unwanted class folders are removed from disk for good.
# The loop variable is deliberately not named `dir`: in a notebook every name is global,
# so that would shadow the built-in dir() for the rest of the kernel session.
for folder_name in os.listdir(datapath):
    dir_path = os.path.join(datapath, folder_name)
    if not os.path.isdir(dir_path) or folder_name in util_dirs:
        continue  # plain files and wanted class folders are left untouched
    try:
        shutil.rmtree(dir_path)
        print(f"Carpeta '{dir_path}' eliminada correctamente.")
    except OSError as e:
        # Best effort: report the failure and carry on with the remaining folders.
        print(f"Error al eliminar la carpeta '{dir_path}': {e}")



In [None]:
# Count the entries inside every class folder of the dataset.
# Only sub-directories are visited: a plain file sitting directly under `datapath`
# (e.g. .DS_Store) would otherwise make os.listdir() raise NotADirectoryError.
counter = 0
for folder_name in os.listdir(datapath):
    dir_path = os.path.join(datapath, folder_name)
    if os.path.isdir(dir_path):
        counter += len(os.listdir(dir_path))
print(counter)
TOTAL_SAMPLES = counter

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split, DataLoader
from torchvision import datasets, transforms, models
from tqdm.notebook import tqdm, trange
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# 1. Transforms
# Values shared by the training and evaluation pipelines.
IMAGE_SIZE = (224, 224)
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, light random augmentation, then tensor conversion + normalisation.
train_transforms = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

# Evaluation pipeline: same preprocessing without any random augmentation.
val_transforms = transforms.Compose(
    [
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ]
)

# 2. Dataset
# Two ImageFolder views over the same directory that differ only in their transform.
# A Subset has no transform of its own - it reads through to its parent dataset - so
# splitting ONE ImageFolder and assigning `.dataset.transform` per split makes the last
# assignment win for every split, and the training augmentations are never applied.
base_dataset = datasets.ImageFolder(root=datapath, transform=val_transforms)
augmented_dataset = datasets.ImageFolder(root=datapath, transform=train_transforms)
# Both views must list the same files in the same order for shared indices to be valid.
assert augmented_dataset.samples == base_dataset.samples

# 80 % train / 10 % validation / remainder test.
total_size = len(base_dataset)
train_size = int(0.8 * total_size)
val_size = int(0.1 * total_size)
test_size = total_size - train_size - val_size

# Seeded shuffle of the sample indices so the split is identical after a kernel restart.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
shuffled_indices = torch.randperm(total_size, generator=split_generator).tolist()
train_indices = shuffled_indices[:train_size]
val_indices = shuffled_indices[train_size:train_size + val_size]
test_indices = shuffled_indices[train_size + val_size:]

# Training reads through the augmented view; validation and test use the plain view.
train_dataset = torch.utils.data.Subset(augmented_dataset, train_indices)
val_dataset = torch.utils.data.Subset(base_dataset, val_indices)
test_dataset = torch.utils.data.Subset(base_dataset, test_indices)

# 3. Loaders
# Settings common to all three loaders; only the training loader shuffles its samples.
loader_kwargs = dict(batch_size=16, num_workers=4, pin_memory=True)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)


# 4. Model
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ResNet-50 initialised with the ImageNet-pretrained weights.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Size the classification head from the class folders ImageFolder actually found,
# so it cannot drift out of sync with the dataset (a hard-coded count that is too
# small makes CrossEntropyLoss fail on out-of-range labels).
num_classes = len(base_dataset.classes)

# Freeze ALL pretrained layers.
model.requires_grad_(False)

# Replace the final fully-connected layer. It is created after the freeze, so its
# parameters keep requires_grad=True and are the only ones that get trained.
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# 5. Loss and Optimizer
criterion = nn.CrossEntropyLoss()

# Hand Adam only the parameters that are still trainable (requires_grad=True).
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=0.001)

    # 6. Training Loop
# Number of passes over the training split; single source of truth for the loop
# bound and for the progress messages.
NUM_EPOCHS = 20

for epoch in trange(NUM_EPOCHS):
    # ---- training pass ----
    # NOTE: train() also puts the BatchNorm layers in training mode, so their running
    # statistics keep updating even though the backbone weights are frozen.
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] Loss: {running_loss/len(train_loader):.4f}")

    # ---- validation pass ----
    # Evaluate on the held-out validation split so over-fitting is visible per epoch.
    model.eval()
    val_loss_sum = 0.0
    val_correct = 0
    val_seen = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            n_batch = labels.size(0)
            # criterion returns the batch mean; weight it by batch size for a per-sample mean.
            val_loss_sum += criterion(outputs, labels).item() * n_batch
            val_correct += (outputs.argmax(dim=1) == labels).sum().item()
            val_seen += n_batch

    if val_seen:  # skip the report when the validation split is empty
        print(
            f"Epoch [{epoch+1}/{NUM_EPOCHS}] "
            f"Val loss: {val_loss_sum/val_seen:.4f} "
            f"Val accuracy: {100 * val_correct / val_seen:.2f}%"
        )

# 7. Testing
# Accumulators: ground-truth labels and per-class probabilities for every test sample.
all_labels, all_probs = [], []
correct = 0
total = len(test_dataset)

model.eval()
with torch.no_grad():
    for images, labels in tqdm(test_loader):
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        probabilities = F.softmax(outputs, dim=1)  # logits -> class probabilities
        predictions = outputs.argmax(dim=1)

        correct += int((predictions == labels).sum())

        # Keep labels and probabilities on the CPU for the AUC computation below.
        all_labels.extend(labels.cpu().numpy())
        all_probs.extend(probabilities.cpu().numpy())

# Multi-class AUC, One-vs-Rest.
all_labels = np.array(all_labels)
all_probs = np.array(all_probs)
auc = roc_auc_score(all_labels, all_probs, multi_class='ovr')

print('Test accuracy: {:.2f}%'.format(100 * correct / total))
print('Test AUC: {:.4f}'.format(auc))


# Hard prediction per test sample = index of its highest class probability.
predicted_classes = np.argmax(all_probs, axis=1)

# Pin the label set to every known class. Without `labels`, confusion_matrix only
# includes classes that occur in the data, so a class absent from the test split
# would yield a matrix smaller than the list of display labels.
class_indices = np.arange(len(base_dataset.classes))
conf_matrix = confusion_matrix(all_labels, predicted_classes, labels=class_indices)

disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=base_dataset.classes)
disp.plot(cmap='viridis', xticks_rotation='vertical')
# The title lets the figure stand alone; the trailing `;` hides the object repr in the cell output.
disp.ax_.set_title('Test confusion matrix');
