Importamos las librerías

In [8]:
import os
import json
from PIL import Image


Definimos las rutas

In [9]:
# Output: root folder that will receive the cropped images (one sub-folder per class).
output_dir = "./dataset"

# Input: annotation file and the folder holding the full-size training images.
images_dir = "archive/train_dataset/train_images"
json_path = "archive/train_dataset/train.json"

Crear carpetas de salida por clase

In [10]:
# Make sure the output root exists, then add one sub-folder per class label.
# exist_ok=True keeps the cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)
for label in ("red", "yellow", "green"):
    class_dir = os.path.join(output_dir, label)
    os.makedirs(class_dir, exist_ok=True)


Procesar el JSON y guardar los recortes

In [11]:
# Load the annotations. The encoding is explicit so the read does not depend
# on the platform's default locale.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Counters so that skipped or failed entries are reported instead of
# disappearing silently.
saved = skipped = missing = failed = 0
required_keys = ("filename", "label", "box")

# Crop every annotated box and store it under the folder named after its label.
for i, annotation in enumerate(data["annotations"]):
    # Entries lacking any key read below cannot be cropped.
    if any(key not in annotation for key in required_keys):
        skipped += 1
        continue

    # Keep only the base name; the stored path may use either separator.
    filename = annotation["filename"].replace("\\", "/").split("/")[-1]
    label = annotation["label"]
    box = annotation["box"]

    image_path = os.path.join(images_dir, filename)
    if not os.path.exists(image_path):
        print(f"Imagen no encontrada: {image_path}")
        missing += 1
        continue

    try:
        # The context manager releases the file handle even if cropping or
        # saving raises.
        with Image.open(image_path) as image:
            cropped = image.convert("RGB").crop(
                (box["xmin"], box["ymin"], box["xmax"], box["ymax"])
            )
            # The annotation index keeps file names unique across crops.
            save_path = os.path.join(output_dir, label, f"{i:05d}.jpg")
            cropped.save(save_path)
        saved += 1
    except Exception as e:
        # Best effort: report the failure and continue with the next entry.
        print(f"Error procesando {filename}: {e}")
        failed += 1

print(
    f"Recortes guardados: {saved} - Anotaciones omitidas: {skipped} - "
    f"Imágenes no encontradas: {missing} - Errores: {failed}"
)
if saved == 0:
    # An empty result usually means the JSON schema or the paths do not match
    # what this cell expects; surface it here rather than at dataset load time.
    print(
        "Advertencia: no se guardó ningún recorte; revisa que las anotaciones "
        "tengan las claves 'filename', 'label' y 'box' y que las rutas sean correctas."
    )


Importamos las librerías para realizar el entrenamiento

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt


Preparar las transformaciones y cargar el dataset

In [13]:
# Preprocessing applied to every image: fixed 64x64 size, tensor conversion,
# and per-channel normalization that maps [0, 1] to [-1, 1].
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    # One mean/std entry per RGB channel (same values as a broadcast 0.5).
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Load the dataset from the per-class folders. ImageFolder assigns class
# indices by sorted folder name, so folders green/red/yellow map to 0/1/2.
dataset_path = "./dataset"
dataset = datasets.ImageFolder(dataset_path, transform=transform)

# 80% / 20% train/validation split. The seeded generator makes the split
# identical on every run, so validation accuracy is comparable between runs.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# DataLoaders: only the training data is shuffled.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)


FileNotFoundError: Found no valid file for the classes green, red, yellow. Supported extensions are: .jpg, .jpeg, .png, .ppm, .bmp, .pgm, .tif, .tiff, .webp

Definimos una CNN

In [None]:
class SimpleCNN(nn.Module):
    """Small CNN classifier: two conv/ReLU/max-pool stages and a two-layer head.

    Args:
        num_classes: Number of output logits. Defaults to 3.
        input_size: Side length in pixels of the square RGB input images.
            Defaults to 64.

    Raises:
        ValueError: If ``input_size`` is too small to survive the two
            2x2 poolings (i.e. smaller than 4).
    """

    def __init__(self, num_classes=3, input_size=64):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )

        # The 3x3 convolutions with padding=1 keep the spatial size; each
        # MaxPool2d(2) halves it (rounding down).
        pooled_size = (input_size // 2) // 2
        if pooled_size < 1:
            raise ValueError(f"input_size must be at least 4, got {input_size}")

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * pooled_size * pooled_size, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes),  # one logit per class
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.conv(x)
        x = self.fc(x)
        return x

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the network and move its parameters to the selected device.
model = SimpleCNN()
model = model.to(device)


Entrenamos el modelo

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    # --- Training pass ---
    model.train()
    running_loss = 0.0
    seen = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean; weighting by the batch size
        # yields a per-sample average that does not depend on the number of
        # batches or on a smaller final batch.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count
        seen += batch_count

    epoch_loss = running_loss / seen if seen else float("nan")

    # --- Validation pass (no gradients needed) ---
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard against an empty validation loader instead of dividing by zero.
    accuracy = correct / total if total else 0.0
    print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f} - Val Accuracy: {accuracy:.2%}")
