In [1]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
from ultralytics import YOLO
import cv2


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import torch

# Report the installed PyTorch build.
print(f"Versión de PyTorch: {torch.__version__}")

# Report whether a CUDA device is usable from this environment.
print(f"CUDA disponible: {torch.cuda.is_available()}")

# Name the first GPU when CUDA is usable; otherwise state that it is missing.
print(
    f"Nombre de la GPU: {torch.cuda.get_device_name(0)}"
    if torch.cuda.is_available()
    else "CUDA no está disponible."
)


Versión de PyTorch: 1.13.1+cu116
CUDA disponible: True
Nombre de la GPU: NVIDIA GeForce RTX 4060 Laptop GPU


In [4]:
# Model and training hyperparameters
LEARNING_RATE = 5e-3
NUM_EPOCHS = 10
BATCH_SIZE = 8
NUM_WORKERS = 4
NUM_CLASSES = 10  # number of F1 teams

# Compute device: the GPU when CUDA is usable, otherwise the CPU
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


In [None]:
class F1Dataset(torch.utils.data.Dataset):
    """Detection dataset pairing ``.jpg`` images with per-image label files.

    Every ``<name>.txt`` found in ``annotation_path`` is matched with
    ``<name>.jpg`` in ``root``. Each non-empty line of a label file is read as
    ``class_id x_center y_center width height``.

    Args:
        root: Directory containing the images.
        annotation_path: Directory containing the label files.
    """

    def __init__(self, root, annotation_path):
        self.root = root
        # Stored on the instance because __getitem__ needs it to build the
        # path of each label file.
        self.annotation_path = annotation_path
        # Keep only label files, in sorted order, so that an index always
        # refers to the same sample regardless of directory listing order.
        self.annotations = sorted(
            name for name in os.listdir(annotation_path) if name.endswith('.txt')
        )
        self.transform = transforms.ToTensor()

    @staticmethod
    def _parse_label_file(label_path):
        """Read one label file and return its ``(boxes, labels)`` tensors.

        Args:
            label_path: Path of a text file with one object per line.

        Returns:
            ``boxes``: float32 tensor of shape ``(N, 4)`` holding
            ``[x1, y1, x2, y2]`` corners; ``labels``: int64 tensor of shape
            ``(N,)``. A file with no objects gives shapes ``(0, 4)`` and ``(0,)``.

        Raises:
            ValueError: If a non-empty line has fewer than five fields or a
                field is not numeric.
        """
        boxes = []
        labels = []
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.split()
                if not parts:
                    # Blank lines (e.g. a trailing newline) carry no object.
                    continue
                class_id = int(parts[0])
                x_center, y_center, width, height = (float(v) for v in parts[1:5])

                # Centre/size -> corner form. The values stay in the units
                # used by the label file; no scaling by image size happens here.
                x1 = x_center - width / 2
                y1 = y_center - height / 2
                x2 = x_center + width / 2
                y2 = y_center + height / 2

                boxes.append([x1, y1, x2, y2])
                labels.append(class_id)

        # reshape keeps a consistent (N, 4) layout even when N == 0.
        boxes_tensor = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels_tensor = torch.as_tensor(labels, dtype=torch.int64)
        return boxes_tensor, labels_tensor

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the sample at position ``idx``.

        ``image`` is the tensor produced by ``transforms.ToTensor()``;
        ``target`` is a dict with keys ``'boxes'``, ``'labels'`` and
        ``'image_id'``.
        """
        label_name = self.annotations[idx]
        boxes, labels = self._parse_label_file(
            os.path.join(self.annotation_path, label_name)
        )

        # Swap only the extension, so a '.txt' elsewhere in the file name is
        # left alone.
        stem = os.path.splitext(label_name)[0]
        image_path = os.path.join(self.root, stem + '.jpg')
        # The context manager closes the file handle; convert('RGB') gives
        # every sample three channels whatever the file's stored mode is.
        with Image.open(image_path) as img:
            image = self.transform(img.convert('RGB'))

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([idx])
        }

        return image, target

    def __len__(self):
        """Number of label files, i.e. number of samples."""
        return len(self.annotations)


In [None]:
# Dataset locations (relative to the notebook's working directory)
train_root = '../train/images'
train_annotation_path = '../train/labels'
val_root = '../valid/images'
val_annotation_path = '../valid/labels'

# Build the datasets
train_dataset = F1Dataset(root=train_root, annotation_path=train_annotation_path)
val_dataset = F1Dataset(root=val_root, annotation_path=val_annotation_path)


def detection_collate(batch):
    """Turn a list of ``(image, target)`` samples into ``(images, targets)`` tuples.

    The DataLoader's default collate tries to stack each field into a single
    tensor, which fails when samples hold different numbers of boxes. Keeping
    the samples as tuples also yields the per-image target dicts that the
    training and validation loops iterate over.
    """
    return tuple(zip(*batch))


# Build the DataLoaders
# NOTE(review): with NUM_WORKERS > 0 on platforms that spawn worker processes,
# a dataset class defined inside a notebook may fail to load in the workers -
# set NUM_WORKERS = 0 if that happens.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    collate_fn=detection_collate,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    collate_fn=detection_collate,
)


In [None]:
# Build a YOLOv8 Nano model from its architecture definition file.
model = YOLO("yolov8n.yaml")
# No `model.train()` call here: on the Ultralytics `YOLO` wrapper, `train()`
# is the high-level entry point that launches a training run (it expects
# arguments such as `data=` and `epochs=`); it does not switch the network
# into training mode.
model.to(DEVICE)


In [None]:
# SGD over all model parameters, plus a step schedule that multiplies the
# learning rate by 0.1 every 3 scheduler steps.
optimizer = torch.optim.SGD(params=model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=3, gamma=0.1)


In [None]:
def train(model, device, loader, optimizer, epoch):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module put into training mode via ``model.train()``;
            ``model(images)`` must return an object exposing a scalar
            ``loss`` attribute that supports ``backward()``.
        device: Device that images and targets are moved to.
        loader: Iterable yielding ``(images, targets)`` batches, where
            ``images`` is a sequence of tensors and ``targets`` a sequence of
            dicts of tensors.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch index; not used inside the function, kept so existing
            callers keep working.

    Returns:
        The mean of the per-batch losses as a float, or ``nan`` when the
        loader yields no batches.
    """
    # NOTE(review): if `model` is an Ultralytics `YOLO` wrapper, `model.train()`
    # starts Ultralytics' own training run instead of toggling train mode -
    # confirm which kind of object is passed in.
    model.train()
    running_loss = 0.0
    num_batches = 0
    for images, targets in loader:
        images = [img.to(device) for img in images]
        # NOTE(review): targets are moved to the device but never handed to
        # the model, so the loss below cannot depend on them - confirm how the
        # model is expected to receive its labels.
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()

        outputs = model(images)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    return running_loss / num_batches if num_batches else float('nan')
        
def validate(model, device, loader, epoch):
    """Evaluate ``model`` on ``loader``, print and return the mean batch loss.

    Args:
        model: Module put into evaluation mode via ``model.eval()``;
            ``model(images)`` must return an object exposing a scalar
            ``loss`` attribute.
        device: Device that images and targets are moved to.
        loader: Iterable yielding ``(images, targets)`` batches, where
            ``images`` is a sequence of tensors and ``targets`` a sequence of
            dicts of tensors.
        epoch: Zero-based epoch index, printed as ``epoch + 1``.

    Returns:
        The mean of the per-batch losses as a float, or ``nan`` when the
        loader yields no batches (instead of dividing by zero).
    """
    # NOTE(review): many detection models return predictions rather than a
    # loss while in eval mode - confirm that this model exposes `.loss` here.
    model.eval()
    total_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        for images, targets in loader:
            images = [img.to(device) for img in images]
            # NOTE(review): targets are moved to the device but not passed to
            # the model - confirm how labels are meant to reach it.
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            outputs = model(images)
            total_loss += outputs.loss.item()
            # Counting batches avoids relying on len(loader), which not every
            # iterable provides.
            num_batches += 1

    avg_loss = total_loss / num_batches if num_batches else float('nan')
    print(f'Epoch {epoch+1}/{NUM_EPOCHS}, Validation Loss: {avg_loss:.4f}')
    return avg_loss


In [None]:
# Main training loop: one training pass and one validation pass per epoch
for epoch in range(NUM_EPOCHS):
    train(model, DEVICE, train_loader, optimizer, epoch)
    validate(model, DEVICE, val_loader, epoch)

    # Advance the learning-rate schedule once per epoch. Without this call the
    # StepLR scheduler created next to the optimizer never changes the rate.
    scheduler.step()

    # Save a checkpoint of the weights after every epoch
    torch.save(model.state_dict(), f'f1_detection_epoch_{epoch+1}.pth')


In [None]:
def detect_in_video(model, video_path):
    """Run the detector on each frame of a video and display the annotated frames.

    Frames are shown in a window titled ``'Detection'`` until the video ends
    or the user presses ``q``.

    Args:
        model: Ultralytics-style detector: calling it with a BGR ``numpy``
            frame must return a sequence whose first element has a ``plot()``
            method returning the annotated image.
        video_path: Path of the video file passed to ``cv2.VideoCapture``.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # The loop below would be skipped silently; tell the user why nothing
        # is displayed.
        print(f"Could not open video: {video_path}")

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # The frame is passed as read by OpenCV (BGR, HxWxC); the detector
            # handles its own preprocessing.
            results = model(frame)

            # plot() draws the detections on a copy of the frame, ready for
            # cv2.imshow.
            annotated = results[0].plot()

            cv2.imshow('Detection', annotated)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if inference raises.
        cap.release()
        cv2.destroyAllWindows()


In [None]:
# Load the weights saved after the final training epoch. The file name follows
# NUM_EPOCHS so it stays in step with the checkpoints written by the training
# loop, and map_location lets a checkpoint saved on a GPU load on a CPU-only
# machine.
checkpoint_path = f'f1_detection_epoch_{NUM_EPOCHS}.pth'
model.load_state_dict(torch.load(checkpoint_path, map_location=DEVICE))
model.eval()

# Path to the input video (placeholder - replace with a real file)
video_path = 'path/to/your/video.mp4'

# Run detection on the video
detect_in_video(model, video_path)
