In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import matplotlib.pyplot as plt

In [4]:
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import pandas as pd
import numpy as np
import torch

class AirplanesDataset(Dataset):
    def __init__(self, csv_file, img_dir, num_samples=None, transform=None):
        self.list_of_images = pd.read_csv(csv_file)
        self.images_directory = img_dir
        if num_samples is not None:
            self.list_of_images = self.list_of_images.head(num_samples)
        self.transform = transform if transform is not None else transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])

    def __len__(self):
        return len(self.list_of_images)

    def __getitem__(self, idx):
        image_path = os.path.join(self.images_directory, self.list_of_images.iloc[idx, 0])
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)


        bbox = np.array(self.list_of_images.iloc[idx, 1:5].values, dtype=np.float32)

        center_x = (bbox[0] + bbox[2]) / 2
        center_y = (bbox[1] + bbox[3]) / 2
        width = bbox[2] - bbox[0]
        height = bbox[3] - bbox[1]

        center_x /= 224
        center_y /= 224
        width /= 224
        height /= 224
        bbox = torch.tensor([center_x, center_y, width, height], dtype=torch.float32)

        return image, bbox.unsqueeze(0)


In [5]:
import torchvision.transforms as transforms

dataset_path = '/content/drive/MyDrive/Colab Notebooks/dl/airplanes-dataset'

csv_file_path = os.path.join(dataset_path, 'airplanes.csv')
image_directory = os.path.join(dataset_path, 'images')

transformations = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

dataset = AirplanesDataset(csv_file=csv_file_path, img_dir=image_directory, num_samples=100, transform=transformations)

image, target = dataset[0]
print(image.shape, target)

torch.Size([3, 224, 224]) tensor([[0.8951, 0.4196, 1.2634, 0.5268]])


In [6]:
from torch.utils.data import random_split

dataset_size = len(dataset)
train_size = int(0.8 * dataset_size)
val_size = int(0.15 * dataset_size)
test_size = dataset_size - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

print(f"Dataset size: {dataset_size}")
print(f"Train size: {len(train_dataset)}")
print(f"Validation size: {len(val_dataset)}")
print(f"Test size: {len(test_dataset)}")

Dataset size: 100
Train size: 80
Validation size: 15
Test size: 5


In [7]:
from torch.utils.data import DataLoader

batch_size = 10

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [17]:
import torch
import torch.nn as nn

class SimpleYOLO(nn.Module):
    def __init__(self):
        super(SimpleYOLO, self).__init__()
        # Backbone: Extracción de características
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.final_layer = nn.Conv2d(128, 5, kernel_size=1)

    def forward(self, x):
        x = self.backbone(x)
        x = self.final_layer(x)
        x = nn.functional.adaptive_avg_pool2d(x, 1)
        x = x.view(x.size(0), 5)
        return x[:, :4]  # Retorna solo las coordenadas del bounding box


In [18]:
# Crear el modelo
model = SimpleYOLO()
print(model)

SimpleYOLO(
  (backbone): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 

In [19]:
import torch
import time
from torchvision.ops import generalized_box_iou_loss

def fit(model, train_loader, val_loader, epochs, optimizer, checkpoint_path):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    best_val_loss = float('inf')

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        start_time = time.time()

        for i, (inputs, targets) in enumerate(train_loader):
            inputs = inputs.to(device)
            targets = targets.to(device).squeeze(1)

            optimizer.zero_grad()
            outputs = model(inputs)
            outputs = outputs.squeeze()  # Asegura que outputs sea [batch_size, 4]

            loss = generalized_box_iou_loss(outputs, targets)
            loss = loss.mean()

            loss.backward()
            train_loss += loss.item()
            optimizer.step()

            if (i + 1) % 10 == 0:
                print(f'Epoch {epoch + 1}, Batch {i + 1}/{len(train_loader)}, Train Loss: {loss.item():.4f}')

        avg_train_loss = train_loss / len(train_loader)
        print(f'Epoch {epoch + 1}, Average Training Loss: {avg_train_loss:.4f}, Time: {time.time() - start_time:.2f}s')

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device).squeeze(1)
                outputs = model(inputs)
                outputs = outputs.squeeze()

                batch_loss = generalized_box_iou_loss(outputs, targets)
                batch_loss = batch_loss.mean()
                val_loss += batch_loss.item()

        avg_val_loss = val_loss / len(val_loader)
        print(f'Epoch {epoch + 1}, Average Validation Loss: {avg_val_loss:.4f}')

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), checkpoint_path)
            print(f'New best model saved with avg_val_loss: {best_val_loss:.4f}')

    return avg_train_loss, avg_val_loss



In [20]:
import torch
import torch.optim as optim

# Inicializa el modelo
model = SimpleYOLO()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

epochs = 50
optimizer = optim.Adam(model.parameters(), lr=0.001)
checkpoint_path = "/content/drive/MyDrive/Colab Notebooks/dl/airplanes_model/airplanes-yolo-model"

# Llama al método de entrenamiento
train_loss, val_loss = fit(model, train_loader, val_loader, epochs, optimizer, checkpoint_path)

print(f"Training Loss: {train_loss}")
print(f"Validation Loss: {val_loss}")

Epoch 1, Average Training Loss: -3.0665, Time: 14.39s
Epoch 1, Average Validation Loss: 1.9647
New best model saved with avg_val_loss: 1.9647
Epoch 2, Average Training Loss: -102.5345, Time: 0.61s
Epoch 2, Average Validation Loss: 1.4327
New best model saved with avg_val_loss: 1.4327
Epoch 3, Average Training Loss: -824589.2492, Time: 0.69s
Epoch 3, Average Validation Loss: -0.0951
New best model saved with avg_val_loss: -0.0951
Epoch 4, Average Training Loss: -1939362.4494, Time: 0.69s
Epoch 4, Average Validation Loss: 1.6253
Epoch 5, Average Training Loss: -3290936.8386, Time: 0.70s
Epoch 5, Average Validation Loss: 546.7478
Epoch 6, Average Training Loss: -8280372.6579, Time: 0.69s
Epoch 6, Average Validation Loss: 2046.1840
Epoch 7, Average Training Loss: -5460369.2971, Time: 0.60s
Epoch 7, Average Validation Loss: 2103.4165
Epoch 8, Average Training Loss: -9890201.5759, Time: 0.59s
Epoch 8, Average Validation Loss: 956.2035
Epoch 9, Average Training Loss: -9914967.3488, Time: 0.60