In [1]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import torch.nn as nn
import torch.optim as optim

## Definimos la clase Dataset 

In [2]:
class CustomImageDataset(Dataset):
    """Image-classification dataset read from a directory of per-class sub-folders.

    Each visible sub-directory of ``directory`` is one class; class indices are
    assigned in sorted order of the sub-directory names. Every file inside a
    class folder whose name ends with one of ``extensions`` (case-insensitive)
    becomes one sample.

    Args:
        directory: Root folder containing one sub-folder per class.
        transform: Optional callable applied to the loaded RGB PIL image.
        extensions: File-name suffixes accepted as images.

    Attributes:
        class_names: Sorted list of class (sub-folder) names.
        class_to_idx: Mapping from class name to integer label.
        image_paths: Path of every sample, grouped by class and sorted by name.
        labels: Integer label of every sample, aligned with ``image_paths``.
    """

    def __init__(self, directory, transform=None, extensions=('.jpg', '.jpeg', '.png')):
        self.directory = directory
        self.transform = transform
        self.extensions = tuple(ext.lower() for ext in extensions)
        self.image_paths = []
        self.labels = []

        # Only real, visible sub-directories are classes. Stray files at the
        # root or hidden folders such as ``.ipynb_checkpoints`` would otherwise
        # receive an index of their own and shift every label after them.
        self.class_names = sorted(
            entry for entry in os.listdir(directory)
            if not entry.startswith('.') and os.path.isdir(os.path.join(directory, entry))
        )
        self.class_to_idx = {class_name: idx for idx, class_name in enumerate(self.class_names)}

        # Walk each class folder; sorting makes the sample order reproducible
        # (os.listdir returns entries in arbitrary order).
        for class_name in self.class_names:
            class_path = os.path.join(directory, class_name)
            for file_name in sorted(os.listdir(class_path)):
                if file_name.lower().endswith(self.extensions):
                    self.image_paths.append(os.path.join(class_path, file_name))
                    self.labels.append(self.class_to_idx[class_name])

    def __len__(self):
        """Return the number of samples found under ``directory``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is always converted to 3-channel RGB so that grayscale,
        palette or RGBA files yield the same channel count as the rest.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # The context manager closes the underlying file; convert() returns an
        # independent, fully loaded copy that stays valid afterwards.
        with Image.open(img_path) as img_file:
            img = img_file.convert('RGB')

        if self.transform:
            img = self.transform(img)

        return img, label

In [3]:
# Settings shared by both pipelines: target size and per-channel
# normalization statistics (the widely used ImageNet mean/std values).
_target_size = (224, 224)
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Deterministic pipeline: resize -> tensor -> normalize.
_original_steps = [
    transforms.Resize(_target_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
]
original_transform = transforms.Compose(_original_steps)

# Augmenting pipeline: same resize and normalization, with a random horizontal
# flip, a random rotation within +/-30 degrees and random brightness /
# contrast / saturation / hue jitter applied before the tensor conversion.
_augmented_steps = [
    transforms.Resize(_target_size),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
]
augmented_transform = transforms.Compose(_augmented_steps)

In [4]:
# Both training datasets read the same folder; they differ only in the
# transform applied to each image when it is loaded.
train_original_dataset = CustomImageDataset(
    directory='archive/train',
    transform=original_transform,
)
train_augmented_dataset = CustomImageDataset(
    directory='archive/train',
    transform=augmented_transform,
)

# Validation and test data always use the non-augmenting transform.
valid_original_dataset = CustomImageDataset(
    directory='archive/valid',
    transform=original_transform,
)
test_original_dataset = CustomImageDataset(
    directory='archive/test',
    transform=original_transform,
)

In [5]:
# Training loaders reshuffle their samples on every pass.
train_original_loader = DataLoader(
    train_original_dataset,
    batch_size=32,
    shuffle=True,
)
train_augmented_loader = DataLoader(
    train_augmented_dataset,
    batch_size=32,
    shuffle=True,
)

# Evaluation loaders keep the dataset order.
valid_original_loader = DataLoader(
    valid_original_dataset,
    batch_size=32,
    shuffle=False,
)
test_original_loader = DataLoader(
    test_original_dataset,
    batch_size=32,
    shuffle=False,
)

In [6]:
class MyConvolutionalNeuralNetwork(nn.Module):
    """Small CNN classifier: three conv layers, two 2x2 max-pools, two linear layers.

    Args:
        num_classes: Number of output logits. Defaults to 10.
        input_size: Spatial size of the input images, either a single int for
            square inputs or a ``(height, width)`` pair. Defaults to 224, which
            gives the 56 * 56 * 128 flattened features of the original layout.

    Raises:
        ValueError: If ``input_size`` is too small to survive both poolings or
            ``num_classes`` is not positive.
    """

    def __init__(self, num_classes=10, input_size=224):
        super().__init__()

        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        # Every convolution is padded to preserve the spatial size, so only the
        # two MaxPool2d(2) layers shrink it: each floor-halves height and width.
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: it must be at least 4 in each dimension"
            )
        if num_classes < 1:
            raise ValueError(f"num_classes must be positive, got {num_classes!r}")

        self.convolutional_stack = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            # Flattened feature count = pooled height * pooled width * 128 channels.
            nn.Linear(in_features=pooled_height * pooled_width * 128, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=num_classes)
        )

    def forward(self, x):
        """Return the raw class logits for a batch of 3-channel images."""
        x = self.convolutional_stack(x)
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [7]:
# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Build the network, move it to the selected device and show its layers.
model = MyConvolutionalNeuralNetwork()
model = model.to(device)
print(model)


cpu
MyConvolutionalNeuralNetwork(
  (convolutional_stack): Sequential(
    (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=401408, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [8]:
# Cross-entropy loss computed on the raw logits returned by the model.
loss_fn = nn.CrossEntropyLoss()

# Step size for the Adam optimizer.
learning_rate = 1e-3
# Take the batch size from the loader that feeds training. The previous
# hard-coded 64 disagreed with the loaders' batch size of 32, so the progress
# counter that train_step derives from this value overshot the dataset size.
batch_size = train_original_loader.batch_size
# Adam over every parameter of the model.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [9]:
def train_step(dataloader, model, loss_fn, optimizer):
    """Run one training pass over ``dataloader`` and update ``model`` in place.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing a ``dataset``
            attribute whose length is the total number of samples.
        model: The ``nn.Module`` to train.
        loss_fn: Callable taking ``(logits, y)`` and returning a scalar loss.
        optimizer: Optimizer bound to ``model``'s parameters.

    Prints the loss and the number of samples processed so far on every
    100th batch. Returns ``None``.
    """
    size = len(dataloader.dataset)

    # Send batches to wherever the model's parameters live, so inputs and
    # targets always share a device with the model.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # Training mode matters for batch-norm / dropout layers; harmless otherwise.
    model.train()

    seen = 0  # running count of samples processed, independent of any global batch size
    for batch_idx, (X, y) in enumerate(dataloader):
        # Targets must be moved too: leaving them on the CPU breaks the loss
        # computation as soon as the model runs on a GPU.
        X, y = X.to(target_device), y.to(target_device)

        # Compute prediction and loss
        logits = model(X)
        loss = loss_fn(logits, y)

        # Backpropagation. Gradients are cleared before backward() so that an
        # interrupted earlier run cannot leak stale gradients into this step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        seen += len(X)
        if batch_idx % 100 == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")

In [10]:
def validation_step(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and report accuracy and mean loss.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing a ``dataset``
            attribute whose length is the total number of samples.
        model: The ``nn.Module`` to evaluate; it is left in eval mode.
        loss_fn: Callable taking ``(logits, y)`` and returning a scalar loss.

    Returns:
        ``(accuracy, avg_loss)``: the fraction of correctly classified samples
        (0..1) and the loss averaged over batches. The same figures are printed.

    Raises:
        ValueError: If the dataloader has no samples or no batches.
    """
    # Evaluation mode matters for batch-norm / dropout layers; harmless otherwise.
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    if size == 0 or num_batches == 0:
        # Fail with a clear message instead of a ZeroDivisionError further down.
        raise ValueError("validation_step received an empty dataloader")

    # Send batches to wherever the model's parameters live.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    test_loss, correct = 0.0, 0

    # no_grad() skips gradient tracking, which evaluation does not need.
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(target_device), y.to(target_device)
            logits = model(X)
            test_loss += loss_fn(logits, y).item()
            correct += (logits.argmax(1) == y).sum().item()

    test_loss /= num_batches
    accuracy = correct / size
    print(f"Validation Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return accuracy, test_loss

In [11]:
# Number of full passes over the training data.
epochs = 1

epoch = 0
while epoch < epochs:
    # Header for this epoch, then one training pass followed by validation.
    print(f"Epoch {epoch + 1}\n-------------------------------")
    train_step(train_original_loader, model, loss_fn, optimizer)
    validation_step(valid_original_loader, model, loss_fn)
    epoch += 1

print("Done!")

Epoch 1
-------------------------------
loss: 2.287110  [   32/ 1535]


KeyboardInterrupt: 