In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.optim as optim

from torchvision import transforms

In [None]:
#Ruta al CSV (usa test o train según corresponda)
csv_path_train = "/workspace/sign_mnist_train.csv"
csv_path_test = "/workspace/sign_mnist_test.csv"
df_train = pd.read_csv(csv_path_train)
df_test = pd.read_csv(csv_path_test)

#Separar etiquetas y píxeles
labels = df_test['label'].values.astype(np.int64)
pixels = df_test.drop(columns='label').to_numpy().astype(np.float32)
labels = df_train['label'].values.astype(np.int64)
pixels = df_train.drop(columns='label').to_numpy().astype(np.float32)
#Reformar a imágenes 28x28
image_test = pixels.reshape(-1, 28, 28)
image_train = pixels.reshape(-1, 28, 28)

print("Imágen test:", image_test.shape)
print("Imágen train:", image_train.shape)
print("Labels:", labels.shape)

In [None]:
idx = 0
img_np = image_test[idx]
label = labels[idx]

In [None]:
# Base transform: numpy array -> tensor with a leading channel dimension
to_tensor = transforms.Compose([
    transforms.ToTensor()
])

# ToTensor only rescales uint8 input to [0, 1]; img_np is float32 holding raw
# 0-255 intensities, so scale it explicitly. The transforms applied later
# (RandomInvert, and the noise transform's clamp to [0, 1]) expect that range.
img_tensor = to_tensor(img_np / 255.0)

In [None]:
# 1 - Rotation by a random angle (range controlled by degrees=30)
rot_transform = transforms.RandomRotation(30)

# 2 - Translation via an affine transform with no rotation:
#     shifts of up to 20% of the image size along each axis
translate_transform = transforms.RandomAffine(0, translate=(0.2, 0.2))

# 3 - Scaling (zoom) via an affine transform with no rotation:
#     scale factor sampled between 0.8 and 1.2
scale_transform = transforms.RandomAffine(0, scale=(0.8, 1.2))

# 4 - Colour inversion; p=1.0 means the inversion is always applied
invert_transform = transforms.RandomInvert(1.0)

# 5 - Additive noise (custom callable transform)
class AddGaussianNoise(object):
    """Add Gaussian noise to a tensor and clamp the result to [0, 1].

    Args:
        mean: offset added to the sampled noise.
        std: factor the standard-normal samples are multiplied by.
    """

    def __init__(self, mean=0.0, std=0.1):
        self.mean, self.std = mean, std

    def __call__(self, tensor):
        """Return `tensor` plus freshly sampled noise, limited to [0, 1]."""
        # Standard-normal samples with the input's shape, rescaled and shifted
        perturbation = torch.randn_like(tensor).mul(self.std).add(self.mean)
        return (tensor + perturbation).clamp(0.0, 1.0)

noise_transform = AddGaussianNoise(std=0.2, mean=0.0)

# 6 - Random crop
# To crop at 28x28, the image is first enlarged to 32x32 and then a random
# 28x28 window is cut out of it.
crop_transform = transforms.Compose(
    [transforms.Resize((32, 32)), transforms.RandomCrop((28, 28))]
)

In [None]:
## Aplicamos las transformaciones y guardamos los resultados en variables para visualizarlos

In [None]:
# Apply each augmentation to the same base tensor, in the original order
# (rotation, translation, scale, inversion, noise, crop).
img_rot, img_trans, img_scale, img_inv, img_noise, img_crop = (
    augment(img_tensor)
    for augment in (
        rot_transform,
        translate_transform,
        scale_transform,
        invert_transform,
        noise_transform,
        crop_transform,
    )
)

In [None]:
def show_gray_image(tensor, title=""):
    """Draw a single-channel image tensor on the current matplotlib axes.

    Args:
        tensor: image tensor; its leading dimension is squeezed away before
            the data is converted to a numpy array for plotting.
        title: optional caption; nothing is set when it is empty.
    """
    pixels = tensor.squeeze(0).numpy()
    plt.imshow(pixels, cmap="gray")
    plt.axis("off")
    if not title:
        return
    plt.title(title)

In [None]:
plt.figure(figsize=(12, 6))

# One (image, caption) pair per panel, placed left-to-right and top-to-bottom
# on a 2x4 grid; the last grid slot stays empty.
panels = [
    (img_tensor, f"Original (label={label})"),
    (img_rot, "Rotación"),
    (img_trans, "Traslación"),
    (img_scale, "Escalado"),
    (img_inv, "Inversión"),
    (img_noise, "Ruido"),
    (img_crop, "Random Crop"),
]

for panel_no, (panel_img, panel_title) in enumerate(panels, start=1):
    plt.subplot(2, 4, panel_no)
    show_gray_image(panel_img, title=panel_title)

plt.tight_layout()
plt.show()

In [None]:
class SignMNISTDataset(Dataset):
    """Dataset over an array of images and a parallel array of labels.

    Each item is returned as a float32 tensor with a leading channel
    dimension, with pixel values divided by 255, together with its label.

    Args:
        images: indexable collection of numpy image arrays.
        labels: indexable collection of labels, aligned with `images`.
        transform: optional callable applied to the image tensor.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels = images, labels
        self.transform = transform

    def __len__(self):
        """Number of samples, taken from the image collection."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the `(image_tensor, label)` pair stored at position `idx`."""
        raw = self.images[idx]
        target = self.labels[idx]

        # Scale the raw intensities by 1/255 and add the channel axis in front
        scaled = raw.astype(np.float32) / 255.0
        sample = torch.unsqueeze(torch.from_numpy(scaled), 0)

        if self.transform is None:
            return sample, target
        return self.transform(sample), target

# Normalisation applied to every sample before it reaches the CNN
cnn_transform = transforms.Normalize(mean=[0.5], std=[0.5])
# Build the dataset from the training images. The name `images` used here
# before is never defined in the notebook, so this line raised NameError on a
# fresh kernel; `image_train` is the array that pairs with `labels`.
full_dataset = SignMNISTDataset(image_train, labels, transform=cnn_transform)

In [None]:
# 80/20 train/validation split of the full dataset
dataset_size = len(full_dataset)
train_size = int(0.8 * dataset_size)
val_size = dataset_size - train_size

# Seed the split so the same samples land in the validation set on every run;
# otherwise validation metrics are not comparable across kernel restarts.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)

print("Train size:", len(train_dataset))
print("Val size:", len(val_dataset))

In [None]:
batch_size = 32

# Training batches are reshuffled on every pass; validation keeps a fixed order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    num_workers=2,
    shuffle=True,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=batch_size,
    num_workers=2,
    shuffle=False,
)

In [None]:
class SimpleCNN(nn.Module):
    """Small CNN: two conv + max-pool stages followed by two linear layers.

    The flattened feature size is fixed at 64 * 7 * 7, which matches 28x28
    single-channel inputs (each 2x2 pooling halves the spatial size and the
    padded 3x3 convolutions keep it unchanged).

    Args:
        num_classes: number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Stage 1: 1 -> 32 channels, 3x3 kernel, stride 1, padding 1, 2x2 pool
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32,
                               kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 32 -> 64 channels, same kernel and pooling configuration
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head on the flattened feature maps
        flat_features = 64 * 7 * 7
        self.fc1 = nn.Linear(flat_features, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a batch of images to a batch of class logits."""
        stage1 = self.pool1(torch.relu(self.conv1(x)))
        stage2 = self.pool2(torch.relu(self.conv2(stage1)))
        flat = stage2.view(stage2.size(0), -1)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)

# Size the output layer by the largest label id + 1, not by the number of
# distinct labels. Label ids need not be contiguous (Sign Language MNIST is
# commonly documented as using ids 0-24 with 9 unused), and any label id that
# is >= the number of logits is an out-of-range target for CrossEntropyLoss.
num_classes = int(np.max(labels)) + 1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = SimpleCNN(num_classes=num_classes).to(device)
print(model)

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over `loader` and return its metrics.

    When `optimizer` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled.

    Returns:
        (mean loss per sample, accuracy in percent) over all samples seen.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct = 0
    seen = 0

    with torch.set_grad_enabled(is_training):
        for x_batch, y_batch in loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            if is_training:
                loss.backward()
                optimizer.step()

            # The criterion returns a batch mean; weight it by the batch size
            # so the epoch average is per sample even if the last batch is short.
            loss_sum += loss.item() * x_batch.size(0)
            _, preds = torch.max(outputs, 1)
            seen += y_batch.size(0)
            correct += (preds == y_batch).sum().item()

    return loss_sum / seen, 100.0 * correct / seen


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 5

for epoch in range(num_epochs):
    # Training pass (updates the weights), then validation pass (no updates)
    avg_train_loss, train_acc = _run_epoch(
        model, train_loader, criterion, device, optimizer
    )
    avg_val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)

    print(f"Época [{epoch+1}/{num_epochs}] "
          f"- Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.2f}% "
          f"- Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.2f}%")