In [2]:
!pip install torch torchvision
!pip install segmentation-models-pytorch
!pip install albumentations
!pip install matplotlib
!pip install pillow
!pip install numpy


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Collecting albumentations
  Downloading albumentations-2.0.8-py3-none-any.whl.metadata (43 kB)
Collecting pydantic>=2.9.2 (from albumentations)
  Downloading pydantic-2.12.3-py3-none-any.whl.metadata (87 kB)
Collecting albucore==0.0.24 (from albumentations)
  Downloading albucore-0.0.24-py3-none-any.whl.metadata (5.3 kB)
Collecting opencv-python-headless>=4.9.0.80 (from albumentations)
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux

In [3]:
import torch
from torch.utils.data import Dataset
import numpy as np
from PIL import Image
from albumentations import Compose, HorizontalFlip, VerticalFlip, RandomRotate90

# Allow PIL to load truncated image files instead of raising an error
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

class UrbanDataset(Dataset):
    """Paired image / mask dataset for binary segmentation.

    Samples are read from disk on access. The image is loaded as RGB and the
    mask as single-channel greyscale whose 0-255 values are scaled to 0-1.

    Args:
        img_paths: Sequence of image file paths.
        mask_paths: Sequence of mask file paths; ``mask_paths[i]`` is used as
            the mask for ``img_paths[i]``.
        transform: Optional callable invoked as
            ``transform(image=image, mask=mask)`` that returns a mapping with
            ``'image'`` and ``'mask'`` entries.

    Raises:
        ValueError: If ``img_paths`` and ``mask_paths`` differ in length.
    """

    def __init__(self, img_paths, mask_paths, transform=None):
        # Pairing is purely positional, so a length mismatch means at least
        # one sample would get no mask or the wrong mask.
        if len(img_paths) != len(mask_paths):
            raise ValueError(
                f"img_paths has {len(img_paths)} entries but mask_paths has "
                f"{len(mask_paths)}; they must be the same length"
            )
        self.img_paths = img_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, mask)`` as float tensors.

        Returns:
            image: ``(3, H, W)`` tensor, pixel values divided by 255.
            mask: ``(1, H, W)`` tensor, mask values divided by 255.
        """
        # Context managers release the file handles as soon as the pixel data
        # has been copied into NumPy arrays.
        with Image.open(self.img_paths[idx]) as img_file:
            image = np.array(img_file.convert("RGB"))
        with Image.open(self.mask_paths[idx]) as mask_file:
            mask = np.array(mask_file.convert("L"))
        mask = mask / 255.0  # scale to 0-1

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image, mask = augmented['image'], augmented['mask']

        # Convert to tensors: image H,W,C -> C,H,W; mask gains a channel axis.
        image = torch.tensor(image).permute(2, 0, 1).float() / 255.0
        mask = torch.tensor(mask).unsqueeze(0).float()
        return image, mask


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Augmentation pipeline: random horizontal/vertical flips and 90-degree
# rotations, passed to the dataset so image and mask are transformed together.
transform = Compose(transforms=[HorizontalFlip(), VerticalFlip(), RandomRotate90()])

In [5]:
import glob
from torch.utils.data import DataLoader

# Both listings are sorted so that image i lines up with mask i; this relies
# on the image and mask folders using matching file names.
# NOTE(review): a later cell reads masks from ".../train/mask/" (singular) —
# confirm which directory name is correct.
train_img_paths = sorted(glob.glob("dataset/train/images/*.png"))
train_mask_paths = sorted(glob.glob("dataset/train/masks/*.png"))
val_img_paths = sorted(glob.glob("dataset/val/images/*.png"))
val_mask_paths = sorted(glob.glob("dataset/val/masks/*.png"))

# Pairing is positional, so differing counts would silently attach the wrong
# mask to an image. Fail early with a clear message instead.
for _split, _imgs, _masks in (
    ("train", train_img_paths, train_mask_paths),
    ("val", val_img_paths, val_mask_paths),
):
    if len(_imgs) != len(_masks):
        raise ValueError(
            f"{_split}: found {len(_imgs)} images but {len(_masks)} masks"
        )

# Augmentations are applied to the training split only.
train_dataset = UrbanDataset(train_img_paths, train_mask_paths, transform=transform)
val_dataset = UrbanDataset(val_img_paths, val_mask_paths)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4)

In [6]:
import segmentation_models_pytorch as smp

# Dice loss in binary mode, matching the single-channel mask target.
loss_fn = smp.losses.DiceLoss(mode='binary')

# U-Net with a ResNet-34 encoder initialised from ImageNet weights; takes RGB
# input and predicts a single class ("manzana").
model = smp.Unet(encoder_name="resnet34", encoder_weights="imagenet", in_channels=3, classes=1)

# Adam over every model parameter.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [14]:
import glob
# Collect the training image and mask files, each list in sorted order.
train_img_paths = glob.glob("/workspaces/PI-EJM/dataset/train/images/*.png")
train_img_paths.sort()
train_mask_paths = glob.glob("/workspaces/PI-EJM/dataset/train/mask/*.png")
train_mask_paths.sort()

# Report how many files each pattern matched.
print("Imágenes encontradas:", len(train_img_paths))
print("Máscaras encontradas:", len(train_mask_paths))


Imágenes encontradas: 4
Máscaras encontradas: 4


In [15]:
# Safe pairing: zip() silently truncates to the shorter list, which would hide
# a missing image or mask and shift every later pair. Check the counts first.
if len(train_img_paths) != len(train_mask_paths):
    raise ValueError(
        f"Found {len(train_img_paths)} images but {len(train_mask_paths)} masks; "
        "every image needs exactly one mask"
    )

paired_train = list(zip(train_img_paths, train_mask_paths))

# Rebuild both sequences as tuples (the type zip(*pairs) produced before).
# Unlike zip(*pairs), this also works when there are no pairs at all.
train_img_paths = tuple(img_path for img_path, _ in paired_train)
train_mask_paths = tuple(mask_path for _, mask_path in paired_train)

print(f"Cantidad de pares: {len(paired_train)}")  # expected: 4


Cantidad de pares: 4


In [16]:
# Rebuild the training dataset and loader from the paired path lists.
train_dataset = UrbanDataset(
    img_paths=train_img_paths,
    mask_paths=train_mask_paths,
    transform=transform,
)
# Adjust batch_size to what your GPU can hold.
train_loader = DataLoader(dataset=train_dataset, batch_size=2, shuffle=True)


In [18]:
from albumentations import Resize

# Augmentation pipeline. The leading resize brings image and mask to the same
# 256x256 size before the random flips and 90-degree rotations.
transform = Compose(
    transforms=[
        Resize(height=256, width=256),
        HorizontalFlip(),
        VerticalFlip(),
        RandomRotate90(),
    ]
)


In [22]:
# Recreate the training dataset so it uses the current `transform`, then wrap
# it in a shuffling loader that yields batches of two samples.
train_dataset = UrbanDataset(
    img_paths=train_img_paths,
    mask_paths=train_mask_paths,
    transform=transform,
)
train_loader = DataLoader(dataset=train_dataset, batch_size=2, shuffle=True)


In [20]:
# Augmentation pipeline for image/mask pairs whose sizes may differ on disk.
# is_check_shapes=False stops Compose from rejecting such pairs, so the resize
# must stay in the pipeline: without it, image and mask would come out at
# different sizes and samples could not be stacked into a batch or compared
# with the model output.
transform = Compose([
    Resize(256, 256),   # bring image and mask to one common size
    HorizontalFlip(),
    VerticalFlip(),
    RandomRotate90()
], is_check_shapes=False)


In [23]:
import segmentation_models_pytorch as smp

# Choose the compute device here so this cell does not depend on a `device`
# variable left over from some other cell (none is defined in this notebook).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# U-Net with a ResNet-34 encoder initialised from ImageNet weights; RGB input,
# one output channel. Moved to the selected device.
model = smp.Unet(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1
).to(device)

# Dice loss in binary mode for the single-channel mask, optimised with Adam.
loss_fn = smp.losses.DiceLoss(mode='binary')
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


In [26]:
from PIL import Image

# Integrity check: report every training image that PIL cannot open or verify,
# together with the reason, so the cause of the failure is visible.
for p in train_img_paths:
    try:
        # The context manager closes the file handle even when verify() raises.
        with Image.open(p) as img:
            img.verify()
    except Exception as exc:  # keep scanning the remaining files
        print(f"{p} CORRUPTO: {type(exc).__name__}: {exc}")
    else:
        print(f"{p} OK")


/workspaces/PI-EJM/dataset/train/images/img_01.png CORRUPTO
/workspaces/PI-EJM/dataset/train/images/img_02.png CORRUPTO
/workspaces/PI-EJM/dataset/train/images/img_03.png CORRUPTO
/workspaces/PI-EJM/dataset/train/images/img_04.png CORRUPTO


In [27]:
import matplotlib.pyplot as plt

# Qualitative check: run the model on one training batch and show the first
# sample's image, ground-truth mask and predicted mask side by side.
# NOTE(review): no training loop is visible in this notebook — confirm the
# model has been trained before reading much into the prediction.
model.eval()  # switch to evaluation mode
with torch.no_grad():
    # Take one batch of images and masks.
    images, masks = next(iter(train_loader))
    images, masks = images.to(device), masks.to(device)

    # The U-Net is created without an output activation, so it returns logits.
    # Convert them to probabilities before applying the 0.5 threshold.
    probs = torch.sigmoid(model(images))
    preds = (probs > 0.5).float()

# First sample of the batch, moved to NumPy for plotting.
image = images[0].permute(1, 2, 0).cpu().numpy()  # C,H,W -> H,W,C
mask = masks[0][0].cpu().numpy()                  # first mask channel
pred = preds[0][0].cpu().numpy()                  # first prediction channel

# One row, three panels.
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
panels = (
    (image, "Imagen original"),
    (mask, "Máscara real"),
    (pred, "Predicción U-Net"),
)
for ax, (panel, title) in zip(axes, panels):
    ax.imshow(panel)
    ax.set_title(title)

plt.show()



UnidentifiedImageError: cannot identify image file '/workspaces/PI-EJM/dataset/train/images/img_02.png'