# ProjectWork Deep Learning
https://www.kaggle.com/competitions/mlnomads-mlolympiad24/overview

In [None]:
# 📦 1. Imports
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T

import albumentations as A
from albumentations.pytorch import ToTensorV2

import segmentation_models_pytorch as smp


In [None]:
# Mount Google Drive at /content/drive so the dataset directories configured
# below (which live under /content/drive/MyDrive) can be read.
# Requires the google.colab package, i.e. a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

## Upload Data

In [None]:
# Paths

# Alternative relative paths (presumably for running outside Colab);
# uncomment these and comment out the Drive paths below to use them.
# TRAIN_IMG_DIR = "./Data/train/"
# TRAIN_MASK_DIR = "./Data/train_labels/"
# TEST_IMG_DIR = "./test/"

# Dataset directories on the mounted Google Drive.
TRAIN_IMG_DIR = "/content/drive/MyDrive/MoroccoDataset/train/"
TRAIN_MASK_DIR = "/content/drive/MyDrive/MoroccoDataset/train_labels/"
TEST_IMG_DIR = "/content/drive/MyDrive/MoroccoDataset/test/"

# Image side length in pixels (the notebook targets 256x256 masks).
IMG_SIZE = 256


In [None]:
# Visualize sample
def visualize_sample(idx):
    """Plot a training image side by side with its water mask.

    Args:
        idx: File stem shared by the image (``<idx>.jpg`` in TRAIN_IMG_DIR)
            and its mask (``<idx>.png`` in TRAIN_MASK_DIR).

    Raises:
        FileNotFoundError: If the image or the mask cannot be read.
    """
    img_path = os.path.join(TRAIN_IMG_DIR, f"{idx}.jpg")
    mask_path = os.path.join(TRAIN_MASK_DIR, f"{idx}.png")

    # cv2.imread reports failure by returning None rather than raising, so
    # check explicitly instead of failing later inside cvtColor/imshow.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    mask = cv2.imread(mask_path, 0)  # flag 0 -> single-channel grayscale
    if mask is None:
        raise FileNotFoundError(f"Could not read mask: {mask_path}")

    # OpenCV loads colour images as BGR; matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(img)
    plt.title("Image")

    plt.subplot(1, 2, 2)
    plt.imshow(mask, cmap="gray")
    plt.title("Water Mask")
    plt.show()

visualize_sample("img_001")

## Explore Data
- Water vs non-water distribution
- Image diversity: snow, desert, coast, etc.
- Empty masks (no water)
- Potential class imbalance

## Build a Baseline Model
Start with a simple U-Net or DeepLabV3+ model:
- Use pre-trained backbones (like resnet34, efficientnet) via libraries like segmentation_models.pytorch or tensorflow.keras.applications.
- Input: 3-channel RGB
- Output: 256x256 binary mask (the model emits single-channel logits; apply a sigmoid and threshold to get the mask)

In [None]:
# Dataset class
class WaterDataset(Dataset):
    """Images (and optionally water masks) read from disk with OpenCV.

    Every file in ``image_dir`` is treated as one sample, in sorted filename
    order. When ``mask_dir`` is given, the mask for ``<stem>.<ext>`` is read
    from ``<mask_dir>/<stem>.png``; otherwise an all-zero placeholder mask
    matching the image size is returned so both cases yield the same tuple.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the PNG masks, or None for unlabeled
            data (e.g. the test set).
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            and returning a mapping with ``"image"`` and ``"mask"`` entries
            (the Albumentations calling convention).
    """

    def __init__(self, image_dir, mask_dir=None, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.image_ids = sorted(os.listdir(image_dir))
        self.transform = transform
        self.has_mask = mask_dir is not None

    def __len__(self):
        """Return the number of files found in ``image_dir``."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return ``(image, mask)`` for sample ``idx``.

        The mask is a float tensor with values in {0, 1}. A 2-D mask coming
        out of the transform gets a leading channel axis, giving (1, H, W),
        which matches the (B, 1, H, W) logits of a single-class model after
        batching. The image is whatever the transform returns (the RGB numpy
        array when no transform is set).

        Raises:
            FileNotFoundError: If the image or its mask cannot be read.
        """
        img_id = self.image_ids[idx]
        img_path = os.path.join(self.image_dir, img_id)
        img = cv2.imread(img_path)
        # cv2.imread returns None on failure instead of raising.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.has_mask:
            stem = os.path.splitext(img_id)[0]
            mask_path = os.path.join(self.mask_dir, stem + ".png")
            mask = cv2.imread(mask_path, 0)
            if mask is None:
                raise FileNotFoundError(f"Could not read mask: {mask_path}")
            # Binarize so the loss always sees 0/1 targets, whether the PNG
            # stores water as 1 or as 255 (assumes any non-zero pixel is
            # water -- TODO confirm against the dataset).
            mask = (mask > 0).astype(np.uint8)
        else:
            # Placeholder sized from the image itself, not a global constant.
            mask = np.zeros(img.shape[:2], dtype=np.uint8)

        # Explicit None check: a transform object may define __len__/__bool__.
        if self.transform is not None:
            augmented = self.transform(image=img, mask=mask)
            img = augmented["image"]
            mask = augmented["mask"]

        # The mask is kept 2-D through augmentation because ToTensorV2 does
        # not move a trailing channel axis of masks by default; add the
        # channel axis here so the layout is channel-first.
        mask = torch.as_tensor(mask)
        if mask.ndim == 2:
            mask = mask.unsqueeze(0)
        return img, mask.float()
    
# Transforms
# Training pipeline: random flips and brightness/contrast jitter, followed by
# normalization and conversion to a tensor.
train_transform = A.Compose(
    transforms=[
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.Normalize(),
        ToTensorV2(),
    ],
)

# Validation/test pipeline: no random augmentation, only normalization and
# conversion to a tensor.
val_transform = A.Compose(
    transforms=[
        A.Normalize(),
        ToTensorV2(),
    ],
)

In [None]:
# Model
# U-Net with an ImageNet-pretrained ResNet-34 encoder: 3 input channels and a
# single output channel (one class).
model = smp.Unet("resnet34", encoder_weights="imagenet", in_channels=3, classes=1)

## Augmentations & Dataloaders
Use Albumentations for realistic augmentations and create a custom Dataset class for training.

In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Example augmentation pipeline using each transform's default probability.
# NOTE(review): the other visible cells use train_transform / val_transform,
# not this object -- confirm whether it is still needed.
transform = A.Compose(
    transforms=[
        A.HorizontalFlip(),
        A.VerticalFlip(),
        A.RandomBrightnessContrast(),
        A.Normalize(),
        ToTensorV2(),
    ],
)

## Loss Functions
Dice loss is your best friend here. Combine it with BCE for stability, or use `smp.losses.DiceLoss` directly.

In [None]:
# Loss & Optimizer
def dice_loss(pred, target, smooth=1.):
    """Return the soft Dice loss averaged over batch and channel entries.

    Both tensors are reduced over dimensions 2 and 3, so they need at least
    four dimensions (e.g. batch, channel, height, width).

    Args:
        pred: Predicted values (probabilities when called from ``loss_fn``).
        target: Ground-truth tensor, multiplied element-wise with ``pred``.
        smooth: Constant added to numerator and denominator so the ratio
            stays defined when both sums are zero.

    Returns:
        Scalar tensor: mean of ``1 - dice`` over the remaining dimensions.
    """
    pred, target = pred.contiguous(), target.contiguous()

    def _sum_dims_2_3(t):
        # After the first reduction the old dim 3 becomes dim 2.
        return t.sum(dim=2).sum(dim=2)

    overlap = _sum_dims_2_3(pred * target)
    denominator = _sum_dims_2_3(pred) + _sum_dims_2_3(target) + smooth
    per_item = 1 - ((2. * overlap + smooth) / denominator)
    return per_item.mean()

# BCE operates on raw logits (it applies the sigmoid internally).
bce = nn.BCEWithLogitsLoss()

def loss_fn(pred, target):
    """Return the equally weighted sum of BCE-with-logits and Dice loss.

    Args:
        pred: Raw model logits.
        target: Ground-truth mask with the same shape as ``pred``.
    """
    bce_term = bce(pred, target)
    # Dice is computed on probabilities, so squash the logits first.
    dice_term = dice_loss(torch.sigmoid(pred), target)
    return 0.5 * bce_term + 0.5 * dice_term

# Adam over all model parameters with a fixed learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)


## Training & Validation Loop
Train your model on the dataset, validate on a hold-out split or cross-validation fold.

Track:
- Dice score on val set
- Loss curves
- Qualitative predictions

In [None]:
# Train Loop
def train_fn(model, loader, loss_fn, optimizer):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable of ``(images, masks)`` batches that supports ``len()``.
        loss_fn: Callable ``loss_fn(predictions, masks)`` returning a scalar
            tensor.
        optimizer: Optimizer that updates the model's parameters.

    Returns:
        Sum of the batch losses divided by ``len(loader)``.

    Raises:
        ZeroDivisionError: If ``loader`` is empty.
    """
    model.train()
    # Follow the model's device instead of hard-coding CUDA, so the same loop
    # also runs on CPU-only machines.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    running_loss = 0
    for imgs, masks in tqdm(loader):
        imgs, masks = imgs.to(device), masks.to(device)
        preds = model(imgs)
        loss = loss_fn(preds, masks)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(loader)

# Validation Loop
def validate_fn(model, loader):
    """Return the mean per-batch Dice score of thresholded predictions.

    For every batch the logits are passed through a sigmoid, thresholded at
    0.5, and a single Dice score is computed over all pixels in the batch.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        loader: Iterable of ``(images, masks)`` batches. Masks must have the
            same shape as the model output.

    Returns:
        Mean of the per-batch Dice scores (``nan`` for an empty loader).

    Raises:
        ValueError: If a mask batch and the predictions differ in shape.
    """
    eps = 1e-7
    model.eval()
    # Follow the model's device instead of hard-coding CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    dices = []
    with torch.no_grad():
        for imgs, masks in loader:
            imgs, masks = imgs.to(device), masks.to(device)
            preds = (torch.sigmoid(model(imgs)) > 0.5).float()
            # Mismatched layouts would otherwise broadcast silently and yield
            # a meaningless score.
            if preds.shape != masks.shape:
                raise ValueError(
                    f"Mask shape {tuple(masks.shape)} does not match "
                    f"prediction shape {tuple(preds.shape)}"
                )
            intersection = (preds * masks).sum()
            total = preds.sum() + masks.sum()
            # eps in the numerator as well, so an empty prediction on an
            # empty mask scores 1 (a perfect match) instead of 0.
            dice = (2. * intersection + eps) / (total + eps)
            dices.append(dice.item())
    return np.mean(dices)

In [None]:
# Train & Eval
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Two views of the same files: one with training augmentations, one without.
full_train_ds = WaterDataset(TRAIN_IMG_DIR, TRAIN_MASK_DIR, transform=train_transform)
full_val_ds = WaterDataset(TRAIN_IMG_DIR, TRAIN_MASK_DIR, transform=val_transform)

# Hold out a fixed, seeded fraction for validation. Validating on the very
# samples the model is trained on would report an optimistic Dice score.
VAL_FRACTION = 0.2
SPLIT_SEED = 42
shuffled = np.random.default_rng(SPLIT_SEED).permutation(len(full_train_ds)).tolist()
n_val = max(1, int(len(shuffled) * VAL_FRACTION))

# Both datasets list the same directory in sorted order, so an index refers
# to the same file in each of them.
val_ds = torch.utils.data.Subset(full_val_ds, shuffled[:n_val])
train_ds = torch.utils.data.Subset(full_train_ds, shuffled[n_val:])

train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=8)

for epoch in range(10):
    train_loss = train_fn(model, train_loader, loss_fn, optimizer)
    val_score = validate_fn(model, val_loader)
    print(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val Dice: {val_score:.4f}")

## Prediction & RLE Encoding
After prediction, convert binary masks into RLE format for submission:

In [None]:
# Prediction & Submission
def mask_to_rle(mask):
    """Run-length encode a binary mask as a ``"start length ..."`` string.

    The mask is flattened in column-major order. Starts are 1-indexed
    positions in that flattened sequence, each followed by the run length.
    A mask with no foreground yields an empty string.

    Args:
        mask: Array whose non-zero entries form the foreground runs.
    """
    column_major = mask.flatten(order="F")
    # Zero padding on both ends guarantees every run has a start and an end.
    padded = np.concatenate([[0], column_major, [0]])
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Odd slots hold run ends; turn them into lengths (end - start).
    boundaries[1::2] -= boundaries[::2]
    return " ".join(map(str, boundaries))

# Unlabeled test set: no mask directory, deterministic transform only.
test_ds = WaterDataset(TEST_IMG_DIR, transform=val_transform)
test_loader = DataLoader(test_ds, batch_size=1, shuffle=False)

model.eval()
rles = []
# Take the file names from the dataset itself rather than listing the
# directory a second time, so names cannot drift out of step with the
# samples the loader yields.
image_names = test_ds.image_ids
# Run inference on whatever device the model currently lives on.
device = next(model.parameters()).device

with torch.no_grad():
    # batch_size=1 and shuffle=False: the i-th batch is the i-th file name.
    for image_name, (img, _) in zip(image_names, tqdm(test_loader)):
        pred = torch.sigmoid(model(img.to(device)))[0, 0].cpu().numpy()
        mask = (pred > 0.5).astype(np.uint8)
        rles.append([image_name, mask_to_rle(mask)])

submission = pd.DataFrame(rles, columns=["image_name", "rle_mask"])
submission.to_csv("submission.csv", index=False)


## Optimize & Experiment
- Try better encoders (EfficientNet, Swin Transformer, ConvNeXt)
- Use TTA (test-time augmentation)
- Try attention-based U-Nets
- Use pseudo-labeling or ensemble different models