# **Project SEE-DR: Diabetic Retinopathy Segmentation Data Pipeline**
Data is loaded, augmented, and saved into .pt files for training.

In [5]:
import pandas as pd
import numpy as np
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations import ToTensorV2
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import cv2
import os
from sklearn.model_selection import train_test_split
print("")




## **1. Create DataFrames**
Extracts the image and mask paths from the folder. All data is extracted at once into one dataframe. Train-test 75/25 split is applied after we get the full dataframe.

**Important!: Files must have this exact structure:**  
```
cmac-segmentation (root)/
├── DDR-SEGMENTATION/
│   ├── train/
│   │   ├── image/
│   │   └── label/
│   │       ├── EX/
│   │       ├── HE/
│   │       ├── MA/
│   │       └── SE/
│   ├── test/
│   │   ├── image/
│   │   └── label/
│   │       ├── EX/
│   │       ├── HE/
│   │       ├── MA/
│   │       └── SE/
│   └── valid/
│       ├── image/
│       └── label/
│           ├── EX/
│           ├── HE/
│           ├── MA/
│           └── SE/
└── IDRID/
    ├── Original_Images/
    │   ├── test/
    │   └── train/
    └── Segmentation_Groundtruths/
        ├── test/
        │   ├── 1. Microaneurysms/
        │   ├── 2. Haemorrhages/
        │   ├── 3. Hard Exudates/
        │   ├── 4. Soft Exudates/
        │   └── 5. Optic Disk/
        └── train/
            ├── 1. Microaneurysms/
            ├── 2. Haemorrhages/
            ├── 3. Hard Exudates/
            ├── 4. Soft Exudates/
            └── 5. Optic Disk/
```

In [6]:
def build_dataframe(ddr_root: str, idrid_root: str):
    rows = []
    for root, dirs, files in os.walk(ddr_root):
        for f in files:
            if f.lower().endswith('.jpg') and os.path.basename(root) == "image":
                img_path = os.path.join(root, f)
                base = os.path.splitext(f)[0]
                label_root = root.replace("image", "label")

                ex = os.path.join(label_root, "EX", base + ".tif")
                he = os.path.join(label_root, "HE", base + ".tif")
                ma = os.path.join(label_root, "MA", base + ".tif")
                se = os.path.join(label_root, "SE", base + ".tif")

                rows.append({
                    "dataset": "DDR",
                    "image_path": img_path,
                    "ex_path": ex if os.path.exists(ex) else None,
                    "he_path": he if os.path.exists(he) else None,
                    "ma_path": ma if os.path.exists(ma) else None,
                    "se_path": se if os.path.exists(se) else None,
                    "od_path": None
                })
    for split in ["train", "test"]:
        img_dir = os.path.join(idrid_root, "Original_Images", split)
        if not os.path.exists(img_dir):
            continue

        for f in os.listdir(img_dir):
            if f.lower().endswith(('.jpg', '.png', '.jpeg', '.tif')):
                img_path = os.path.join(img_dir, f)
                base = os.path.splitext(f)[0]

                mask_base = os.path.join(idrid_root, "Segmentation_Groundtruths", split)

                ma = os.path.join(mask_base, "1. Microaneurysms", base + "_MA.tif")
                he = os.path.join(mask_base, "2. Haemorrhages", base + "_HE.tif")
                ex = os.path.join(mask_base, "3. Hard Exudates", base + "_EX.tif")
                se = os.path.join(mask_base, "4. Soft Exudates", base + "_SE.tif")
                od = os.path.join(mask_base, "5. Optic Disc", base + "_OD.tif")

                rows.append({
                    "dataset": "IDRID",
                    "image_path": img_path,
                    "ma_path": ma if os.path.exists(ma) else None,
                    "he_path": he if os.path.exists(he) else None,
                    "ex_path": ex if os.path.exists(ex) else None,
                    "se_path": se if os.path.exists(se) else None,
                    "od_path": od if os.path.exists(od) else None
                })

    df = pd.DataFrame(rows)
    return df

df = build_dataframe('DDR-SEGMENTATION', 'IDRID')


In [7]:
df.isna().sum()

dataset         0
image_path      0
ex_path         0
he_path         1
ma_path         0
se_path        41
od_path       757
dtype: int64

In [8]:
df.shape

(838, 7)

In [9]:
df.sample(10)

Unnamed: 0,dataset,image_path,ex_path,he_path,ma_path,se_path,od_path
486,DDR,DDR-SEGMENTATION/train/image/007-3445-200.jpg,DDR-SEGMENTATION/train/label/EX/007-3445-200.tif,DDR-SEGMENTATION/train/label/HE/007-3445-200.tif,DDR-SEGMENTATION/train/label/MA/007-3445-200.tif,DDR-SEGMENTATION/train/label/SE/007-3445-200.tif,
737,DDR,DDR-SEGMENTATION/train/image/007-6383-400.jpg,DDR-SEGMENTATION/train/label/EX/007-6383-400.tif,DDR-SEGMENTATION/train/label/HE/007-6383-400.tif,DDR-SEGMENTATION/train/label/MA/007-6383-400.tif,DDR-SEGMENTATION/train/label/SE/007-6383-400.tif,
621,DDR,DDR-SEGMENTATION/train/image/007-2272-100.jpg,DDR-SEGMENTATION/train/label/EX/007-2272-100.tif,DDR-SEGMENTATION/train/label/HE/007-2272-100.tif,DDR-SEGMENTATION/train/label/MA/007-2272-100.tif,DDR-SEGMENTATION/train/label/SE/007-2272-100.tif,
438,DDR,DDR-SEGMENTATION/train/image/007-3308-200.jpg,DDR-SEGMENTATION/train/label/EX/007-3308-200.tif,DDR-SEGMENTATION/train/label/HE/007-3308-200.tif,DDR-SEGMENTATION/train/label/MA/007-3308-200.tif,DDR-SEGMENTATION/train/label/SE/007-3308-200.tif,
435,DDR,DDR-SEGMENTATION/train/image/007-5459-300.jpg,DDR-SEGMENTATION/train/label/EX/007-5459-300.tif,DDR-SEGMENTATION/train/label/HE/007-5459-300.tif,DDR-SEGMENTATION/train/label/MA/007-5459-300.tif,DDR-SEGMENTATION/train/label/SE/007-5459-300.tif,
589,DDR,DDR-SEGMENTATION/train/image/007-5518-300.jpg,DDR-SEGMENTATION/train/label/EX/007-5518-300.tif,DDR-SEGMENTATION/train/label/HE/007-5518-300.tif,DDR-SEGMENTATION/train/label/MA/007-5518-300.tif,DDR-SEGMENTATION/train/label/SE/007-5518-300.tif,
756,DDR,DDR-SEGMENTATION/train/image/007-4980-300.jpg,DDR-SEGMENTATION/train/label/EX/007-4980-300.tif,DDR-SEGMENTATION/train/label/HE/007-4980-300.tif,DDR-SEGMENTATION/train/label/MA/007-4980-300.tif,DDR-SEGMENTATION/train/label/SE/007-4980-300.tif,
134,DDR,DDR-SEGMENTATION/valid/image/007-7236-400.jpg,DDR-SEGMENTATION/valid/label/EX/007-7236-400.tif,DDR-SEGMENTATION/valid/label/HE/007-7236-400.tif,DDR-SEGMENTATION/valid/label/MA/007-7236-400.tif,DDR-SEGMENTATION/valid/label/SE/007-7236-400.tif,
653,DDR,DDR-SEGMENTATION/train/image/007-2371-100.jpg,DDR-SEGMENTATION/train/label/EX/007-2371-100.tif,DDR-SEGMENTATION/train/label/HE/007-2371-100.tif,DDR-SEGMENTATION/train/label/MA/007-2371-100.tif,DDR-SEGMENTATION/train/label/SE/007-2371-100.tif,
348,DDR,DDR-SEGMENTATION/test/image/20170510144008679.jpg,DDR-SEGMENTATION/test/label/EX/201705101440086...,DDR-SEGMENTATION/test/label/HE/201705101440086...,DDR-SEGMENTATION/test/label/MA/201705101440086...,DDR-SEGMENTATION/test/label/SE/201705101440086...,


In [10]:
# Split the dataframe for testing and training
train_df, test_df = train_test_split(
    df,
    test_size = 0.25,
    random_state = 42,
    shuffle = True
)

train_df = train_df.reset_index(drop = True)
test_df  = test_df.reset_index(drop = True)

## **2. Dataset and Augmentation Definitions**

In [11]:
resize_dimensions = 1024

def center_crop_largest_square(image, **kwargs):
    h, w = image.shape[:2]
    min_dim = min(h, w)
    top = (h - min_dim) // 2
    left = (w - min_dim) // 2
    return image[top : top + min_dim, left:left + min_dim]

In [23]:
class FundusSegmentationDataset(Dataset):
    """
    Helps produces the Fundus Dataset. There are 5 transform_type's. 
    We apply this class 5 times for each transform.

    Transforms:
    All transforms are applied to the masks and the image
    All transforms starting by cropping the largest square possible from the center of the image
    All transforms end by resizing to (self.dimensions, self.dimensions)
        t1 and test: Only Resize
        t2: Horizontal Flip
        t3: + or - 15% max zoom
        t4: Brightness and Contrast, Random Gamma
        t5: + or - 15 degree max rotation

    Default dimensions are 1024x1024. 
    Each mask is its own channel so the returned shapes are:
    image: (3, 1024, 1024)
    masks: (5, 1024, 1024)
    """
    def __init__(self, df: pd.DataFrame, dimensions: int = 1024, transform_type = None):
        self.df = df.reset_index(drop = True)
        self.dimensions = dimensions
        self.transform_type = transform_type
        self.transforms = self._build_transforms(transform_type)

    def _build_transforms(self, ttype):
        """ttype is any of (t1, t2, t3, t4, t5, test)"""
        if ttype is None:
            return None
        if ttype == 't1' or ttype == 'test':
            return A.Compose(
                [
                    A.Lambda(
                        image = center_crop_largest_square,
                        mask = center_crop_largest_square
                    ),
                    A.Resize(self.dimensions, self.dimensions)
            ],
            additional_targets = {
                    "mask1": "mask",
                    "mask2": "mask",
                    "mask3": "mask",
                    "mask4": "mask",
                    "mask5": "mask",
                }
            )
        if ttype == "t2":
            return A.Compose(
                [
                    A.Lambda(
                        image = center_crop_largest_square,
                        mask = center_crop_largest_square
                    ),
                    A.HorizontalFlip(p = 1.0),
                    A.Resize(self.dimensions, self.dimensions)
                ], 
                additional_targets = {
                    "mask1": "mask",
                    "mask2": "mask",
                    "mask3": "mask",
                    "mask4": "mask",
                    "mask5": "mask",
                }
            )
        if ttype == "t3":
            return A.Compose(
                [
                    A.Lambda(
                        image = center_crop_largest_square,
                        mask = center_crop_largest_square
                    ),
                    A.ShiftScaleRotate(
                        shift_limit = 0.0,
                        scale_limit = 0.15, 
                        rotate_limit = 0,
                        border_mode = cv2.BORDER_CONSTANT,
                        value = 0,
                        mask_value = 0,
                        p = 1.0
                    ),
                    A.Resize(self.dimensions, self.dimensions)
                ],
                additional_targets = {
                    "mask1": "mask",
                    "mask2": "mask",
                    "mask3": "mask",
                    "mask4": "mask",
                    "mask5": "mask",
                }
            )
        if ttype == "t4":
            return A.Compose(
                [
                    A.Lambda(
                        image = center_crop_largest_square,
                        mask = center_crop_largest_square
                    ),
                    A.RandomBrightnessContrast(
                        brightness_limit = 0.2,
                        contrast_limit = 0.2,
                        p = 1.0
                    ),
                    A.Gamma(
                        gamma_limit = (80, 120),
                        p = 0.5
                    ),
                    A.Resize(self.dimensions, self.dimensions)
                ],
                additional_targets = {
                    "mask1": "mask",
                    "mask2": "mask",
                    "mask3": "mask",
                    "mask4": "mask",
                    "mask5": "mask",
                }
            )
        if ttype == "t5":
            return A.Compose(
                [
                    A.Lambda(
                        image = center_crop_largest_square,
                        mask = center_crop_largest_square
                    ),
                    A.Rotate(
                        limit = 15,
                        border_mode = cv2.BORDER_CONSTANT,
                        value = 0,
                        mask_value = 0,
                        p = 1.0
                    ),
                    A.Resize(self.dimensions, self.dimensions)
                ],
                additional_targets = {
                    "mask1": "mask",
                    "mask2": "mask",
                    "mask3": "mask",
                    "mask4": "mask",
                    "mask5": "mask",
                }
            )

    def __len__(self):
        return len(self.df)
    def __getitem__(self, index):
        row = self.df.loc[index]
        image = cv2.imread(row.image_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        mask_paths = [row.ex_path, row.he_path, row.ma_path, row.se_path, row.od_path]

        masks = []
        for path in mask_paths:
            if path is None:
                height, width = image.shape[:2]
                mask = np.zeros((height, width), dtype = np.uint8)
            else:
                mask = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                mask = (mask > 0).astype(np.uint8)
            masks.append(mask)

        if self.transforms:
            data = self.transforms(
                image = image,
                mask1 = masks[0],
                mask2 = masks[1],
                mask3 = masks[2],
                mask4 = masks[3],
                mask5 = masks[4]
            )

            image = data["image"]
            masks = [
                data["mask1"],
                data["mask2"],
                data["mask3"],
                data["mask4"],
                data["mask5"]
            ]

        image = torch.from_numpy(image).permute(2, 0, 1).float() / 255.
        masks = torch.stack([torch.from_numpy(m) for m in masks]).float()

        return image, masks