In [None]:
import os
import cv2
import pandas as pd

from pathlib import Path

from xrkit.base import CONFIG

# Move the working directory one level up from wherever the kernel started.
# NOTE(review): presumably this makes the relative paths in CONFIG resolve from
# the project root — confirm. The call is not idempotent: re-running this cell
# without restarting the kernel moves up one more directory each time.
os.chdir("..")

In [None]:
# Load only the first 1000 rows of the image-level metadata for a quick look.
info = pd.read_csv(
    Path(CONFIG.data.raw.path) / "Data_Entry_2017.csv",
    nrows=1000,
)
info.head()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Bar chart of the 15 most frequent "Finding Labels" values in the loaded rows.
label_counts = info["Finding Labels"].value_counts().iloc[:15]
tick_positions = np.arange(len(label_counts)) + 0.5  # bar centres, reused for the ticks

fig, ax1 = plt.subplots(1, 1, figsize=(12, 8))
ax1.bar(tick_positions, label_counts)
ax1.set_xticks(tick_positions)
_ = ax1.set_xticklabels(label_counts.index, rotation=90)

In [None]:
# Rebind `info` to the bounding-box annotations (one row per annotated box).
info = pd.read_csv(
    Path(CONFIG.data.raw.path) / "BBox_List_2017.csv",
)
info.head()

In [None]:
from sklearn.model_selection import train_test_split

# Stratified 80/20 split of the bounding-box rows: X holds the image file
# names, y the finding label used for stratification (so both sides keep
# roughly the same label proportions). The seed makes the split reproducible.
X = info["Image Index"]
y = info["Finding Label"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=34
)
X_train

In [None]:
# Draw a reproducible 15% random sample of the rows (no stratification).
test = info.sample(frac=0.15, random_state=34)
# Relative frequency of each finding label within that sample.
test["Finding Label"].value_counts(normalize=True)

In [None]:
# Select the held-out rows by row label rather than by image name. Matching on
# "Image Index" values would also pull in rows that the split assigned to the
# training side whenever an image name occurs on more than one row.
test = info[info.index.isin(X_test.index)]
test

In [None]:
# Select the training rows by row label rather than by image name, so that a
# repeated image name cannot drag rows from the held-out side into `train`.
train = info[info.index.isin(X_train.index)]
# Relative frequency of each finding label within the training rows.
train["Finding Label"].value_counts(normalize=True)

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from PIL import Image
import torchvision

# Side length (in pixels) passed to the Resize transform of SegmentationDataset;
# taken from the project configuration.
IMAGE_SIZE = CONFIG.base.image_size


class SegmentationDataset(Dataset):
    """Image / bounding-box-mask pairs built from ``BBox_List_2017.csv``.

    The annotation file is split 80/20 (stratified on ``"Finding Label"``,
    fixed seed) and the instance serves the rows of the requested subset.
    Each item is an ``(image, mask)`` pair in which the mask is a filled
    rectangle covering the annotated bounding box.

    Args:
        data_subset: Which split to serve, ``"train"`` or ``"test"``.

    Raises:
        ValueError: If ``data_subset`` is neither ``"train"`` nor ``"test"``.
    """

    def __init__(self, data_subset: str):
        self.data_subset = data_subset

        # The same pipeline is applied to the image and to its mask.
        self.transform = torchvision.transforms.Compose(
            [
                torchvision.transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
                torchvision.transforms.ToTensor(),
            ]
        )

        self.set_info = self.split_data()

    def __len__(self):
        """Return the number of annotation rows in the selected subset."""
        return len(self.set_info)

    def __getitem__(self, index, transform=True):
        """Return the ``(image, mask)`` pair for the row at position ``index``.

        Args:
            index: Positional row index into the selected subset.
            transform: When true, apply the resize + to-tensor pipeline to
                both image and mask; otherwise return the PIL images.

        Raises:
            FileNotFoundError: If the image file cannot be found anywhere
                under ``CONFIG.data.raw.path``.
        """
        # Read from this instance's subset, not from a notebook-level frame,
        # so that "test" really serves test rows and len() matches the data.
        row = self.set_info.iloc[index]
        image_name = row["Image Index"]
        top_left_x, top_left_y, width, height = row[["Bbox [x", "y", "w", "h]"]].astype(int).values

        # A bare next() would raise StopIteration for a missing file, which an
        # iterating caller can mistake for the normal end of the data.
        image_file = next(Path(CONFIG.data.raw.path).rglob(image_name), None)
        if image_file is None:
            raise FileNotFoundError(f"Image '{image_name}' not found under '{CONFIG.data.raw.path}'.")
        image = Image.open(image_file.as_posix())

        # PIL reports (width, height); the array is indexed (row, column).
        image_shape = image.size[::-1]
        mask = np.zeros(image_shape, dtype=np.uint8)
        mask[top_left_y : top_left_y + height, top_left_x : top_left_x + width] = 255
        mask = Image.fromarray(mask)

        if transform:
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask

    def split_data(self):
        """Load the annotation file and return the rows of the requested subset.

        Returns:
            The DataFrame rows belonging to ``self.data_subset``.

        Raises:
            ValueError: If ``self.data_subset`` is not ``"train"`` or ``"test"``.
        """
        self.info = pd.read_csv(Path(CONFIG.data.raw.path, "BBox_List_2017.csv"))

        X = self.info["Image Index"]
        y = self.info["Finding Label"]

        X_train, X_test, _, _ = train_test_split(X, y, test_size=0.2, stratify=y, random_state=34)

        # Pick rows by row label: matching on image names would place a row in
        # both subsets whenever the same image name appears on several rows.
        data_mapping = {
            "train": self.info[self.info.index.isin(X_train.index)],
            "test": self.info[self.info.index.isin(X_test.index)],
        }

        if self.data_subset not in data_mapping:
            raise ValueError("Invalid data type. Choose from 'train' or 'test'.")
        return data_mapping[self.data_subset]


# Smoke test: build the training dataset and fetch the item at position 2,
# with the resize / to-tensor transform applied.
image, mask = SegmentationDataset(data_subset="train").__getitem__(
    index=2,
    transform=True,
)

In [None]:
# Render the first channel of the image and of the mask inline.
# ToTensor() yields floats in [0, 1] for 8-bit inputs, so scale back to 0-255
# and cast to uint8; a float array would become a mode-"F" image instead.
adj = Image.fromarray((image.numpy()[0] * 255).round().astype(np.uint8))
adj2 = Image.fromarray((mask.numpy()[0] * 255).round().astype(np.uint8))

# Image.show() opens an external viewer and returns None, so pass the images
# themselves to display() to get them rendered in the notebook.
display(adj, adj2)

In [None]:
# Show the image and mask objects themselves (their default notebook repr).
display(image, mask)

In [None]:
# Fetch item 2 of the training subset with the default transform setting.
# Despite its name, `image_shape` receives whatever __getitem__ returns
# (the image / mask pair), not a shape.
image_shape = SegmentationDataset(data_subset="train")[2]

# Scratch example of building a rectangular mask by hand (kept for reference):
# image_shape = (512, 512)  # Example image shape
# x, y, w, h = 10, 20, 30, 40
# mask = np.zeros(image_shape, dtype=np.uint8)
# mask[y: y+h, x: x+w] = 255
# Image.fromarray(mask)

In [None]:
# Unpack the file name and bounding-box fields of the first training row.
# iloc is positional, so the first row is the same with or without an index reset.
image_path, top_left_x, top_left_y, width, height = train.iloc[0][
    ["Image Index", "Bbox [x", "y", "w", "h]"]
].to_numpy()
image_path, top_left_x, top_left_y, width, height

In [None]:
# Read the test list: one entry per line, surrounding whitespace removed and
# blank lines skipped so they do not become empty entries.
with open(Path(CONFIG.data.raw.path, "test_list.txt"), "r", encoding="utf-8") as file:
    test_list = [line.strip() for line in file if line.strip()]

# Preview only: printing the full list would flood the notebook output.
print(len(test_list), "entries")
test_list[:5]

In [None]:
# Read the train/validation list: one entry per line, surrounding whitespace
# removed and blank lines skipped so they do not become empty entries.
with open(Path(CONFIG.data.raw.path, "train_val_list.txt"), "r", encoding="utf-8") as file:
    train_val_list = [line.strip() for line in file if line.strip()]

# Preview only: printing the full list would flood the notebook output.
print(len(train_val_list), "entries")
train_val_list[:5]

In [None]:
# Side length (in pixels) used by the Resize transform in LowLightDataset.
# Rebinding the name here replaces the CONFIG.base.image_size value assigned
# in an earlier cell.
IMAGE_SIZE = 512
# Batch size handed to the DataLoader built in data_generator.
BATCH_SIZE = 16
# Number of image paths taken for each of the train / validation / test slices.
NUMBER = 100

import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision


class LowLightDataset(Dataset):
    """Dataset of images loaded from a list of file paths.

    Every item is the image converted to RGB, resized to
    ``IMAGE_SIZE x IMAGE_SIZE`` and converted to a float tensor.

    Args:
        image_paths: Indexable collection of image file paths.
    """

    def __init__(self, image_paths):
        self.image_paths = image_paths
        # ToTensor() already maps 8-bit RGB pixels to floats in [0, 1];
        # dividing by 255 again would squash every value below ~0.004.
        self.transform = torchvision.transforms.Compose(
            [
                torchvision.transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
                torchvision.transforms.ToTensor(),
            ]
        )

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, convert and transform the image at position ``idx``."""
        img_path = self.image_paths[idx]
        # convert() returns an independent in-memory copy, so the file handle
        # can be released as soon as the conversion is done.
        with Image.open(img_path) as source:
            image = source.convert("RGB")
        return self.transform(image)


def data_generator(low_light_images):
    """Build a DataLoader over the given image paths.

    The paths are wrapped in a LowLightDataset and served in their original
    order (no shuffling) in batches of ``BATCH_SIZE``, loaded in the main
    process, with pinned memory and with the final partial batch kept.

    Args:
        low_light_images: Image file paths to serve.

    Returns:
        The configured ``DataLoader``.
    """
    return DataLoader(
        LowLightDataset(low_light_images),
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        drop_last=False,
    )


# Collect every PNG under the raw-data directory, resolved the same way as in
# the other cells (relative to the working directory set in the first cell).
# rglob() yields files in file-system order, so sort to make the selection
# below reproducible across machines and runs.
images = sorted(image.as_posix() for image in Path(CONFIG.data.raw.path).rglob("*.png"))

# Smoke test on the first NUMBER images: every batch should be a 4-D tensor.
low_light_images = images[:NUMBER]
dataloader = data_generator(low_light_images)

for batch in dataloader:
    print(batch.shape)

In [None]:
# Collect every PNG under the raw-data directory, resolved the same way as in
# the other cells (relative to the working directory set in the first cell).
# Sorted so that the three slices below are the same on every run.
images = sorted(image.as_posix() for image in Path(CONFIG.data.raw.path).rglob("*.png"))

# Three consecutive, non-overlapping slices of NUMBER images each.
train_low_light_images = images[:NUMBER]
val_low_light_images = images[NUMBER : 2 * NUMBER]
test_low_light_images = images[2 * NUMBER : 3 * NUMBER]


train_loader = data_generator(train_low_light_images)
validation_loader = data_generator(val_low_light_images)

# Report sizes instead of the loaders' object reprs, which carry no information.
print("Train Dataset:", len(train_loader.dataset), "images in", len(train_loader), "batches")
print("Validation Dataset:", len(validation_loader.dataset), "images in", len(validation_loader), "batches")

In [3]:
# LowLightDataset yields image tensors only, so each batch from train_loader is
# a single tensor, not an (inputs, targets) pair. Unpacking it into two names
# would iterate over the batch dimension and fail unless the batch held
# exactly two images.
inputs = next(iter(train_loader))
inputs.shape