I could not manage to download the zip file, unzip it, and load it here by code: I kept getting the same error, a connection error to the GitHub page, although I tried many solutions from the internet. Therefore, the code uses local copies of the files that are available at the following link: https://github.com/betul209/deeplearning/blob/e0522abd50a60b21c57b44967640c706055b4c51/dat.zip

In [24]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

To make the code easier to read and run, I define the directories here:

In [36]:
# Folder holding the original side-by-side images; used as the input of the split step.
# NOTE: machine-specific path -- adjust it to wherever the dataset was unzipped.
DATA_DIR = r"/mnt/c/Users/psybe/Desktop/dat"
# Folder that receives the individual half images; main() later reads the dataset from here.
FINALDATA_DIR = r"/mnt/c/Users/psybe/Desktop/dat_split"


I did not use the provided data for my project, so I found another dataset on the internet and used it instead. Each image file in the dataset contains two drawings side by side, each labeled as either real or impossible depending on whether the depicted object could exist in 3D space. The images are simple drawing illusions, and their labels are encoded in the file names. I only included 800 images from the Illusion Visual Question dataset (impossible object class); it is part of the following project: https://illusionvqa.github.io/

Since each picture contains 2 objects, I first split every image in half and saved the halves as individual images, each with its own label.

In [None]:

def split_and_save_images(input_dir, output_dir):
    """Split every side-by-side image in ``input_dir`` into two labelled halves.

    Source files are expected to be named
    ``<id>_<left label>_<right label>[_...]<ext>``, for example
    ``0000_real_impossible_p.png``. The left half is written to ``output_dir``
    as ``<id>_left_<left label><ext>`` and the right half as
    ``<id>_right_<right label><ext>``.

    A file that cannot be processed (badly formatted name, unreadable image,
    failed save) is reported on stdout and skipped, so one bad file does not
    abort the whole run.

    Args:
        input_dir: Directory containing the combined images.
        output_dir: Directory that receives the halves; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # processing order (and the progress output) reproducible.
    for file_name in sorted(os.listdir(input_dir)):
        # Compare case-insensitively so ".PNG" / ".JPG" files are not silently skipped.
        if not file_name.lower().endswith((".png", ".jpg", ".jpeg")):
            continue

        try:
            # Validate the name first: no point decoding an image we cannot label.
            base_name, ext = os.path.splitext(file_name)
            parts = base_name.split("_")
            if len(parts) < 3:
                raise ValueError(
                    f"Unexpected file name (expected <id>_<left>_<right>...): {file_name}"
                )
            image_id, left_label, right_label = parts[:3]

            left_image_name = f"{image_id}_left_{left_label}{ext}"
            right_image_name = f"{image_id}_right_{right_label}{ext}"

            # The context manager releases the file handle as soon as both
            # halves are written, even if cropping or saving fails.
            with Image.open(os.path.join(input_dir, file_name)) as img:
                width, height = img.size
                mid_width = width // 2

                left_image = img.crop((0, 0, mid_width, height))
                right_image = img.crop((mid_width, 0, width, height))

                left_image.save(os.path.join(output_dir, left_image_name))
                right_image.save(os.path.join(output_dir, right_image_name))

            print(f"Cut in half and saved: {file_name} -> {left_image_name}, {right_image_name}")

        except Exception as e:
            # Deliberately broad: this is a best-effort batch job and every
            # failure is reported together with the offending file name.
            print(f"Error {file_name}: {e}")

# Source folder with the original two-object images.
input_dir = DATA_DIR 
# Destination folder for the individual half images.
output_dir = FINALDATA_DIR

# Executed when the cell/module runs: writes the half images to output_dir.
split_and_save_images(input_dir, output_dir)


Cut in half and saved: 0000_real_impossible_p.png -> 0000_left_real.png, 0000_right_impossible.png
Cut in half and saved: 0001_impossible_real_p.png -> 0001_left_impossible.png, 0001_right_real.png
Cut in half and saved: 0002_real_impossible_p.png -> 0002_left_real.png, 0002_right_impossible.png
Cut in half and saved: 0003_impossible_real_p.png -> 0003_left_impossible.png, 0003_right_real.png
Cut in half and saved: 0004_real_impossible_p.png -> 0004_left_real.png, 0004_right_impossible.png
Cut in half and saved: 0005_impossible_real_p.png -> 0005_left_impossible.png, 0005_right_real.png
Cut in half and saved: 0006_real_impossible_p.png -> 0006_left_real.png, 0006_right_impossible.png
Cut in half and saved: 0007_impossible_real_p.png -> 0007_left_impossible.png, 0007_right_real.png
Cut in half and saved: 0008_real_impossible_p.png -> 0008_left_real.png, 0008_right_impossible.png
Cut in half and saved: 0009_impossible_real_p.png -> 0009_left_impossible.png, 0009_right_real.png
Cut in hal

Now that the data is ready, I define a dataset class named RealImpossibleDataset. It reads the files and returns each image (the input) together with its label (the ground truth).

In [None]:
class RealImpossibleDataset(Dataset):
    """Dataset of images whose class is encoded in the file name.

    A file whose name contains ``"impossible"`` gets label 0; a file whose
    name contains ``"real"`` (and not ``"impossible"``) gets label 1.
    """

    def __init__(self, data_dir, file_list=None, transforms=None):
        """Store where the images live and which of them belong to this dataset.

        Args:
            data_dir: Directory containing the image files.
            file_list: Names of the files (relative to ``data_dir``) to use.
                ``None`` means every entry of ``data_dir``, in sorted order.
                An explicit empty list yields an empty dataset.
            transforms: Optional callable applied to the RGB PIL image.
        """
        self.data_dir = data_dir
        # `is None` instead of `file_list or ...`: an empty split must stay
        # empty rather than silently expanding to the whole directory.
        if file_list is None:
            # Sorted because os.listdir returns entries in arbitrary order.
            file_list = sorted(os.listdir(data_dir))
        self.file_list = file_list
        self.transforms = transforms

    @staticmethod
    def _label_from_name(file_name):
        """Return 0 for an "impossible" file name and 1 for a "real" one.

        Raises:
            ValueError: If the name contains neither keyword.
        """
        # "impossible" is checked first so that a name containing both
        # keywords is treated as impossible.
        if "impossible" in file_name:
            return 0  # Impossible
        if "real" in file_name:
            return 1  # Real
        raise ValueError(f"Invalid filename format: {file_name}")

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            A dict with the (optionally transformed) image under ``"image"``
            and the class index as a ``torch.long`` tensor under ``"label"``.

        Raises:
            ValueError: If the file name contains neither class keyword.
        """
        file_name = self.file_list[idx]
        img_path = os.path.join(self.data_dir, file_name)

        # convert() returns a new, fully loaded image, so the file handle can
        # be released right away instead of waiting for garbage collection.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        label = self._label_from_name(file_name)

        if self.transforms is not None:
            image = self.transforms(image)

        return {"image": image, "label": torch.tensor(label, dtype=torch.long)}

    def __len__(self):
        """Return the number of files in the dataset."""
        return len(self.file_list)


The transforms for image size and color channels (normalization) are defined as follows, for more stable optimization:

In [None]:
def get_transforms(target_size, for_network=True):
    """Build the preprocessing pipeline applied to every image.

    The image is always resized and then center-cropped to ``target_size``.
    When ``for_network`` is true it is additionally converted to a tensor and
    normalized per channel with fixed mean / standard-deviation values.

    Args:
        target_size: Size passed to both ``Resize`` and ``CenterCrop``.
        for_network: Whether to append the tensor conversion and normalization.

    Returns:
        A ``transforms.Compose`` holding the selected steps.
    """
    pipeline = [transforms.Resize(target_size), transforms.CenterCrop(target_size)]

    if for_network:
        # Fixed per-channel statistics used for the normalization step.
        channel_mean = [0.485, 0.456, 0.406]
        channel_std = [0.229, 0.224, 0.225]
        pipeline += [
            transforms.ToTensor(),
            transforms.Normalize(channel_mean, channel_std),
        ]

    return transforms.Compose(pipeline)


Here, I define the training and validation sets, which correspond to 80% and 20% of the data, respectively. After defining the data loader, I visualize 15 images from the dataset.

In [None]:

def split_train_valid(data_dir, val_percent=0.2, seed=None):
    """Randomly split the files of ``data_dir`` into training and validation lists.

    Args:
        data_dir: Directory whose entries are split.
        val_percent: Fraction (between 0 and 1) of the files assigned to the
            validation list; the count is rounded down.
        seed: Optional seed for a private random generator, making the split
            reproducible. With ``None`` the global NumPy random state is used.

    Returns:
        A ``(train_files, valid_files)`` tuple of file-name lists.

    Raises:
        ValueError: If ``val_percent`` is outside ``[0, 1]``.
    """
    if not 0.0 <= val_percent <= 1.0:
        raise ValueError(f"val_percent must be between 0 and 1, got {val_percent}")

    # os.listdir order is arbitrary; sorting first means the same seed always
    # produces the same split.
    file_names = sorted(os.listdir(data_dir))

    rng = np.random if seed is None else np.random.RandomState(seed)
    file_ids = list(range(len(file_names)))
    rng.shuffle(file_ids)

    split = int(len(file_ids) * val_percent)
    valid_ids, train_ids = file_ids[:split], file_ids[split:]

    train_files = [file_names[i] for i in train_ids]
    valid_files = [file_names[i] for i in valid_ids]

    return train_files, valid_files

def build_dataloader(data_dir, file_list, batch_size, transforms, shuffle=True):
    """Wrap the given files in a ``RealImpossibleDataset`` and a ``DataLoader``.

    Args:
        data_dir: Directory containing the image files.
        file_list: Names of the files to serve.
        batch_size: Number of samples per batch.
        transforms: Callable applied to each image by the dataset.
        shuffle: Whether the loader reshuffles the samples every epoch.
            Defaults to ``True`` (the previous hard-coded behaviour); pass
            ``False`` for a validation loader to keep a fixed sample order.

    Returns:
        The configured ``DataLoader``.
    """
    dataset = RealImpossibleDataset(data_dir, file_list, transforms)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

def _to_display_image(image, mean=None, std=None):
    """Convert one dataset image into something ``imshow`` can render as-is."""
    if not torch.is_tensor(image):
        # Not a tensor (e.g. a PIL image from a pipeline without ToTensor):
        # imshow can draw it directly.
        return image

    pixels = image.detach().cpu().float()
    if mean is not None and std is not None:
        # Undo Normalize(mean, std) channel by channel.
        std_t = torch.as_tensor(std, dtype=pixels.dtype).view(-1, 1, 1)
        mean_t = torch.as_tensor(mean, dtype=pixels.dtype).view(-1, 1, 1)
        pixels = (pixels * std_t + mean_t).clamp(0.0, 1.0)
    elif pixels.numel() > 0 and (pixels.min() < 0 or pixels.max() > 1):
        # Unknown normalization: stretch the values into [0, 1] so nothing is
        # clipped when the image is drawn.
        low, high = pixels.min(), pixels.max()
        span = high - low
        pixels = (pixels - low) / span if span > 0 else torch.zeros_like(pixels)

    # Channels-first (C, H, W) -> channels-last (H, W, C), as imshow expects.
    return pixels.permute(1, 2, 0).numpy()


def visualize_samples(dataset, num_samples=15, mean=None, std=None):
    """
    Visualize multiple samples from the dataset.

    The first ``num_samples`` items are drawn on a grid that is up to five
    columns wide, each titled "Real" (label 1) or "Impossible" (any other
    label).

    Float RGB data passed to ``imshow`` must lie in [0, 1]; values outside
    that range are clipped with a warning. Normalized tensors are therefore
    mapped back into [0, 1] before drawing: exactly, when ``mean`` and ``std``
    are given, otherwise by per-image min-max rescaling.

    Args:
        dataset: Indexable collection of ``{"image": ..., "label": ...}``
            dicts that also supports ``len()``.
        num_samples: Maximum number of samples to draw; capped at
            ``len(dataset)``.
        mean: Optional per-channel mean used when the images were normalized.
        std: Optional per-channel standard deviation used for normalization.
    """
    count = min(num_samples, len(dataset))
    if count <= 0:
        # Nothing to draw; avoid creating an empty figure.
        return

    n_cols = min(5, count)
    n_rows = -(-count // n_cols)  # ceiling division
    # 3 inches per cell reproduces the former 15x9 figure for a 3x5 grid.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows), squeeze=False)
    axes = axes.flatten()  # Flatten the axes for easy iteration

    # Hide every axis up front so unused grid cells stay blank.
    for ax in axes:
        ax.axis("off")

    for i, ax in enumerate(axes[:count]):
        sample = dataset[i]
        image = _to_display_image(sample["image"], mean, std)
        label = "Real" if int(sample["label"]) == 1 else "Impossible"

        ax.imshow(image)
        ax.set_title(label)

    plt.tight_layout()
    plt.show()


Lastly, the code is run in this part, which is named main. The batch size of 128 is just for fun; I simply chose it so that the training data is divided into 10 batches. Displaying the images produces an error message that I did not remove; the images are still shown, so I decided it is not a huge problem (but maybe it is a huge problem that I am not aware of).

In [None]:
def main():
    """Build the train/validation loaders, preview samples and print one batch.

    Steps, in order: create the preprocessing pipeline, split the files of
    ``FINALDATA_DIR`` into training and validation lists, wrap both in data
    loaders, show 15 training images, print the size and labels of the first
    training batch, and finally report how many files each split contains.
    """
    data_dir = FINALDATA_DIR

    # Named so that it does not shadow the imported `transforms` module.
    image_transforms = get_transforms((100, 100))
    train_files, valid_files = split_train_valid(data_dir)

    batch_size = 128
    train_loader = build_dataloader(data_dir, train_files, batch_size, image_transforms)
    valid_loader = build_dataloader(data_dir, valid_files, batch_size, image_transforms)

    # A separate dataset object is used for the preview so that samples can be
    # indexed directly instead of going through the shuffling loader.
    preview_dataset = RealImpossibleDataset(data_dir, train_files, image_transforms)
    visualize_samples(preview_dataset, num_samples=15)

    # Only the first batch is inspected; an empty loader prints nothing.
    first_batch = next(iter(train_loader), None)
    if first_batch is not None:
        images = first_batch["image"]
        labels = first_batch["label"]
        print(f"Batch size: {len(images)}")
        print(f"Labels: {labels}")

    print(f"Train Loader Ready with {len(train_files)} samples")
    print(f"Validation Loader Ready with {len(valid_files)} samples")

# Run the pipeline only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
