# Loading the Cityscapes data from a local folder

In [19]:
!pip install python-dotenv



In [20]:
from dotenv import load_dotenv
import os
import shutil

load_dotenv()
# SRC_DATA_PATH (set in your .env file or environment) must point to your unzipped Cityscapes folder.
source_dir = os.getenv("SRC_DATA_PATH")
if not source_dir:
    # Fail early with a clear message instead of a TypeError later inside os.path.join.
    raise RuntimeError("SRC_DATA_PATH is not set; add it to your .env file or environment.")
print(source_dir)
dest_dir = os.getcwd()

# One subdirectory each for train and test data; makedirs also creates the parent "images" folder.
for split in ("train", "test"):
    os.makedirs(os.path.join(dest_dir, "images", split), exist_ok=True)

# CityScapes dataset is in the format:
# train
    # img
    # label
# val
    # img
    # label
def copy_folder(folder, target):
    """Copy one split of the source dataset into ``images/<target>``.

    Every file found at ``<source_dir>/<folder>/<subfolder>/<file>`` is copied
    to ``<dest_dir>/images/<target>/<subfolder>/<file>``, where ``source_dir``
    and ``dest_dir`` are the module-level paths. Files that already exist at
    the destination are skipped, so the cell can be re-run safely.

    Args:
        folder: Name of the split directory inside ``source_dir`` (e.g. "train").
        target: Name of the split directory to fill under ``images``.

    Errors raised while copying an individual file are printed and the loop
    moves on to the next file; they are not re-raised.
    """
    folder = os.path.join(source_dir, folder)

    for subfolder in os.listdir(folder):
        subfolder_src = os.path.join(folder, subfolder)
        # Only directories hold data; skip stray files such as macOS ".DS_Store"
        # instead of crashing when trying to list them.
        if subfolder == ".DS_Store" or not os.path.isdir(subfolder_src):
            continue

        for file in os.listdir(subfolder_src):
            try:
                src = os.path.join(subfolder_src, file)
                os.makedirs(os.path.join(dest_dir, "images", target, subfolder), exist_ok=True) # creating label, images subfolders
                dest = os.path.join(dest_dir, "images", target, subfolder, file)

                if os.path.exists(dest): # avoid copying the same file again
                    print(f"File {dest} already exists. Skipping.")
                    continue

                if os.path.exists(src):
                    print(f"Copying {src} to {dest}")
                    shutil.copy2(src, dest)

            except Exception as e:
                # f-string so the actual error is reported, not the literal "{e}".
                print(f"Error: {e}")

# The source "train" folder fills the local "train" folder; the source "val" folder fills "test".
for source_split, target_split in (("train", "train"), ("val", "test")):
    copy_folder(source_split, target_split)
print("Data copied successfully!")

/Users/adarshdanda/Desktop/CityScapesData
File /Users/adarshdanda/MLResumeProj/SemanticSegmentation/images/train/label/train1760.png already exists. Skipping.
File /Users/adarshdanda/MLResumeProj/SemanticSegmentation/images/train/label/train2269.png already exists. Skipping.
File /Users/adarshdanda/MLResumeProj/SemanticSegmentation/images/train/label/train1006.png already exists. Skipping.
File /Users/adarshdanda/MLResumeProj/SemanticSegmentation/images/train/label/train1012.png already exists. Skipping.
File /Users/adarshdanda/MLResumeProj/SemanticSegmentation/images/train/label/train1774.png already exists. Skipping.
File /Users/adarshdanda/MLResumeProj/SemanticSegmentation/images/train/label/train2255.png already exists. Skipping.
File /Users/adarshdanda/MLResumeProj/SemanticSegmentation/images/train/label/train1984.png already exists. Skipping.
File /Users/adarshdanda/MLResumeProj/SemanticSegmentation/images/train/label/train2533.png already exists. Skipping.
File /Users/adarshdand

## Creating a Custom PyTorch Dataset and DataLoader

In [21]:
import matplotlib.pyplot as plt

In [26]:
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os

class CityscapesDataLoader(Dataset):
    """Dataset of paired images and segmentation masks read from two folders.

    Despite its name, this is a ``torch.utils.data.Dataset`` and is meant to be
    wrapped in a ``DataLoader``. Images and masks are paired by position after
    sorting the file names of each directory, so matching files must sort into
    the same order in both directories.

    Args:
        image_paths: Directory containing the input images.
        label_paths: Directory containing the label masks.
        transform: Optional callable that takes ``(img, label)`` as PIL images
            and returns the transformed ``(img, label)`` pair.

    Raises:
        ValueError: If the two directories hold a different number of files.
    """

    def __init__(self, image_paths, label_paths, transform=None):
        self.image_paths = image_paths
        self.label_paths = label_paths
        self.transform = transform
        self.images = self._list_files(image_paths)
        self.masks = self._list_files(label_paths)

        if len(self.images) != len(self.masks):
            raise ValueError(
                f"Found {len(self.images)} images in {image_paths} but "
                f"{len(self.masks)} masks in {label_paths}; they must match."
            )

    @staticmethod
    def _list_files(directory):
        """Return the sorted names of the non-hidden entries in ``directory``."""
        # Hidden entries (e.g. macOS ".DS_Store") are not samples.
        return sorted(name for name in os.listdir(directory) if not name.startswith("."))

    def __len__(self):
        # Number of samples, not the length of the directory path string.
        return len(self.images)

    def __getitem__(self, idx):
        """Load the ``idx``-th image/mask pair, applying ``transform`` if set."""
        # os.listdir yields bare file names, so rebuild the full paths here.
        img_path = os.path.join(self.image_paths, self.images[idx])
        label_path = os.path.join(self.label_paths, self.masks[idx])

        img = Image.open(img_path).convert("RGB")
        label = Image.open(label_path)

        if self.transform:
            # Joint transform: image and mask are processed together as a pair.
            img, label = self.transform(img, label)

        return img, label

In [24]:
from torchvision import transforms

def transform_data(img, label):
    """Resize an image/mask pair to 256x512 and convert both to tensors.

    Args:
        img: Input image; it is normalized with 3-channel ImageNet mean/std
            after conversion to a tensor.
        label: Segmentation mask belonging to ``img``.

    Returns:
        Tuple ``(img_tensor, label_tensor)``.
    """
    transform_img = transforms.Compose([
        transforms.Resize((256, 512)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # ImageNet stats
    ])

    transform_label = transforms.Compose([
        # Nearest-neighbour keeps the original mask values; the default bilinear
        # resampling would blend neighbouring labels into values that are not valid labels.
        transforms.Resize((256, 512), interpolation=transforms.InterpolationMode.NEAREST),
        # NOTE(review): ToTensor rescales 8-bit masks to floats in [0, 1]; if the masks
        # store integer class ids, confirm that this rescaling is intended.
        transforms.ToTensor()
    ])

    return transform_img(img), transform_label(label)


In [28]:
from torch.utils.data import DataLoader
# Roots of the two splits; each one holds an "img" and a "label" subfolder.
train_path, test_path = './images/train', './images/test'

train_data = CityscapesDataLoader(
    image_paths=os.path.join(train_path, 'img'),
    label_paths=os.path.join(train_path, 'label'),
    transform=transform_data,
)
test_data = CityscapesDataLoader(
    image_paths=os.path.join(test_path, 'img'),
    label_paths=os.path.join(test_path, 'label'),
    transform=transform_data,
)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(dataset=train_data, batch_size=4, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=4, shuffle=False)