In [1]:
import os
import shutil
import pathlib

import numpy as np
from zipfile import ZipFile
from collections import defaultdict

from PIL import Image

In [2]:
from data.labels import labels

In [3]:
# Tabulate every label definition: name, label id, category id and RGB color.
for row in labels:
    summary = f"{row.name:20} {row.id:2} {row.categoryId:1} {row.color}"
    print(summary)

unlabeled             0 0 (0, 0, 0)
ego vehicle           1 0 (0, 0, 0)
rectification border  2 0 (0, 0, 0)
out of roi            3 0 (0, 0, 0)
static                4 0 (0, 0, 0)
dynamic               5 0 (111, 74, 0)
ground                6 0 (81, 0, 81)
road                  7 1 (128, 64, 128)
sidewalk              8 1 (244, 35, 232)
parking               9 1 (250, 170, 160)
rail track           10 1 (230, 150, 140)
building             11 2 (70, 70, 70)
wall                 12 2 (102, 102, 156)
fence                13 2 (190, 153, 153)
guard rail           14 2 (180, 165, 180)
bridge               15 2 (150, 100, 100)
tunnel               16 2 (150, 120, 90)
pole                 17 3 (153, 153, 153)
polegroup            18 3 (153, 153, 153)
traffic light        19 3 (250, 170, 30)
traffic sign         20 3 (220, 220, 0)
vegetation           21 4 (107, 142, 35)
terrain              22 4 (152, 251, 152)
sky                  23 5 (70, 130, 180)
person               24 6 (220, 20, 60)


In [4]:
# Lookup table used when converting the label masks.
# NOTE: despite its name, the mapping goes from a label id to its category id.
catId2Id = {row.id: row.categoryId for row in labels}
print(catId2Id)

{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1, 8: 1, 9: 1, 10: 1, 11: 2, 12: 2, 13: 2, 14: 2, 15: 2, 16: 2, 17: 3, 18: 3, 19: 3, 20: 3, 21: 4, 22: 4, 23: 5, 24: 6, 25: 6, 26: 7, 27: 7, 28: 7, 29: 7, 30: 7, 31: 7, 32: 7, 33: 7, -1: 7}


## 1.1 Transfert des images sélectionnées <a class="anchor" id="prepa_transfert"></a> [⇪](#menu)

In [5]:
# Source archives and destination folder, all located under the data folder.
data_dir = pathlib.Path('data')
rgb_src_path = data_dir / 'P8_Cityscapes_leftImg8bit_trainvaltest.zip'  # RGB images
ids_src_path = data_dir / 'P8_Cityscapes_gtFine_trainvaltest.zip'  # label-id masks
new_path = data_dir / 'preprocessed_256x128'

#### On supprime un éventuel ancien dossier contenant les images

#### On crée un dossier pour stocker les images sélectionnées et on y copie les images

In [6]:
def get_files_list(zip_src_path, excluded_datasets=("test",)):
    """List the image names stored in a zip archive, grouped by dataset.

    Only members laid out as ``<source>/<dataset>/<city>/<image>_<suffix>.png``
    are considered; any other entry (folders, files at another depth, other
    extensions, names without ``_``) is ignored instead of raising.

    Parameters
    ----------
    zip_src_path : path of the zip archive to inspect.
    excluded_datasets : collection of dataset names (second path component)
        whose images must be left out. Defaults to ``("test",)``.

    Returns
    -------
    collections.defaultdict
        Maps each dataset name to the list of its image names, i.e. the file
        name stripped of everything from its last ``_`` onwards. One line per
        dataset with the number of files found is also printed.
    """
    datasets = defaultdict(list)

    with ZipFile(zip_src_path, 'r') as zipObj:
        for member in zipObj.namelist():
            parts = member.split('/')
            if len(parts) != 4:
                continue

            dataset, file_name = parts[1], parts[3]
            if not file_name.endswith('.png') or '_' not in file_name:
                continue

            # Compare the dataset component itself: a substring test on the
            # whole path would also drop any image whose city or file name
            # happens to contain an excluded word.
            if dataset in excluded_datasets:
                continue

            datasets[dataset].append(file_name[:file_name.rindex('_')])

    for dataset, images in datasets.items():
        print(f"{dataset}_set: {len(images)} files")

    return datasets
    
# Index the images of the RGB archive, grouped by dataset name.
datasets = get_files_list(rgb_src_path)

train_set: 2975 files
val_set: 500 files


In [7]:
def preprocess_RGB_file(file, newsize):
    """Open an image and return a copy of it resized to ``newsize``.

    Parameters
    ----------
    file : anything ``PIL.Image.open`` accepts (a path or a binary file object).
    newsize : 2-tuple forwarded to ``Image.resize`` (Pillow documents it as
        ``(width, height)``).

    Returns
    -------
    PIL.Image.Image
        The resized image, resampled with the default filter of ``Image.resize``.
    """
    # The context manager guarantees the source image is closed even when the
    # resize fails; ``resize`` returns an independent image, so nothing is lost.
    with Image.open(file) as image:
        return image.resize(newsize)

In [8]:
def preprocess_IDS_file(file, newsize):
    """Open a label-id mask and return it resized and converted to category ids.

    Every pixel value is looked up in the module-level ``catId2Id`` table
    (label id -> category id).

    Parameters
    ----------
    file : anything ``PIL.Image.open`` accepts (a path or a binary file object).
    newsize : 2-tuple forwarded to ``Image.resize`` (Pillow documents it as
        ``(width, height)``).

    Returns
    -------
    PIL.Image.Image
        A new image built from a ``uint8`` array of category ids.
    """
    with Image.open(file) as image:
        # Nearest-neighbour only selects existing pixels, so the mask never
        # receives interpolated (meaningless) ids. For the same reason resizing
        # before the lookup yields exactly the same result as resizing after
        # it, while running the per-pixel lookup on fewer pixels when shrinking.
        resized = image.resize(newsize, Image.Resampling.NEAREST)

    label_ids = np.asarray(resized)
    category_ids = np.vectorize(catId2Id.get, otypes=[np.uint8])(label_ids)

    # The converted array is what gets returned; the previous code resized and
    # returned the unconverted source image, discarding the conversion.
    return Image.fromarray(category_ids)

In [10]:
def preprocess_files(rbg_src_path, ids_src_path, datasets, output_path, newsize):
    """Preprocess every selected image and its label mask into ``output_path``.

    A sub-folder ``output_path/<dataset>`` is created for each dataset. For
    each image name it receives ``<name>.png`` (output of
    ``preprocess_RGB_file``) and ``<name>_labels.png`` (output of
    ``preprocess_IDS_file``). The source images are read straight from the
    archives; nothing is extracted to disk. One line per dataset with the
    number of preprocessed files is printed.

    Parameters
    ----------
    rbg_src_path : path of the zip archive containing the ``leftImg8bit`` images.
    ids_src_path : path of the zip archive containing the ``gtFine`` label masks.
    datasets : mapping from a dataset name to the list of its image names; the
        text before the first ``_`` of an image name is its city folder.
    output_path : destination folder; it is deleted first when it already exists.
    newsize : target size forwarded to both preprocessing helpers.
    """
    output_path = pathlib.Path(output_path)

    # Remove the output of a previous run, without failing on the very first
    # run when the destination folder does not exist yet.
    if output_path.exists():
        shutil.rmtree(output_path)

    with ZipFile(rbg_src_path, 'r') as RGBzipObj, ZipFile(ids_src_path, 'r') as IDSzipObj:

        for dataset, img_names in datasets.items():

            # Folder receiving the preprocessed images of this dataset
            dataset_dir = output_path / dataset
            dataset_dir.mkdir(parents=True, exist_ok=True)

            for img_name in img_names:
                city = img_name[:img_name.index('_')]

                # Member names are written with '/' because zip entries always
                # use forward slashes, whatever the operating system.
                srcRGB = f"leftImg8bit/{dataset}/{city}/{img_name}_leftImg8bit.png"
                with RGBzipObj.open(srcRGB) as rgb_file:
                    preprocessed = preprocess_RGB_file(rgb_file, newsize)
                    preprocessed.save(dataset_dir / f"{img_name}.png")

                srcIDS = f"gtFine/{dataset}/{city}/{img_name}_gtFine_labelIds.png"
                with IDSzipObj.open(srcIDS) as ids_file:
                    preprocessed = preprocess_IDS_file(ids_file, newsize)
                    preprocessed.save(dataset_dir / f"{img_name}_labels.png")

            print(f"{dataset}_set: {len(img_names)} files preprocessed")
                

# Run the whole preprocessing into the 256x128 output folder.
target_size = (256, 128)
output_path = pathlib.Path('data') / 'preprocessed_256x128'
preprocess_files(rgb_src_path, ids_src_path, datasets, output_path, target_size)

train_set: 2975 files preprocessed
val_set: 500 files preprocessed
