In [1]:
import sys
sys.path.append("..")
from utils.download_dataset import download_dataset, extract_zip

In [2]:
import numpy as np
import PIL

from fastai.vision.all import *
from pathlib import Path
from tqdm.auto import tqdm
from shutil import copyfile

In [3]:
# All dataset artefacts live in a `data` folder next to the notebook's working directory.
DATA_PATH = Path.cwd()/'data'
# mkdir(exist_ok=True) is already a no-op when the directory exists, so a
# separate exists() pre-check is redundant (and racy).
DATA_PATH.mkdir(exist_ok=True)

# Fetch the dataset archive into DATA_PATH. Judging by the recorded output the
# helper returns the path of the .zip and reports when the file is already there.
fpath = download_dataset(dataset_name='DEEP_GLOBE_ROAD_EXTRACTION', dest_dir=DATA_PATH)
fpath

File exists: /home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION.zip


Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION.zip')

In [4]:
# Unpack the downloaded archive; the recorded output shows the helper returning
# the extracted directory (and reporting when it already exists).
dir_path = extract_zip(fpath)
dir_path

Directory exists: /home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION


Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION')

In [5]:
# Class table shipped with the dataset: one row per class with its RGB colour.
# Three helper columns are derived from it:
#   pixel_value - grey level of the class colour, using the same
#                 299/587/114 per-mille weights as PIL's convert("L"), so it
#                 can be compared against masks loaded in "L" mode
#   class_name  - despite the name, the integer class index (row position)
#   rgb         - the colour as an [r, g, b] list
df = pd.read_csv(dir_path/'class_dict.csv').assign(
    pixel_value=lambda d: (
        d['r'] * 299/1000 + d['g'] * 587/1000 + d['b'] * 114/1000
    ).round(0).astype(int, copy=False),
    class_name=lambda d: np.arange(len(d)),
    rgb=lambda d: d.apply(lambda row: [row['r'], row['g'], row['b']], axis=1),
)
df

Unnamed: 0,name,r,g,b,pixel_value,class_name,rgb
0,road,255,255,255,255,0,"[255, 255, 255]"
1,background,0,0,0,0,1,"[0, 0, 0]"


In [6]:
# Class names in row order, so position i is the name of class index i.
codes = df['name'].values
codes

array(['road', 'background'], dtype=object)

In [7]:
# Lookup: grayscale pixel value found in a mask -> integer class index.
p2d = dict(df[['pixel_value','class_name']].values)
p2d

{255: 0, 0: 1}

In [8]:
# Reverse direction for visualisation: integer class index -> [r, g, b] colour.
c2rgb = dict(df[['class_name','rgb']].values)
c2rgb

{0: [255, 255, 255], 1: [0, 0, 0]}

In [9]:
# Reminder of the dataset root that the following cells read from.
dir_path

Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION')

In [10]:
# Collect the satellite images. Filtering on '.jpg' matters because the
# '*_mask.png' files sit in the same train/ folder as the '*_sat.jpg' images.
img_fpaths = get_files(dir_path/'train', extensions='.jpg')
img_fpaths

(#6226) [Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION/train/110190_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION/train/325890_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION/train/877613_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION/train/138350_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION/train/598497_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION/train/823500_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION/train/24405_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION/train/668618_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION/train/759655_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DE

In [11]:
def get_mask_fpath(fpath):
    """Return the path of the mask that sits next to a satellite image.

    Only the file name is rewritten ('sat.jpg' -> 'mask.png'); the parent
    directories are left untouched, so a folder whose name happens to contain
    'sat.jpg' cannot be corrupted by the substitution.

    Args:
        fpath: Path (or string) of a satellite image, e.g. '.../110190_sat.jpg'.

    Returns:
        Path of the matching mask, e.g. '.../110190_mask.png'. A file name
        without 'sat.jpg' is returned unchanged.
    """
    fpath = Path(fpath)
    return fpath.with_name(fpath.name.replace('sat.jpg', 'mask.png'))

# Quick check of the naming rule on the first image.
get_mask_fpath(img_fpaths[0])

Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_ROAD_EXTRACTION/train/110190_mask.png')

In [12]:
# Pixel value -> class index mapping, shown again for reference.
p2d

{255: 0, 0: 1}

In [13]:
# The mask pixel values that p2d knows about, in ascending order.
sorted(p2d.keys())

[0, 255]

In [14]:
def get_msk(fpath, p2d=p2d):
    """Load the mask belonging to a satellite image and recode it to class indices.

    Args:
        fpath: Path of the satellite image; the mask path is derived from it
            with get_mask_fpath.
        p2d: Mapping from grayscale pixel value to class index. Defaults to the
            notebook-level p2d as it was when this function was defined.

    Returns:
        An image built from the recoded grayscale array. Pixel values that are
        not keys of p2d are passed through unchanged.
    """
    mask_fpath = get_mask_fpath(fpath)
    # The context manager releases the file handle as soon as the pixels have
    # been copied into the array.
    with Image.open(mask_fpath) as raw_mask:
        mask_img = np.array(raw_mask.convert("L"))

    # Build the complete value -> class table first and apply it in one pass.
    # Rewriting the array one key at a time is only correct when no class index
    # collides with a pixel value that is remapped later; a lookup table always
    # reads the ORIGINAL pixel values, so the order of the keys no longer matters.
    lut = np.arange(256, dtype=mask_img.dtype)  # identity for unmapped values
    for pixel_value, class_idx in p2d.items():
        if 0 <= pixel_value <= 255:  # keys outside the 8-bit range can never match
            lut[int(pixel_value)] = class_idx

    # NOTE(review): grey levels missing from p2d survive as-is; if the masks
    # can contain intermediate values, binarise them first - confirm.
    return Image.fromarray(lut[mask_img])


# Spot-check one mask: after recoding, only class indices should remain.
msk = get_msk(img_fpaths[3])
print(np.unique(msk))

[0 1]


In [15]:
# Target layout: <dataset>/images holds the satellite images,
# <dataset>/labels holds the recoded masks.
# mkdir(exist_ok=True) is idempotent, so no exists() pre-check is needed.
path_images = dir_path/'images'
path_images.mkdir(exist_ok=True)

path_labels = dir_path/'labels'
path_labels.mkdir(exist_ok=True)

In [16]:
# Build the images/ + labels/ layout: for every satellite image, write its
# recoded mask and copy the image itself. Both files share one stem, e.g.
# '110190_sat.jpg' -> images/110190.jpg and labels/110190.png.
# Every file is rewritten on each run of this cell.
for src_img_path in tqdm(img_fpaths):
    # Recode the mask first, so a failing mask leaves no orphan image copy behind.
    msk = get_msk(src_img_path)
    dst_img_fpath = path_images/src_img_path.name.replace('_sat', '')
    dst_msk_fpath = path_labels/src_img_path.name.replace('_sat.jpg', '.png')
    msk.save(dst_msk_fpath)
    copyfile(src_img_path, dst_img_fpath)

  0%|          | 0/6226 [00:00<?, ?it/s]

In [20]:
# Sanity checks on the converted dataset: one image and one label per source
# image, and every image has a label with the same stem.
new_img_fpaths = get_files(path_images)
new_msk_fpaths = get_files(path_labels)
assert len(new_img_fpaths) == len(img_fpaths), (
    f"expected {len(img_fpaths)} images in {path_images}, found {len(new_img_fpaths)}"
)
assert len(new_msk_fpaths) == len(img_fpaths), (
    f"expected {len(img_fpaths)} labels in {path_labels}, found {len(new_msk_fpaths)}"
)
# Equal counts alone do not prove that images and labels pair up by name.
assert {p.stem for p in new_img_fpaths} == {p.stem for p in new_msk_fpaths}, (
    "image and label file stems do not match"
)