In [1]:
import sys
sys.path.append("..")
from utils.download_dataset import download_dataset, extract_zip

In [2]:
import numpy as np
import PIL

from fastai.vision.all import *
from pathlib import Path
from tqdm.auto import tqdm
from shutil import copyfile, rmtree

In [3]:
# All downloaded artifacts live in a 'data' folder under the current working directory.
DATA_PATH = Path.cwd()/'data'
# mkdir(exist_ok=True) is already a no-op for an existing directory, so a
# separate exists() check adds nothing. Without the guard, a regular file
# named 'data' is reported here (FileExistsError) instead of surfacing later.
DATA_PATH.mkdir(exist_ok=True)

# download_dataset is a project helper; per the recorded output it returns the
# path of the zip archive and reports when the file is already present.
fpath = download_dataset(dataset_name='DEEP_GLOBE_LAND_COVER', dest_dir=DATA_PATH)
fpath

File exists: /home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER.zip


Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER.zip')

In [4]:
# Unpack the downloaded archive with the project's extract_zip helper.
# The returned directory is the root for every path built further down.
dir_path = extract_zip(fpath)
dir_path

Directory exists: /home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER


Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER')

In [5]:
# Class palette: one row per land-cover class with the RGB colour used in the masks.
df = pd.read_csv(dir_path/'class_dict.csv')
# Gray level of each class colour, computed with the ITU-R 601 weights
# (0.299 R + 0.587 G + 0.114 B). get_msk converts the masks with
# Image.convert("L"), so these values are what it will look for.
# NOTE(review): confirm the rounding agrees with PIL for any newly added colour.
# A float -> int cast always allocates, so astype needs no copy=False.
df['pixel_value'] = (df['r'] * 299/1000 + df['g'] * 587/1000 + df['b'] * 114/1000).round(0).astype(int)
# Two colours that collapse to the same gray level would silently overwrite
# each other once this column becomes dictionary keys, so fail loudly here.
assert df['pixel_value'].is_unique, (
    "class colours map to duplicate grayscale values: "
    f"{df.loc[df['pixel_value'].duplicated(keep=False), 'name'].tolist()}"
)
# Despite its name, 'class_name' holds the integer class index (the row position).
df['class_name'] = np.arange(len(df))
# Keep the original colour as an [r, g, b] list per class.
df['rgb'] = df.apply(lambda row: [row['r'], row['g'], row['b']], axis=1)
df

Unnamed: 0,name,r,g,b,pixel_value,class_name,rgb
0,urban_land,0,255,255,179,0,"[0, 255, 255]"
1,agriculture_land,255,255,0,226,1,"[255, 255, 0]"
2,rangeland,255,0,255,105,2,"[255, 0, 255]"
3,forest_land,0,255,0,150,3,"[0, 255, 0]"
4,water,0,0,255,29,4,"[0, 0, 255]"
5,barren_land,255,255,255,255,5,"[255, 255, 255]"
6,unknown,0,0,0,0,6,"[0, 0, 0]"


In [6]:
# Class labels in table order: entry i names the class whose index is i.
codes = df.loc[:, 'name'].values
codes

array(['urban_land', 'agriculture_land', 'rangeland', 'forest_land',
       'water', 'barren_land', 'unknown'], dtype=object)

In [7]:
# Lookup table: grayscale level found in a mask -> integer class index.
p2d = {
    gray_level: class_idx
    for gray_level, class_idx in df[['pixel_value','class_name']].values
}
p2d

{179: 0, 226: 1, 105: 2, 150: 3, 29: 4, 255: 5, 0: 6}

In [8]:
# Lookup table: integer class index -> [r, g, b] colour of that class.
c2rgb = {
    class_idx: colour
    for class_idx, colour in df[['class_name','rgb']].values
}
c2rgb

{0: [0, 255, 255],
 1: [255, 255, 0],
 2: [255, 0, 255],
 3: [0, 255, 0],
 4: [0, 0, 255],
 5: [255, 255, 255],
 6: [0, 0, 0]}

In [9]:
# Collect the .jpg files under the nested dataset folder using fastai's
# get_files. In the recorded listing these are the satellite images, named
# <id>_sat.jpg.
img_fpaths = get_files(dir_path/'DEEP_GLOBE_LAND_COVER', extensions='.jpg')
img_fpaths

(#803) [Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER/DEEP_GLOBE_LAND_COVER/860326_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER/DEEP_GLOBE_LAND_COVER/989499_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER/DEEP_GLOBE_LAND_COVER/733758_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER/DEEP_GLOBE_LAND_COVER/432089_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER/DEEP_GLOBE_LAND_COVER/565914_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER/DEEP_GLOBE_LAND_COVER/119012_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER/DEEP_GLOBE_LAND_COVER/335737_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER/DEEP_GLOBE_LAND_COVER/209787_sat.jpg'),Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_CO

In [10]:
def get_mask_fpath(fpath):
    """Return the path of the mask stored next to a satellite image.

    Args:
        fpath: Path (or path string) of an image whose file name contains
            'sat.jpg', e.g. '860326_sat.jpg'.

    Returns:
        Path in the same directory, with 'sat.jpg' in the file name replaced
        by 'mask.png'.
    """
    fpath = Path(fpath)
    # Rewrite only the final path component, so a parent directory whose name
    # happens to contain 'sat.jpg' is left untouched.
    return fpath.with_name(fpath.name.replace('sat.jpg', 'mask.png'))

# Sanity check on the first image: the displayed path should end in _mask.png.
get_mask_fpath(img_fpaths[0])

Path('/home/ubuntu/github/cv_fastai/segmentation/data/DEEP_GLOBE_LAND_COVER/DEEP_GLOBE_LAND_COVER/860326_mask.png')

In [11]:
# Show the grayscale -> class-index table again; get_msk below binds it as its default mapping.
p2d

{179: 0, 226: 1, 105: 2, 150: 3, 29: 4, 255: 5, 0: 6}

In [12]:
def get_msk(fpath, p2d=p2d):
    """Load the mask of a satellite image as an image of class indices.

    The mask file is located with get_mask_fpath and converted to 8-bit
    grayscale. Every gray level that is a key of `p2d` is replaced by the
    class index it maps to; gray levels that are not keys are kept as they are.

    Args:
        fpath: Path of the satellite image (not of the mask itself).
        p2d: Mapping from grayscale pixel value to class index. The default
            is the table bound at the time this function is defined.

    Returns:
        A PIL image built from the remapped 8-bit array.
    """
    mask_fpath = get_mask_fpath(fpath)
    with Image.open(mask_fpath) as mask_file:
        gray = np.array(mask_file.convert("L"))
    # Translate every pixel in one pass through a 256-entry lookup table that
    # starts as the identity. Each pixel is mapped exactly once from its
    # ORIGINAL value, so the result does not depend on key order. Remapping
    # the array in place key by key can hit a pixel twice whenever a freshly
    # written class index equals a key that is processed later.
    lut = np.arange(256, dtype=np.uint8)
    for pixel_value, class_idx in p2d.items():
        # An 8-bit mask cannot contain values outside 0..255; skip such keys.
        if 0 <= pixel_value <= 255:
            lut[pixel_value] = class_idx
    return Image.fromarray(lut[gray])

# Spot check on the first image: the distinct values should all be class
# indices, i.e. values of p2d.
msk = get_msk(img_fpaths[0])
print(np.unique(msk))

[0 1 4 6]


In [13]:
# Output folders for the renamed images and the class-index masks.
# mkdir(exist_ok=True) is idempotent for directories, so no exists() guard is
# needed; it raises FileExistsError if the name is taken by a regular file.
path_images = dir_path/'images'
path_images.mkdir(exist_ok=True)

path_labels = dir_path/'labels'
path_labels.mkdir(exist_ok=True)

In [14]:
# Write every sample twice: the untouched satellite image and its mask
# converted to class indices, both under the same <id> stem.
for src_img_path in tqdm(img_fpaths):
    # <id>_sat.jpg -> images/<id>.jpg and labels/<id>.png
    dst_img_fpath = path_images/src_img_path.name.replace('_sat', '')
    dst_msk_fpath = path_labels/src_img_path.name.replace('_sat.jpg', '.png')

    # Convert the mask before anything is written to disk.
    msk = get_msk(src_img_path)

    copyfile(src_img_path, dst_img_fpath)
    msk.save(dst_msk_fpath)

  0%|          | 0/803 [00:00<?, ?it/s]

In [15]:
# Re-scan the output folders and verify that the conversion is complete.
new_img_fpaths = get_files(path_images)
new_msk_fpaths = get_files(path_labels)
assert len(new_img_fpaths) == len(img_fpaths), (
    f"expected {len(img_fpaths)} images in {path_images}, found {len(new_img_fpaths)}"
)
assert len(new_msk_fpaths) == len(img_fpaths), (
    f"expected {len(img_fpaths)} masks in {path_labels}, found {len(new_msk_fpaths)}"
)
# Matching counts alone do not prove that each image has its own mask, so
# also compare the file stems (<id>) of both folders.
assert {p.stem for p in new_img_fpaths} == {p.stem for p in new_msk_fpaths}, (
    "image and mask file names do not pair up"
)