In [3]:
import argparse
import os
from tqdm import tqdm

import sys
sys.path.append("spacenet_lib")

In [8]:
# Display the module search path to confirm that "spacenet_lib" was appended.
sys.path

['c:\\Users\\toadi\\Documents\\GitHub\\spacenet_building_detection\\src\\features',
 'c:\\Users\\toadi\\anaconda3\\python39.zip',
 'c:\\Users\\toadi\\anaconda3\\DLLs',
 'c:\\Users\\toadi\\anaconda3\\lib',
 'c:\\Users\\toadi\\anaconda3',
 '',
 'c:\\Users\\toadi\\anaconda3\\lib\\site-packages',
 'c:\\Users\\toadi\\anaconda3\\lib\\site-packages\\win32',
 'c:\\Users\\toadi\\anaconda3\\lib\\site-packages\\win32\\lib',
 'c:\\Users\\toadi\\anaconda3\\lib\\site-packages\\Pythonwin',
 'c:\\Users\\toadi\\anaconda3\\lib\\site-packages\\IPython\\extensions',
 'C:\\Users\\toadi\\.ipython',
 'spacenet_lib']

In [57]:
# Resolve the project data directories relative to the kernel's working directory.
# NOTE: this assumes the notebook runs from <repo>/src/features, i.e. two
# levels below the repository root that contains the "data" folder.
currentpath = os.getcwd()
print(currentpath)

lev2up = os.path.dirname(os.path.dirname(currentpath))
print(lev2up)

datadir = os.path.join(lev2up, "data")
rasterdir = os.path.join(datadir, "3band")
# Pass every path component separately so the separator is chosen per
# platform instead of hard-coding the Windows backslash.
vectordir = os.path.join(datadir, "processedBuildingLabels", "vectordata", "geojson")
destdir = os.path.join(datadir, "buildingMaskImages")
splitdir = os.path.join(datadir, "dataSplit")
# The masks are read back from the same directory they are written to.
maskdir = destdir
print(vectordir)

c:\Users\toadi\Documents\GitHub\spacenet_building_detection\src\features
c:\Users\toadi\Documents\GitHub\spacenet_building_detection
c:\Users\toadi\Documents\GitHub\spacenet_building_detection\data\processedBuildingLabels\vectordata\geojson


In [51]:
# Add create_poly_mask

# Reference: 
# https://medium.com/the-downlinq/getting-started-with-spacenet-data-827fd2ec9f53
# https://gist.github.com/avanetten/b295e89f6fa9654c9e9e480bdb2e4d60#file-create_building_mask-py

from osgeo import gdal, ogr
from PIL import Image
import numpy as np
import os
import random


def create_poly_mask(rasterSrc, vectorSrc, npDistFileName='',
                     noDataValue=0, burn_values=1):
    '''
    Rasterize the polygons of vectorSrc into a single-band mask that copies
    the size, geotransform and projection of rasterSrc.

    Similar to labeltools/createNPPixArray() in spacenet utilities.

    Parameters
    ----------
    rasterSrc : str
        Path of the raster whose grid the mask emulates.
    vectorSrc : str
        Path of the vector file holding the polygons to burn.
    npDistFileName : str, optional
        Path of the mask GeoTIFF to write. When empty (the default) the mask
        is written to a temporary file that is removed before returning.
    noDataValue : int, optional
        Value registered as the no-data value of the mask band.
    burn_values : int, optional
        Value burned into the mask for the rasterized layer.

    Returns
    -------
    numpy.ndarray
        The mask GeoTIFF read back through PIL and converted to an array.

    Raises
    ------
    RuntimeError
        If either input cannot be opened or the output cannot be created.
    '''
    import tempfile

    # Open the vector file that holds the truth polygons.
    source_ds = ogr.Open(vectorSrc)
    if source_ds is None:
        raise RuntimeError("Could not open vector source: {}".format(vectorSrc))
    source_layer = source_ds.GetLayer()

    # Open the raster whose grid is to be emulated.
    srcRas_ds = gdal.Open(rasterSrc)
    if srcRas_ds is None:
        raise RuntimeError("Could not open raster source: {}".format(rasterSrc))
    cols = srcRas_ds.RasterXSize
    rows = srcRas_ds.RasterYSize

    use_temp_file = npDistFileName == ''
    if use_temp_file:
        # A unique scratch file avoids clashes between concurrent runs that
        # would otherwise all write the same fixed name in the working dir.
        temp_fd, dstPath = tempfile.mkstemp(suffix='.tiff')
        os.close(temp_fd)
    else:
        dstPath = npDistFileName

    dst_ds = None
    band = None
    try:
        # The mask is written as a GeoTIFF on disk rather than in memory.
        gtiff_driver = gdal.GetDriverByName('GTiff')
        dst_ds = gtiff_driver.Create(dstPath, cols, rows, 1, gdal.GDT_Byte,
                                     options=['COMPRESS=LZW'])
        if dst_ds is None:
            raise RuntimeError("Could not create mask raster: {}".format(dstPath))
        dst_ds.SetGeoTransform(srcRas_ds.GetGeoTransform())
        dst_ds.SetProjection(srcRas_ds.GetProjection())
        band = dst_ds.GetRasterBand(1)
        band.SetNoDataValue(noDataValue)
        gdal.RasterizeLayer(dst_ds, [1], source_layer, burn_values=[burn_values])

        # GDAL flushes and closes the file only once every Python reference
        # to the dataset (including its bands) has been dropped.
        band = None
        dst_ds = None

        # Close the PIL handle explicitly; an open handle blocks os.remove
        # on Windows.
        with Image.open(dstPath) as mask_file:
            mask_image = np.array(mask_file)
    finally:
        # Release GDAL handles on the error path too, then clean up scratch.
        band = None
        dst_ds = None
        if use_temp_file and os.path.exists(dstPath):
            os.remove(dstPath)

    return mask_image

In [49]:
def build_labels(src_raster_dir, src_vector_dir, dst_dir):
    '''
    Write one building mask image per label file found in src_vector_dir.

    Every file named "Geo_AOI_1_RIO_img<N>.geojson" is paired with the raster
    "3band_AOI_1_RIO_img<N>.tif" in src_raster_dir, and the mask is saved in
    dst_dir under the raster's file name with building pixels set to 255.

    Parameters
    ----------
    src_raster_dir : str
        Root directory for raster files (.tif).
    src_vector_dir : str
        Root directory for vector files (.geojson).
    dst_dir : str
        Output directory; created if it does not exist.
    '''
    import re

    os.makedirs(dst_dir, exist_ok=True)

    # Take the image ids from the files that actually exist instead of
    # assuming they are numbered 1..N without gaps. Ids stay strings so the
    # file names are rebuilt exactly as they appear on disk.
    label_pattern = re.compile(r"^Geo_AOI_1_RIO_img(\d+)\.geojson$")
    matches = (label_pattern.match(name) for name in os.listdir(src_vector_dir))
    image_ids = sorted((m.group(1) for m in matches if m), key=int)

    print("[INFO] Found {} geojson files. Preparing building mask images...".format(len(image_ids)))

    for image_id in tqdm(image_ids):

        src_raster_filename = "3band_AOI_1_RIO_img{}.tif".format(image_id)
        src_vector_filename = "Geo_AOI_1_RIO_img{}.geojson".format(image_id)

        src_raster_path = os.path.join(src_raster_dir, src_raster_filename)
        src_vector_path = os.path.join(src_vector_dir, src_vector_filename)
        dst_path = os.path.join(dst_dir, src_raster_filename)

        create_poly_mask(
            src_raster_path, src_vector_path, npDistFileName=dst_path,
            noDataValue=0, burn_values=255
        )


# if __name__ == "__main__":

# 	parser = argparse.ArgumentParser()

# 	parser.add_argument('src_raster_dir', help='Root directory for raster files (.tif)')
# 	parser.add_argument('src_vector_dir', help='Root directory for vector files (.geojson)')
# 	parser.add_argument('dst_dir', help='Output directory')

# 	args = parser.parse_args()

# 	build_labels(args.src_raster_dir, args.src_vector_dir, args.dst_dir)

In [50]:
# Rasterize the GeoJSON labels in vectordir into mask images under destdir.
build_labels(rasterdir,vectordir,destdir)

[INFO] Found 6940 geojson files. Preparing building mask images...


100%|██████████| 6940/6940 [03:54<00:00, 29.59it/s]


In [70]:
# Data Split

class LabeledImageDataset():
    """Index of (image, label) filename pairs listed in a split file.

    The split file holds one filename per line. The same name is used for
    the image (looked up under ``root``) and for its label (looked up under
    ``label_root``).

    Parameters
    ----------
    dataset : str
        Path of the text file listing one filename per line.
    root : str
        Directory that holds the images.
    label_root : str
        Directory that holds the label images.
    dtype, label_dtype : numpy dtype
        Stored for use when images and labels are loaded.
    mean : scalar or numpy.ndarray
        Mean value(s); stored with two leading singleton axes.
    crop_size : int
        Stored crop size.
    test : bool
        Stored flag marking the dataset as a test set.
    distort : bool
        Stored flag enabling colour distortion.
    """

    def __init__(self, dataset, root, label_root, dtype=np.float32,
                 label_dtype=np.int32, mean=0, crop_size=256, test=False,
                 distort=False):
        with open(dataset) as split_file:
            names = [line.rstrip('\n') for line in split_file]
        # Blank lines (e.g. a trailing newline) are not filenames; skip them.
        self._pairs = [(name, name) for name in names if name.strip()]
        self._root = root
        self._label_root = label_root
        self._dtype = dtype
        self._label_dtype = label_dtype
        mean = np.asarray(mean)
        if mean.ndim == 0:
            # A scalar (including the default 0) cannot be sliced with `:`,
            # so give it the three broadcastable axes directly.
            self._mean = mean.reshape(1, 1, 1)
        else:
            self._mean = mean[np.newaxis, np.newaxis, :]
        self._crop_size = crop_size
        self._test = test
        self._distort = distort

    def __len__(self):
        """Return the number of filename pairs."""
        return len(self._pairs)

    # def get_example(self, i):
    #     image_filename, label_filename = self._pairs[i]
        
    #     image_path = os.path.join(self._root, image_filename)
    #     image = _read_image_as_array(image_path, self._dtype)
    #     if self._distort:
    #         image = random_color_distort(image)
    #         image = np.asarray(image, dtype=self._dtype)

    #     image = (image - self._mean) / 255.0
        
    #     label_path = os.path.join(self._label_root, label_filename)
    #     label_image = _read_image_as_array(label_path, self._label_dtype)
        
    #     h, w, _ = image.shape
        
    #     label = np.zeros(shape=[h, w], dtype=np.int32) # 0: background
    #     label[label_image > 0] = 1 # 1: "building"
        
    #     # Padding
    #     if (h < self._crop_size) or (w < self._crop_size):
    #         H, W = max(h, self._crop_size), max(w, self._crop_size)
            
    #         pad_y1, pad_x1 = (H - h) // 2, (W - w) // 2
    #         pad_y2, pad_x2 = (H - h - pad_y1), (W - w - pad_x1)
    #         image = np.pad(image, ((pad_y1, pad_y2), (pad_x1, pad_x2), (0, 0)), 'symmetric')

    #         if self._test:
    #             # Pad with ignore_value for test set
    #             label = np.pad(label, ((pad_y1, pad_y2), (pad_x1, pad_x2)), 'constant', constant_values=255)
    #         else:
    #             # Pad with original label for train set  
    #             label = np.pad(label, ((pad_y1, pad_y2), (pad_x1, pad_x2)), 'symmetric')
            
    #         h, w = H, W
        
    #     # Randomly flip and crop the image/label for train-set
    #     if not self._test:

    #         # Horizontal flip
    #         if random.randint(0, 1):
    #             image = image[:, ::-1, :]
    #             label = label[:, ::-1]

    #         # Vertical flip
    #         if random.randint(0, 1):
    #             image = image[::-1, :, :]
    #             label = label[::-1, :]                
            
    #         # Random crop
    #         top  = random.randint(0, h - self._crop_size)
    #         left = random.randint(0, w - self._crop_size)
        
    #     # Crop the center for test-set
    #     else:
    #         top = (h - self._crop_size) // 2
    #         left = (w - self._crop_size) // 2
        
    #     bottom = top + self._crop_size
    #     right = left + self._crop_size
        
    #     image = image[top:bottom, left:right]
    #     label = label[top:bottom, left:right]
            
    #     return image.transpose(2, 0, 1), label

In [71]:
# Data split 

def dump_filenames(filenames, dst_path):
    '''
    Write the given filenames to dst_path, one per line.

    Names are separated by a newline; no newline follows the last name, and
    an empty sequence produces an empty file.
    '''
    with open(dst_path, 'w') as out_file:
        out_file.write("\n".join(filenames))


def split_dataset(img_dir, dst_dir, ratio, seed=0):
    '''
    Shuffle the entries of img_dir and write train/val/test filename lists.

    Parameters
    ----------
    img_dir : str
        Directory whose entries are split.
    dst_dir : str
        Directory that receives train.txt, val.txt and test.txt; created if
        it does not exist.
    ratio : sequence of three numbers
        Relative sizes (train, val, test). The train and val counts are
        rounded down; the test list receives every remaining file.
    seed : int, optional
        Seed of the shuffle.
    '''
    # os.listdir returns entries in arbitrary order; sort first so that a
    # given seed always yields the same split.
    filenames = sorted(os.listdir(img_dir))

    # Use a private generator so the global random state is left untouched.
    rng = random.Random(seed)
    rng.shuffle(filenames)

    file_count = len(filenames)

    train_ratio, val_ratio, test_ratio = ratio
    total = train_ratio + val_ratio + test_ratio

    train_count = int(file_count * train_ratio / float(total))
    val_count = int(file_count * val_ratio / float(total))

    train_files = filenames[:train_count]
    val_files = filenames[train_count:train_count + val_count]
    test_files = filenames[train_count + val_count:]

    os.makedirs(dst_dir, exist_ok=True)

    dump_filenames(train_files, os.path.join(dst_dir, "train.txt"))
    dump_filenames(val_files, os.path.join(dst_dir, "val.txt"))
    dump_filenames(test_files, os.path.join(dst_dir, "test.txt"))


In [72]:
# Load mean image
# NOTE(review): mean.npy must already exist in splitdir; none of the visible cells creates it.
mean = np.load(os.path.join(splitdir, "mean.npy"))

# Training dataset built from the filenames listed in train.txt.
train = LabeledImageDataset(os.path.join(splitdir, "train.txt"), rasterdir, maskdir, 
                                mean=mean, crop_size=400, test=False, distort=False)
    
# NOTE(review): despite its name, `test` wraps the validation list (val.txt), not test.txt.
test = LabeledImageDataset (os.path.join(splitdir, "val.txt"), rasterdir, maskdir, 
                                mean=mean, crop_size=400, test=True, distort=False)

In [75]:
# Inspect the (image, label) filename pairs loaded from val.txt.
test._pairs

[('3band_AOI_1_RIO_img2200.tif', '3band_AOI_1_RIO_img2200.tif'),
 ('3band_AOI_1_RIO_img4752.tif', '3band_AOI_1_RIO_img4752.tif'),
 ('3band_AOI_1_RIO_img638.tif', '3band_AOI_1_RIO_img638.tif'),
 ('3band_AOI_1_RIO_img6221.tif', '3band_AOI_1_RIO_img6221.tif'),
 ('3band_AOI_1_RIO_img5460.tif', '3band_AOI_1_RIO_img5460.tif'),
 ('3band_AOI_1_RIO_img2039.tif', '3band_AOI_1_RIO_img2039.tif'),
 ('3band_AOI_1_RIO_img3883.tif', '3band_AOI_1_RIO_img3883.tif'),
 ('3band_AOI_1_RIO_img3561.tif', '3band_AOI_1_RIO_img3561.tif'),
 ('3band_AOI_1_RIO_img6177.tif', '3band_AOI_1_RIO_img6177.tif'),
 ('3band_AOI_1_RIO_img2231.tif', '3band_AOI_1_RIO_img2231.tif'),
 ('3band_AOI_1_RIO_img5577.tif', '3band_AOI_1_RIO_img5577.tif'),
 ('3band_AOI_1_RIO_img811.tif', '3band_AOI_1_RIO_img811.tif'),
 ('3band_AOI_1_RIO_img5339.tif', '3band_AOI_1_RIO_img5339.tif'),
 ('3band_AOI_1_RIO_img6622.tif', '3band_AOI_1_RIO_img6622.tif'),
 ('3band_AOI_1_RIO_img908.tif', '3band_AOI_1_RIO_img908.tif'),
 ('3band_AOI_1_RIO_img5316.tif'

In [61]:
# Model Creation - Convolutional Neural Network

In [None]:
# TODO: define the convolutional neural network model in this cell.