# Pre-process images: fetch, mosaic, crop, align and stack

#### 1. Fetching images

We have a list of images (RGB and SWIR) for each sample.
We'll fetch these from the Center for Imaging Science's FTP.
CIS is a part of the Rochester Institute of Technology.

#### 2. Clipping SWIR edges

Edges on SWIR data tend to have strange artefacts. We'll clip a couple of pixels
along the edges of the valid data (i.e. where it borders NODATA values, not along
the edges of the whole raster extent) to remove them before creating the mosaics.

#### 3. Mosaic RGB and SWIR images separately
We'll use GDAL's command line tools (`gdalwarp`, called via the `subprocess` module).

#### 4. Cropping mosaics
Same approach (a command line tool called via `subprocess`), but using rasterio's `rio` CLI since it has a friendlier interface.

#### 5. Rasterize LiDAR points to extract elevation and intensity
After converting LAS data to vector formats (using our `las_to_vector.py` script)
we'll rasterize the points to add them as another band.

#### 6. Aligning and stacking rasters
Once all bands are ready, we'll finish by aligning them, stacking and saving in 
a single GeoTIFF file. Current Python raster libraries (mainly `rasterio`) lack
a friendly way of resampling/aligning/stacking rasters so we'll use a little
R script to leverage R's `raster` library.

In [1]:
import os
import subprocess

from multiprocessing import Pool

import pandas as pd

from shapely.wkt import loads

from preprocessing_functions import fetch_image, get_image_metadata, remove_borders

In [2]:
## Load samples
# One row per sample; `process_sample` reads the columns `FID`, `Class`,
# `rgb_imgs`, `swir_imgs` and `geometry_wkt` from each row.
# NOTE(review): this is a user-specific absolute path, unlike the relative
# '../data/...' folders used elsewhere in this notebook.
samples = pd.read_csv(
    filepath_or_buffer="/Users/arredond/Desktop/haiti/data/samples/samples_reference.csv"
)

In [3]:
# Prepare absolute folders
# All paths are resolved against the current working directory once, here,
# so later cells can pass absolute paths to external tools.
path_raw = os.path.abspath('../data/imgs_raw/')
base_crop_folder = os.path.abspath('../data/imgs_crop')
lidar_raw = os.path.abspath('../data/lidar/las_to_vector/')
lidar_rast = os.path.abspath('../data/lidar/rasterized/')
lidar_limits = os.path.abspath('../data/lidar/mdt_limits/')
lidar_mask = os.path.abspath('../data/lidar/masked/')
results_folder = os.path.abspath('../data/imgs_final')

# Create the working folders. `makedirs(..., exist_ok=True)` also creates
# missing parent folders and does not fail if the folder already exists.
# `lidar_limits` is not created here (it is only read from later on).
for folder in [path_raw, base_crop_folder, lidar_raw,
               lidar_rast, lidar_mask, results_folder]:
    os.makedirs(folder, exist_ok=True)

# GDAL setup
# Argument prefixes shared by every `gdalwarp` / `gdal_grid` call below;
# callers append the input and output file names (plus any extra options).

# gdalwarp: Int16 GeoTIFF output, 0 as source and destination nodata,
# cubic resampling.
base_warp = ['gdalwarp'] + [
    token
    for flag_and_value in (
        ('-wo', 'SKIP_NOSOURCE=yes'),
        ('-ot', 'Int16'),
        ('-of', 'GTiff'),
        ('-srcnodata', '0'),
        ('-dstnodata', '0'),
        ('-r', 'cubic'),
    )
    for token in flag_and_value
]

# gdal_grid: nearest-neighbour algorithm with -99 as nodata, Float32 GeoTIFF output.
base_grid = ['gdal_grid'] + [
    token
    for flag_and_value in (
        ('-a', 'nearest:nodata=-99'),
        ('-of', 'GTiff'),
        ('-ot', 'Float32'),
    )
    for token in flag_and_value
]

In [7]:
def process_sample(idx, row):
    """Run the whole pre-processing chain for one sample.

    For the selected samples this fetches the RGB and SWIR source images,
    strips the borders of the SWIR images, mosaics and crops each image type,
    rasterizes the LiDAR `z` and `intensity` fields on the grid of the cropped
    RGB image, masks them with the sample's limits shapefile and finally calls
    `align_stack.R` to write the stacked raster into `results_folder`.

    Relies on the module-level folders (`path_raw`, `base_crop_folder`,
    `lidar_raw`, `lidar_rast`, `lidar_limits`, `lidar_mask`,
    `results_folder`) and command prefixes (`base_warp`, `base_grid`).

    Args:
        idx: Row index label. Unused; present so the function can be fed
            straight from `DataFrame.iterrows()` through `Pool.starmap`.
        row: Row with the keys `FID`, `Class`, `rgb_imgs`, `swir_imgs`
            (image names separated by ', ') and `geometry_wkt`.

    Returns:
        None.

    Raises:
        subprocess.CalledProcessError: If any external command exits with a
            non-zero status.
    """
    # Only this hand-picked subset of samples is processed. Check it before
    # parsing the rest of the row so skipped rows cost (and can break) nothing.
    if row['FID'] not in [0, 5, 8, 18, 19]:
        return None

    # Get DF data
    sample = {
        'fid': row['FID'],
        'stratum': row['Class'].lower(),
        'rgb_imgs': row['rgb_imgs'].split(', '),
        'swir_imgs': row['swir_imgs'].split(', '),
        'bounds': list(loads(row['geometry_wkt']).bounds)
    }
    sample['name'] = f'sample_{sample["fid"]}_{sample["stratum"]}.tif'
    print(f'Working on {sample["name"]}')

    # Process spectral images (RGB and SWIR)
    for img_type in ['rgb', 'swir']:
        # Prepare paths
        base_folder = os.path.join(path_raw, img_type)
        mosaic_folder = os.path.join(base_folder, 'mosaics')
        crop_folder = os.path.join(base_crop_folder, img_type)
        swir_unclipped = os.path.join(base_folder, 'unclipped')

        # exist_ok: several pool workers may try to create the same folder
        for folder in [base_folder, mosaic_folder, crop_folder]:
            os.makedirs(folder, exist_ok=True)

        # Fetch images
        sample_imgs = sample[f'{img_type}_imgs']
        for img in sample_imgs:
            img_path = os.path.join(base_folder, img)
            if not os.path.exists(img_path):
                fetch_image(
                    img,
                    'vnir' if img_type == 'rgb' else img_type,
                    base_folder
                )

            # Clip border from SWIR images
            if img_type == 'swir':
                os.makedirs(swir_unclipped, exist_ok=True)
                unclipped_path = os.path.join(swir_unclipped, img)
                # An existing "unclipped" copy means this image was already
                # processed; redoing it would overwrite the stored original
                # with the clipped file and clip the borders a second time.
                if not os.path.exists(unclipped_path):
                    # Move original to "unclipped" folder
                    os.rename(img_path, unclipped_path)
                    # Remove borders and save to base folder
                    remove_borders(unclipped_path, img_path)

        # Make individual mosaics
        mosaic_file = os.path.join(mosaic_folder, sample['name'])
        subprocess.check_call(
            base_warp
            + [os.path.join(base_folder, x) for x in sample_imgs]  # Input files
            + [mosaic_file]  # Output file
        )

        # Crop mosaic to sample extent (bounds are passed as one
        # space-separated argument)
        crop_file = os.path.join(crop_folder, sample['name'])
        subprocess.check_call(
            ['rio', 'clip', mosaic_file, crop_file, '--bounds']
            + [' '.join([str(x) for x in sample['bounds']])]
        )

    # Rasterize elevation AND intensity (interpolating) taking RGB cropped as reference.
    # The reference is looked up in the same folder the crop was written to.
    lidar_sample = os.path.join(lidar_raw, f'FID_{sample["fid"]}_PointHeight_cor.gpkg')
    ref = os.path.join(base_crop_folder, 'rgb', sample['name'])
    swir_crop = os.path.join(base_crop_folder, 'swir', sample['name'])
    rast_base = os.path.join(lidar_rast, sample['name'])
    limits_shp = os.path.join(lidar_limits, f'FID_{sample["fid"]}_TIN.SHP')
    w, h, bb = get_image_metadata(ref)

    lidar_rasters = {}
    for rast_field in ('z', 'intensity'):
        rast = rast_base.replace('.tif', f'_{rast_field}.tif')
        rast_flip = rast.replace('.tif', '_flip.tif')
        subprocess.check_call(
            base_grid
            + [
                '-zfield', rast_field,
                '-outsize', str(w), str(h),
                '-txe', str(bb[0]), str(bb[2]),
                '-tye', str(bb[1]), str(bb[3]),
                lidar_sample,  # Input
                rast_flip  # Output
            ]
        )

        # gdal_grid "flips" raster so we're passing it through gdal_warp
        # to normalize the geotransform
        subprocess.check_call(['gdalwarp', rast_flip, rast])
        os.remove(rast_flip)

        # Crop rasters to MDT limits: `fio dump` writes the limits as GeoJSON
        # to a pipe that `rio mask` reads from stdin.
        lidar_rasters[rast_field] = os.path.join(
            lidar_mask,
            sample['name'].replace('.tif', f'_{rast_field}.tif')
        )

        fio_cmd = ['fio', 'dump', limits_shp]
        rio_cmd = ['rio', 'mask', rast, lidar_rasters[rast_field], '--geojson-mask', '-']

        fio_proc = subprocess.Popen(fio_cmd, stdout=subprocess.PIPE)
        try:
            subprocess.check_call(rio_cmd, stdin=fio_proc.stdout)
        finally:
            # Release our end of the pipe and reap the child even if rio failed
            fio_proc.stdout.close()
            fio_proc.wait()
        if fio_proc.returncode != 0:
            raise subprocess.CalledProcessError(fio_proc.returncode, fio_cmd)

    ## Align and stack RGB + SWIR + elevation to final raster
    subprocess.check_call(
        [
            'Rscript', 'align_stack.R',
            ref,  # Reference image (RGB)
            swir_crop,  # SWIR (to resample)
            lidar_rasters['z'],  # Elevation (to resample)
            lidar_rasters['intensity'],  # Intensity (to resample)
            os.path.join(results_folder, sample['name'])  # Output
        ]
    )

In [8]:
# Run
if __name__ == '__main__':
    # Leave one core free, but always use at least one worker:
    # os.cpu_count() may return None, and Pool(0) raises ValueError.
    n_workers = max(1, (os.cpu_count() or 1) - 1)
    with Pool(n_workers) as pool:
        # iterrows() yields (index, row) pairs, which starmap unpacks
        # into process_sample(idx, row).
        pool.starmap(process_sample, samples.iterrows())

Working on sample_5_rural.tif
Working on sample_0_residential.tif
Working on sample_8_residential.tif
Working on sample_18_urbanirreg.tif
Working on sample_19_rural.tif
