# Subset scenes, DEMs

In [2]:
import os
import matplotlib.pyplot as plt
import numpy as np
import shapely
import geopandas as gpd
import rasterio as rio
from rasterio.windows import Window
import rasterio.mask 
import shutil
import glob
from osgeo import gdal
import xarray as xr
from PIL import Image

In [29]:
# load, crop, and save rasters
#
# For every interferogram directory, random windows are drawn and kept only when
#   1. the line-of-sight displacement window contains no 0.0 (nodata) pixels,
#   2. enough velocity pixels exceed the magnitude threshold, and
#   3. the mean DEM value of the window is at or above the elevation threshold.
# Each accepted window is written out for the interferogram, DEM and velocity rasters.

# --- paths -------------------------------------------------------------------
main_dir = '/Users/qbren/Desktop/taco/projects/atmospheric_correction/data_processing/training_data'
igram_dir = f'{main_dir}/asc_crop_sanjuans_noise'
veloc_fn = f'{main_dir}/veloc_crop/asc_veloc_sanjuans.tif'
# NOTE(review): subsets are written here, while the cells that build training images
# read from f'{main_dir}/subsets' -- confirm the files are moved/copied in between.
# NOTE(review): output names use a 'des' prefix although the inputs are 'asc' -- confirm.
subset_out_dir = '/Users/qbren/Desktop/subsets'

# --- subsetting parameters ---------------------------------------------------
xsize, ysize = 125, 125            # subset width (columns) and height (rows) in pixels
subset_number = 50                 # number of subsets to save per interferogram
veloc_threshold = 0.003            # |velocity| above this counts as a signal pixel
min_signal_pixels = 50             # ~0.3% of a 125 x 125 subset
min_mean_elevation = 3300          # only keep subsets at high elevation
max_attempts_without_subset = 200  # give up on a scene that yields nothing this quickly
max_attempts = 100 * subset_number  # hard cap so a sparse scene cannot loop forever


def _find_rasters(scene_dir):
    """Return ``(los_disp_name, dem_name)`` found in ``scene_dir``; an entry is None when absent."""
    noise_name = dem_name = None
    for name in os.listdir(scene_dir):
        if name.endswith('los_disp.tif'):
            noise_name = name
        elif name.endswith('dem.tif'):
            dem_name = name
    return noise_name, dem_name


def _write_window(src, window, out_fn):
    """Write all bands of ``window`` from the open dataset ``src`` to ``out_fn``."""
    profile = src.profile
    profile.update({
        'height': int(window.height),  # rows
        'width': int(window.width),    # columns
        'transform': src.window_transform(window)})
    with rio.open(out_fn, 'w', **profile) as dst:
        dst.write(src.read(window=window))


for product in ('int', 'dem', 'veloc'):
    os.makedirs(f'{subset_out_dir}/{product}', exist_ok=True)

# list interferogram directories (the rasters are opened from igram_dir, so list it too)
dirs = sorted(os.listdir(igram_dir))

# The velocity raster is the same for every interferogram: open and read it once.
# NOTE(review): the same pixel window is applied to all three rasters, which assumes
# they share one grid -- confirm.
with rio.open(veloc_fn) as veloc_src:
    veloc_rs = veloc_src.read(1)

    # loop through interferograms
    for idir in dirs:
        scene_dir = f'{igram_dir}/{idir}'
        if not os.path.isdir(scene_dir):
            continue  # stray files such as .DS_Store

        # select the line of sight displacement and the DEM
        noise_tif, dem_tif = _find_rasters(scene_dir)
        if noise_tif is None or dem_tif is None:
            print('missing los_disp or dem raster, skipping: ', idir)
            continue
        print("working on: ", noise_tif)
        scene_name = noise_tif[0:-13]  # drop the trailing '_los_disp.tif'

        with rio.open(f'{scene_dir}/{noise_tif}') as noise_src, \
                rio.open(f'{scene_dir}/{dem_tif}') as dem_src:
            noise_rs = noise_src.read(1)
            dem_rs = dem_src.read(1)

            # largest valid origin so the window stays inside the scene
            xmax = noise_src.width - xsize
            ymax = noise_src.height - ysize
            if xmax < 0 or ymax < 0:
                print('scene smaller than subset size, skipping: ', idir)
                continue

            subsets = 0
            counter = 0

            # loop until subsetting is finished
            while subsets < subset_number:
                counter += 1
                if counter > max_attempts_without_subset and subsets < 1:
                    print('too few appropriate subsets, moving to next int')
                    break
                if counter > max_attempts:
                    print(f'stopping after {max_attempts} attempts with {subsets} subsets')
                    break

                # randint's upper bound is exclusive, hence the +1
                xoff = np.random.randint(0, xmax + 1)
                yoff = np.random.randint(0, ymax + 1)

                rows = slice(yoff, yoff + ysize)
                cols = slice(xoff, xoff + xsize)
                temp_noise = noise_rs[rows, cols]
                temp_dem = dem_rs[rows, cols]
                temp_veloc = veloc_rs[rows, cols]

                # a truncated slice means the DEM/velocity raster does not cover this window
                if temp_dem.shape != temp_noise.shape or temp_veloc.shape != temp_noise.shape:
                    continue
                # only save if there are no nodata values in subset
                if 0.0 in temp_noise:
                    continue
                # only save if enough pixels carry a velocity signal
                signal = (temp_veloc < -veloc_threshold) | (temp_veloc > veloc_threshold)
                if np.count_nonzero(signal) <= min_signal_pixels:
                    continue
                # only save if subset is at high elevation (a NaN mean is rejected too)
                if not temp_dem.mean() >= min_mean_elevation:
                    continue

                window = Window(xoff, yoff, xsize, ysize)
                subsets += 1  # update index
                out_name = f'des{subsets}_{scene_name}.tif'

                # write the same window from each raster
                _write_window(noise_src, window, f'{subset_out_dir}/int/{out_name}')
                _write_window(dem_src, window, f'{subset_out_dir}/dem/{out_name}')
                _write_window(veloc_src, window, f'{subset_out_dir}/veloc/{out_name}')

working on:  S1AA_20170502T130927_20170514T130928_VVP012_INT40_G_ueF_54EC_los_disp.tif
too few appropriate subsets
working on:  S1AA_20170514T130928_20170526T130928_VVP012_INT40_G_ueF_3D22_los_disp.tif
working on:  S1AA_20170526T130928_20170607T130929_VVP012_INT40_G_ueF_2A5A_los_disp.tif
too few appropriate subsets
working on:  S1AA_20170607T130929_20170619T130930_VVP012_INT40_G_ueF_23F5_los_disp.tif
working on:  S1AA_20170619T130930_20170701T130930_VVP012_INT40_G_ueF_0774_los_disp.tif
working on:  S1AA_20170701T130930_20170713T130931_VVP012_INT40_G_ueF_5F4E_los_disp.tif
working on:  S1AA_20170713T130931_20170725T130932_VVP012_INT40_G_ueF_5F49_los_disp.tif
working on:  S1AA_20170725T130932_20170806T130932_VVP012_INT40_G_ueF_1998_los_disp.tif
working on:  S1AA_20170806T130932_20170818T130933_VVP012_INT40_G_ueF_7736_los_disp.tif
working on:  S1AA_20170818T130933_20170830T130933_VVP012_INT40_G_ueF_42CC_los_disp.tif
working on:  S1AA_20170830T130933_20170911T130934_VVP012_INT40_G_ueF_66BB_

## Combine interferograms and velocity to make training images

In [6]:
# raw directory listings of the interferogram, velocity and DEM subset folders
int_fns, veloc_fns, dem_fns = (
    os.listdir(f'{main_dir}/subsets/{product}')
    for product in ('int', 'veloc', 'dem')
)

def list_tifs(my_fns):
    """Return the entries of ``my_fns`` that end in '.tif', in their original order."""
    return [name for name in my_fns if name[-4:] == '.tif']

# keep only the .tif entries of each directory listing
int_list, veloc_list, dem_list = (
    list_tifs(listing) for listing in (int_fns, veloc_fns, dem_fns)
)

In [7]:
# create training images
#
# Each interferogram subset is combined with its velocity subset scaled by a random
# lognormal factor; snr_dups differently scaled images are written per subset.

snr_dups = 10  # number of randomly scaled images per subset
train_dir = f'{main_dir}/subsets/train'
os.makedirs(train_dir, exist_ok=True)

for fn in int_list:
    # only use subsets that exist for the interferogram, velocity and DEM
    if fn not in veloc_list or fn not in dem_list:
        continue

    # subset-specific names so the scene-wide veloc_fn path is not overwritten
    int_subset_fn = f'{main_dir}/subsets/int/{fn}'
    veloc_subset_fn = f'{main_dir}/subsets/veloc/{fn}'

    # read band 1 of each subset and close the datasets right away
    with rio.open(int_subset_fn) as int_src:
        int_rs = int_src.read(1)
    with rio.open(veloc_subset_fn) as veloc_src:
        veloc_rs = veloc_src.read(1)

    for _ in range(snr_dups):
        # draw random number from lognormal distribution
        scalar = np.round(np.random.lognormal(0.05, 1.), 3)
        train_rs = int_rs + (veloc_rs * scalar)

        # Pillow picks the file format from the extension, so '.tif' must come last
        out_fn = f'{train_dir}/{fn[:-4]}_scl{scalar}.tif'

        # NOTE(review): assumes the rasters are floating point, which Pillow stores
        # as a 32-bit float ('F' mode) TIFF -- confirm
        Image.fromarray(train_rs).save(out_fn)