In [1]:
import random
import os
import numpy as np
import pandas as pd
import rasterio as rio
import matplotlib.pyplot as plt
import xarray as xr
import datetime as dt
import rioxarray
import seaborn as sns
import geopandas as gpd
from glob import glob
import time

In [2]:
# load in single igram and other data 
def hyp3_to_xarray_single(path):
    '''
    Reads hyp3 outputs into xarray dataset from single hyp3 folder 

    path: folder holding one hyp3 product. The first file matching each of
        *unw_phase.tif, *ERA5.tif, *MuRP.tif, *dem.tif, *corr.tif and S1*.txt
        is used; an IndexError is raised if any of them has no match.

    returns: xarray Dataset with variables unw_phase, era5_phase, murp_phase,
        elevation and coherence. The granule name and every "key: value"
        entry of the S1*.txt file are attached as coordinates, and
        length-one dimensions are squeezed out.
    '''
    # globs for data to load
    unw_phase_path = glob(f'{path}/*unw_phase.tif')[0]
    era5_path = glob(f'{path}/*ERA5.tif')[0]
    murp_path = glob(f'{path}/*MuRP.tif')[0]
    dem_path = glob(f'{path}/*dem.tif')[0]
    corr_path = glob(f'{path}/*corr.tif')[0]
    meta_path = glob(f'{path}/S1*.txt')[0]

    # granule name = file name minus its last 14 characters ('_unw_phase.tif')
    granule = os.path.split(unw_phase_path)[-1][0:-14]

    # parse "key: value" lines from the metadata file
    metadata = {}
    with open(meta_path) as f:
        for line in f:
            # split on the first colon only, so values may contain colons;
            # lines with no colon at all (e.g. blank lines) are skipped
            key, sep, val = line.partition(':')
            if not sep:
                continue
            metadata[key] = val.strip()

    # read unw_phase and attach granule name + metadata as coordinates
    ds = xr.open_dataset(unw_phase_path)
    ds = ds.assign_coords({'granule': ('granule', [granule])})
    for key, val in metadata.items():
        ds = ds.assign_coords({key: ('granule', [val])})

    # rename the raster variable
    ds = ds.rename({'band_data': 'unw_phase'})

    # add the remaining layers to the unw_phase dataset
    # (coherence comes from the *corr.tif file)
    layer_paths = {
        'era5_phase': era5_path,
        'murp_phase': murp_path,
        'elevation': dem_path,
        'coherence': corr_path,
    }
    for var_name, tif_path in layer_paths.items():
        # .values loads the raster into memory, so the file can be closed
        # as soon as the layer has been copied over
        with xr.open_dataset(tif_path) as layer_ds:
            ds[var_name] = (('band', 'y', 'x'), layer_ds.band_data.values)

    # remove band coordinate
    ds = ds.squeeze()

    return ds

def open_signal_maps(path, frame):
    '''
    Opens every signal map for a frame and collapses them into one median map

    path: folder searched for files matching *{frame}*signal.tif
    frame: frame name that must appear in the file name

    returns: xarray Dataset holding the median over all matching maps
    '''
    signal_files = glob(f'{path}/*{frame}*signal.tif')

    # open each map, stack them along 'band', then reduce that dimension
    opened_maps = [xr.open_dataset(tif_path) for tif_path in signal_files]
    stacked = xr.concat(opened_maps, dim='band')

    return stacked.median(dim='band')

def sample_ds(ds, subset_size=128):
    '''
    Cuts a random subset_size x subset_size window out of a dataset

    ds: object with x and y coordinates and an isel method
    subset_size: edge length of the window in pixels

    returns: result of ds.isel for the randomly placed window
    '''
    # largest start index that still leaves room for a full window
    last_x_start = len(ds.x) - subset_size
    last_y_start = len(ds.y) - subset_size

    # x is drawn before y
    x0 = random.randint(0, last_x_start)
    y0 = random.randint(0, last_y_start)

    window = {
        'x': slice(x0, x0 + subset_size),
        'y': slice(y0, y0 + subset_size),
    }
    return ds.isel(**window)

In [4]:
# Run configuration for the subset_noise call further down.
orbit = 'DT56'  # passed as `orbit`; fills the {orbit} part of the signal-map and hyp3 paths
year = '2017'  # NOTE(review): not referenced by any visible cell; the call passes ['2017'] directly
frame = 'frame_3'  # passed as `frame`; hyp3 sub-folder name and signal-map file name filter

In [6]:
def _interpolate_gaps(da):
    '''
    Fills NaNs in a DataArray by interpolating along x, then along y
    '''
    da = da.interpolate_na(dim='x', use_coordinate=False)
    return da.interpolate_na(dim='y', use_coordinate=False)


def subset_noise(orbit,
                 frame,
                 year_list,
                 tiles_path,
                 subsets_desired,
                 subset_type,
                 subset_size=128,
                 max_time_s=5,
                 max_per_tile=5,
                 home_path='/mnt/c/Users/qbren/Desktop/taco/projects/indennt/proc/data',
                 min_elevation=3300):
    '''
    subset hyp3 outputs using tiles

    Randomly walks years -> granules -> tiles, draws random windows inside
    each tile and writes every window that passes the elevation and NaN
    checks as GeoTIFFs (noise, murp, era5, dem, signal) under
    {home_path}/{subset_type}_subsets/. Returns None.

    orbit: orbit folder name used in the signal-map and hyp3 paths
    frame: frame folder name; also used to select signal maps
    year_list: year folder names to draw granules from (not modified)
    tiles_path: vector file with the tile polygons used for clipping
    subsets_desired: stop once this many subsets have been written
    subset_type: prefix of the output folder ({subset_type}_subsets)
    subset_size: window edge length in pixels
    max_time_s: seconds to keep sampling within a single tile
    max_per_tile: maximum subsets taken from one tile per granule
    home_path: root folder holding signal_maps/, hyp3/ and the output folders
    min_elevation: a window is kept only if its median elevation exceeds
        this value (same units as the DEM raster)
    '''
    signal_path = f'{home_path}/signal_maps/{orbit}'
    signal_ds = open_signal_maps(signal_path, frame)
    tiles = gpd.read_file(tiles_path)

    # shuffle a private copy so the caller's list keeps its order
    years = list(year_list)

    # set number of subsets to 0
    subset_counter = 0

    # continue to run until desired subset number is reached
    while subset_counter < subsets_desired:
        count_at_pass_start = subset_counter
        random.shuffle(years)
        for year in years:
            data_path = f'{home_path}/hyp3/{orbit}/{frame}/{year}'
            granule_list = glob(f'{data_path}/*P012*/')

            # loop through noise maps
            random.shuffle(granule_list)
            for granule_path in granule_list:
                ds = hyp3_to_xarray_single(granule_path)
                print(f'working on {ds.granule.item()}')
                local_signal_ds = signal_ds.rio.reproject_match(ds.unw_phase, nodata=np.nan)
                ds['signal'] = (('y', 'x'), local_signal_ds.band_data.values)

                # loop through tiles in random order
                tiles = tiles.sample(frac=1)
                for i, tile in tiles.iterrows():
                    tile_counter = 0

                    # clip to tile extent; use the row from iterrows so the
                    # geometry always belongs to the tile labelled i (after
                    # shuffling, position i is no longer label i)
                    try:
                        tile_ds = ds.rio.clip([tile.geometry], crs=ds.rio.crs, drop=True)
                    except Exception:
                        # Exception (not a bare except) so Ctrl-C still stops the run
                        print(f'no valid subsets in tile {i}')
                        continue

                    # check if a valid subset can exist in the tile: the clip
                    # must be at least one window wide and tall, and hold at
                    # least one window's worth of non-NaN pixels
                    too_small = len(tile_ds.x) < subset_size or len(tile_ds.y) < subset_size
                    if too_small or np.invert(np.isnan(tile_ds.unw_phase.values)).sum() < subset_size**2:
                        print(f'no valid subsets in tile {i}')
                        continue

                    timeout = time.time() + max_time_s # set time to spend on each tile

                    # try to find appropriate subsets for a while
                    while time.time() < timeout:
                        # grab random subset within sample
                        subset_ds = sample_ds(tile_ds, subset_size)

                        # test if subset elevation is above treeline
                        # (a NaN median compares False and is skipped as well)
                        if not np.median(subset_ds.elevation.values) > min_elevation:
                            continue

                        # interpolate gaps in the two phase layers
                        unw_phase_da = _interpolate_gaps(subset_ds.unw_phase)
                        murp_phase_da = _interpolate_gaps(subset_ds.murp_phase)

                        # every layer must be completely NaN-free
                        checked_layers = (subset_ds.elevation,
                                          subset_ds.era5_phase,
                                          murp_phase_da,
                                          subset_ds.signal,
                                          unw_phase_da)
                        if any(np.isnan(layer.values).any() for layer in checked_layers):
                            continue

                        subset_counter += 1
                        tile_counter += 1
                        # characters 5:13 and 21:29 of the granule name are the two dates
                        subset_name = f'tile{i}_{orbit}_{ds.granule.item()[5:13]}_{ds.granule.item()[21:29]}_sub{subset_counter}.tif'

                        # save subset
                        outputs = (('noise', unw_phase_da),
                                   ('murp', murp_phase_da),
                                   ('era5', subset_ds.era5_phase),
                                   ('dem', subset_ds.elevation),
                                   ('signal', subset_ds.signal))
                        for folder, layer in outputs:
                            layer.rio.to_raster(f'{home_path}/{subset_type}_subsets/{folder}/{subset_name}')

                        if subset_counter >= subsets_desired or tile_counter >= max_per_tile:
                            break

                    print(f'tile {i} subsets: {tile_counter}')

                    # target reached: stop completely instead of moving on to
                    # the next granule, which would write extra subsets
                    if subset_counter >= subsets_desired:
                        return

        # a full pass over every year, granule and tile that adds nothing
        # (e.g. no granules on disk) would otherwise repeat forever
        if subset_counter == count_at_pass_start:
            print(f'no new subsets found in a full pass; stopping at {subset_counter} of {subsets_desired}')
            break
                                                                    

In [7]:
# Request 200 'train' subsets from the 2017 granules of the configured
# orbit/frame, clipping with the training tile grid.
subset_noise(orbit, 
             frame,
             ['2017'],
             tiles_path='../polygons/train_RGI_grid_25km.shp',
             subsets_desired=200,
             subset_type='train')

working on S1AA_20170923T131000_20171005T131000_VVP012_INT40_G_ueF_0EE0
tile 65 subsets: 5
tile 59 subsets: 5
no valid subsets in tile 55
no valid subsets in tile 72
no valid subsets in tile 57
no valid subsets in tile 25
tile 42 subsets: 5
tile 24 subsets: 5
tile 8 subsets: 5
no valid subsets in tile 14
tile 36 subsets: 5
no valid subsets in tile 4
no valid subsets in tile 1
no valid subsets in tile 22
tile 75 subsets: 5
no valid subsets in tile 10
no valid subsets in tile 26
tile 5 subsets: 0
no valid subsets in tile 43
tile 15 subsets: 5
tile 30 subsets: 5
no valid subsets in tile 70
tile 76 subsets: 5
no valid subsets in tile 34
tile 68 subsets: 5
tile 46 subsets: 0
no valid subsets in tile 77
tile 21 subsets: 0
no valid subsets in tile 12
no valid subsets in tile 69
no valid subsets in tile 9
tile 2 subsets: 5
no valid subsets in tile 48
no valid subsets in tile 23
tile 63 subsets: 5
no valid subsets in tile 6
no valid subsets in tile 40
no valid subsets in tile 17
tile 61 subsets


KeyboardInterrupt

