## Setup

In [1]:
# run in forrestwilliams/gis:1.03
%matplotlib inline
import rasterio as rio
import geopandas as gpd
from pathlib import Path
from skimage import morphology
from rasterstats import zonal_stats

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import product

In [2]:
cd /home/micromamba/data

/home/micromamba/data


In [3]:
def read_raster(path,bands=1,crs=False):
    """Open a raster with rasterio and return its data and profile.

    Parameters
    ----------
    path : path-like
        Raster location, handed to ``rio.open``.
    bands : optional
        Band selection forwarded unchanged to the dataset's ``read``
        method (default 1).
    crs : bool, optional
        If true, the dataset's ``crs`` attribute is added as a third
        element of the result.

    Returns
    -------
    list
        ``[array, profile]``, or ``[array, profile, crs]`` when ``crs``
        is true.
    """
    with rio.open(path) as src:
        pixels = src.read(bands)
        meta = src.profile
        src_crs = src.crs

    if crs:
        return [pixels, meta, src_crs]
    return [pixels, meta]


def write_raster(array,profile,out_path,dtype=rio.uint8):
    """Write ``array`` as a single-band, LZW-compressed raster.

    Parameters
    ----------
    array : array-like with ``astype``
        Data to write; it is cast to ``dtype`` before writing.
    profile : mapping
        Rasterio creation options (e.g. the profile of an existing
        raster). It is NOT modified: the overrides below are applied to a
        copy, so the caller can keep reusing the same profile.
    out_path : path-like
        Destination file.
    dtype : optional
        Output data type (default ``rio.uint8``).

    Returns
    -------
    The ``out_path`` that was passed in.
    """
    # Copy first: updating the caller's profile in place would silently
    # change dtype/count/compress for every later use of that object.
    out_profile = dict(profile)
    out_profile.update(
        dtype=dtype,
        count=1,
        compress='lzw')

    # From rasterio docs: register GDAL format drivers and configuration
    # options with a context manager.
    with rio.Env():
        with rio.open(out_path, 'w', **out_profile) as dst:
            dst.write(array.astype(dtype), 1)

    return out_path

def vel_coh_filter(vel_array, coh_array, coh_thresh, n_std=2, min_size=None):
    """Keep only coherent pixels whose velocity is a spatial outlier.

    A pixel is retained when its coherence is greater than ``coh_thresh``
    and its velocity differs from the mean velocity of the coherent pixels
    by more than ``n_std`` standard deviations. Mean and standard deviation
    are computed with NaN-ignoring functions after the low-coherence pixels
    have been set to NaN.

    Parameters
    ----------
    vel_array : numpy.ndarray
        Velocity values. Not modified. Non-float arrays are promoted to
        float so that NaN can be used as the "masked" value.
    coh_array : numpy.ndarray
        Coherence values, compared element-wise with ``coh_thresh``.
    coh_thresh : float
        Pixels with coherence <= this value are masked.
    n_std : float, optional
        Number of standard deviations a velocity must exceed (default 2).
    min_size : int or None, optional
        If truthy, connected regions of the mask smaller than this many
        pixels are removed with ``morphology.remove_small_objects``.

    Returns
    -------
    tuple
        ``(data, mask)``: ``data`` is a float copy of ``vel_array`` with
        NaN wherever ``mask`` is False; ``mask`` is the boolean array of
        retained pixels.
    """
    # NaN is written into the working copy, which is impossible for integer
    # dtypes, so promote anything that is not already floating point.
    if np.issubdtype(vel_array.dtype, np.floating):
        data = vel_array.copy()
    else:
        data = vel_array.astype(float)

    # mask pixels with temporal coherence at or below the threshold
    coh_mask = coh_array > coh_thresh
    data[~coh_mask] = np.nan

    # keep pixels whose velocity deviates from the spatial mean by more than
    # n_std standard deviations (NaN pixels compare False and drop out)
    mean = np.nanmean(data)
    std = np.nanstd(data)
    vel_mask = np.abs(data - mean) > (std * n_std)

    # combine masks
    mask = vel_mask & coh_mask

    # optionally remove small objects (min size in pixels)
    if min_size:
        mask = morphology.remove_small_objects(mask,min_size=min_size)

    data[~mask] = np.nan

    return (data, mask)


def gdf_zonal_stats(gdf,data,affine,statistic,nodata=0,dtype=float):
    """Compute one zonal statistic of ``data`` for every geometry in ``gdf``.

    Parameters
    ----------
    gdf : GeoDataFrame
        Its ``geometry`` column supplies the zones.
    data : numpy.ndarray
        Raster values; cast to ``dtype`` before being passed to
        ``zonal_stats``.
    affine :
        Transform of ``data``, forwarded to ``zonal_stats``.
    statistic : str
        Name of the statistic to request (e.g. ``'max'``).
    nodata : optional
        Nodata value forwarded to ``zonal_stats`` (default 0).
    dtype : optional
        Type ``data`` is cast to (default ``float``).

    Returns
    -------
    pandas.Series
        One value per row of ``gdf``, indexed like ``gdf`` so that
        ``gdf[col] = result`` lines up row by row even when ``gdf`` does
        not have a default 0..n-1 index.
    """
    # Get polygons
    zones = list(gdf.geometry)

    # Perform zonal stats (one dict per zone, keyed by statistic name)
    zs = zonal_stats(zones, data.astype(dtype), affine=affine,stats=[statistic],nodata=nodata)

    # Reuse gdf's index: pandas aligns on index labels when a Series is
    # assigned to a column, so a fresh RangeIndex could misplace values.
    return pd.Series([zone_result[statistic] for zone_result in zs], index=gdf.index)

### Load Data

In [4]:
# Raster name stems; a year is appended to each one to build the file name.
dataset_names = [
    'velocity_desc',
    'velocity_asc',
    'temporalCoherence_desc',
    'temporalCoherence_asc',
]

# Years loaded for every stem above.
years = list(range(2018, 2022))

In [5]:
parent_dir = Path('insar')
datasets = {}

# Build {stem: {year: array}}; only the array (element 0 of the
# read_raster result) is kept, the profile is discarded here.
for product_name in dataset_names:
    datasets[product_name] = {}

    for year in years:
        tif_path = parent_dir / f'{product_name}{year}.tif'
        print(f'Reading {tif_path} ...')
        datasets[product_name][year] = read_raster(tif_path)[0]

print('Done!')

Reading insar/velocity_desc2018.tif ...
Reading insar/velocity_desc2019.tif ...
Reading insar/velocity_desc2020.tif ...
Reading insar/velocity_desc2021.tif ...
Reading insar/velocity_asc2018.tif ...
Reading insar/velocity_asc2019.tif ...
Reading insar/velocity_asc2020.tif ...
Reading insar/velocity_asc2021.tif ...
Reading insar/temporalCoherence_desc2018.tif ...
Reading insar/temporalCoherence_desc2019.tif ...
Reading insar/temporalCoherence_desc2020.tif ...
Reading insar/temporalCoherence_desc2021.tif ...
Reading insar/temporalCoherence_asc2018.tif ...
Reading insar/temporalCoherence_asc2019.tif ...
Reading insar/temporalCoherence_asc2020.tif ...
Reading insar/temporalCoherence_asc2021.tif ...
Done!


In [6]:
# Take the raster profile (element 1 of the read_raster result) from one of
# the inputs; its 'transform' entry is kept separately as `affine`.
reference_raster = 'insar/velocity_desc2019.tif'
profile = read_raster(reference_raster)[1]
affine = profile['transform']

## Create activity dataset

### Relevant parameters

In [7]:
# Thresholds handed to vel_coh_filter for every year and orbit
coh_thresh = 0.905  # coherence must be greater than this value
n_std = 2           # velocity must deviate from the mean by more than n_std standard deviations
min_size = 19       # connected regions smaller than this many pixels are removed

In [8]:
# zero-filled accumulators with the same shape as the input rasters
grid_shape = datasets['velocity_desc'][2019].shape
activity_bitwise = np.zeros(grid_shape)
activity_count = np.zeros(grid_shape)

# the same filter settings are used for every year and both orbits
filter_kwargs = dict(coh_thresh=coh_thresh, n_std=n_std, min_size=min_size)

for bit, year in enumerate(years):
    print(year)

    # descending orbit: only the mask (element 1) is needed
    mask_desc = vel_coh_filter(datasets['velocity_desc'][year],
                               datasets['temporalCoherence_desc'][year],
                               **filter_kwargs)[1]

    # ascending orbit
    mask_asc = vel_coh_filter(datasets['velocity_asc'][year],
                              datasets['temporalCoherence_asc'][year],
                              **filter_kwargs)[1]

    # a pixel counts as active this year if either orbit flags it
    active = (mask_asc | mask_desc).astype(int)

    # year number `bit` (0-based) contributes 2**bit to the bitwise layer
    # and 1 to the count of active years
    activity_bitwise += active * (2**bit)
    activity_count += active

2018
2019
2020
2021


In [9]:
# write both activity layers as int8 rasters into the input directory
bitwise_name = write_raster(
    activity_bitwise,
    profile,
    parent_dir / 'activity_bitwise_2018_2021.tif',
    dtype=rio.int8,
)
count_name = write_raster(
    activity_count,
    profile,
    parent_dir / 'activity_count_2018_2021.tif',
    dtype=rio.int8,
)

In [10]:
# The per-year input rasters are not referenced in the cells below; drop the
# name so the arrays it holds can be garbage-collected.
del datasets

### Intersect with landslide data

In [11]:
# load the landslide polygons, then reproject them to EPSG:32760
landslides = gpd.read_file('landslides/large_landslidesV2.shp')
landslides = landslides.to_crs(epsg=32760)

In [12]:
# remove every existing column whose name contains 'active'
landslides = landslides.drop(
    columns=[col for col in landslides.columns if 'active' in col]
)

In [13]:
# per-polygon maximum of the activity-count raster (-1 passed as nodata)
years_active = gdf_zonal_stats(
    landslides, activity_count, affine, 'max', nodata=-1, dtype=int
)
landslides['y_active'] = years_active

In [14]:
# 1 where the polygon's y_active value is at least 2, otherwise 0
active_two_plus = landslides['y_active'].ge(2)
landslides['active_2y'] = active_two_plus.astype(int)

In [65]:
# landslides.to_file('landslides/large_landslidesV5.shp')

In [15]:
# share of landslides with active_2y == 1
landslides['active_2y'].mean()

0.11258278145695365

### Fix mistake: restore the `evidence`, `redraw` and `connected` attributes from the V6 shapefile

In [44]:
# earlier version of the landslide layer, reprojected to EPSG:32760
old = gpd.read_file('landslides/large_landslidesV6.shp')
old = old.to_crs(epsg=32760)

In [45]:
# columns of the freshly recomputed table, for comparison with `old`
landslides.columns

Index(['Name', 'Shape_Leng', 'Shape_Area', 'age', 'confidence', 'class',
       'notes', 'area_ha', 'geometry', 'y_active', 'active_2y'],
      dtype='object')

In [46]:
# columns of the earlier V6 table, for comparison with `landslides`
old.columns

Index(['Name', 'Shape_Leng', 'Shape_Area', 'age', 'confidence', 'class',
       'notes', 'area_ha', 'y_active', 'active_2y', 'evidence', 'redraw',
       'connected', 'geometry'],
      dtype='object')

In [63]:
# start every attribute at -1; rows without a match in `old` keep this value
for col in ('evidence', 'redraw', 'connected'):
    landslides[col] = -1

In [64]:
# copy the three attributes from `old` into `landslides`, matching rows by Name
for _, old_row in old.iterrows():
    same_name = landslides['Name'] == old_row['Name']
    for col in ('evidence', 'redraw', 'connected'):
        landslides.loc[same_name, col] = int(old_row[col])

In [65]:
# export the table with activity columns and restored attributes
output_shp = 'landslides/large_landslides_activeV1.shp'
landslides.to_file(output_shp)

In [66]:
# NOTE(review): hard-coded ratio; presumably 69 landslides of interest out of
# 1057 in total - confirm where both numbers come from and compute them
# from the data instead.
69/1057

0.065279091769158