# Trialing Landslide Activity Filters

## Setup

In [1]:
# run in forrestwilliams/gis:1.03
%matplotlib inline
import rasterio as rio
import geopandas as gpd
import richdem as rd
from pathlib import Path
from rasterstats import zonal_stats
from skimage import morphology
from sklearn.metrics import cohen_kappa_score, accuracy_score, confusion_matrix

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import product

import warnings
warnings.filterwarnings('ignore')

In [2]:
cd /home/micromamba/data

/home/micromamba/data


In [3]:
def read_raster(path,bands=1,crs=False):
    with rio.open(path) as dst:
        array = dst.read(bands)
        profile = dst.profile
        crs_val = dst.crs
        # array = np.moveaxis(array,0,-1)
    
    result = [array, profile]

    if crs:
        result.append(crs_val)

    return result


def write_raster(array,profile,out_path,dtype=rio.uint8):
    # From rasterio docs:
    # Register GDAL format drivers and configuration options with a
    # context manager.
    with rio.Env():
        # And then change the band count to 1, set the
        # dtype to uint8, and specify LZW compression.
        profile.update(
            dtype=dtype,
            count=1,
            compress='lzw')

        with rio.open(out_path, 'w', **profile) as dst:
            dst.write(array.astype(dtype), 1)

    return out_path


def vel_coh_filter(vel_array, coh_array, coh_thresh, n_std=2, min_size=None):
    data = vel_array.copy()

    # mask pixels with velocities less than 2 x velocity std in space
    mean = np.nanmean(vel_array)
    std = np.nanstd(vel_array)
    vel_mask = np.abs(vel_array - mean) > (std * n_std)
    
    # mask pixels with temporal coherence below threshold
    coh_mask = coh_array > coh_thresh
    mask = vel_mask & coh_mask

    # optionally remove small objects (min size in pixels)
    if min_size:
        mask = morphology.remove_small_objects(mask,min_size=min_size)
    
    data[~mask] = np.nan
    
    return (data, mask)

def vel_coh_filter_v2(vel_array, coh_array, coh_thresh, n_std=2, min_size=None):
    data = vel_array.copy()
    
    # mask pixels with temporal coherence below threshold
    coh_mask = coh_array > coh_thresh
    data[~coh_mask] = np.nan

    # mask pixels with velocities less than 2 x velocity std in space
    mean = np.nanmean(data)
    std = np.nanstd(data)
    vel_mask = np.abs(data - mean) > (std * n_std)
    
    # combine masks
    mask = vel_mask & coh_mask

    # optionally remove small objects (min size in pixels)
    if min_size:
        mask = morphology.remove_small_objects(mask,min_size=min_size)
    
    data[~mask] = np.nan
    
    return (data, mask)


def bekaert_filter(vel_array, std_array, n_std=2, min_size=None):
    data = vel_array.copy()
    
    # mask pixels less than 2 x temporal standard deviation
    mask = np.abs(data - np.nanmean(vel_array)) > (n_std * std_array)

    # optionally remove small objects (min size in pixels)
    if min_size:
        mask = morphology.remove_small_objects(mask,min_size=min_size)
    
    data[~mask] = np.nan
    
    return (data, mask)


def gdf_zonal_stats(gdf,data,affine,statistic):
    # Get polygons
    zones = list(gdf.geometry)

    # Perform zonal stats and convert to pandas series
    zs = zonal_stats(zones, data.astype(float), affine=affine,stats=[statistic],nodata=0.0)
    result = pd.Series([x[statistic] for x in zs])
    
    return result


def add_trial_column(gdf,data,affine,col_name):
    # Perform zonal stats
    counts = gdf_zonal_stats(gdf,data,affine,'count')

    # Change count to binary measure of activity and add to geodataframe
    gdf[col_name] = (counts > 0).astype(int)

    return gdf

## Prep landslide activity test dataset

In [4]:
active_type = 'active'

ls_path = Path('activity_validation_set.shp')
test_set = gpd.read_file(ls_path).to_crs(32760).reset_index(drop=True)

test_set = test_set[['Name','area_ha',active_type,'geometry']]
test_set = test_set.rename(columns={'Name':'name',active_type:'active'})
test_set['area_ha'] = test_set.geometry.area / 1e4

### Calculate Average Aspect

In [None]:
# dem, profile = read_raster()
# aspect = rd.aspect(dem)

imagery_path = Path('imagery')
in_dem = imagery_path / 'northIslandOtago.tif'
dem_path = imagery_path / 'dem.tif'
minx, miny, maxx, maxy = test_set.copy().buffer(10000).total_bounds
epsg = f'EPSG:{test_set.crs.to_epsg()}'

In [None]:
# !gdal_translate -projwin {minx} {maxy} {maxx} {miny} -projwin_srs {epsg} {str(in_dem)} subset.tif
# !gdalwarp -t_srs {epsg} subset.tif {str(dem_path)}
# !rm subset.tif

In [None]:
dem, dem_profile = read_raster(dem_path)
dem_affine = dem_profile['transform']

In [None]:
%%capture
rdem = rd.rdarray(dem, no_data=-32768)
aspect = rd.TerrainAttribute(rdem, attrib='aspect')

aspect = np.array(aspect)
aspect[aspect < 0] = np.nan
aspect = np.deg2rad(aspect)
# write_raster(aspect,dem_profile,'aspect.tif',dtype=rio.float32)

In [None]:
# Calculate circular mean from:
# https://en.wikipedia.org/wiki/Circular_mean

sin_aspect = gdf_zonal_stats(test_set,np.sin(aspect),dem_affine,'sum')
cos_aspect = gdf_zonal_stats(test_set,np.cos(aspect),dem_affine,'sum')

avg_aspect = np.rad2deg(np.arctan2(sin_aspect,cos_aspect))
test_set['aspect'] = avg_aspect

## Trial activity filters

### Load Data

In [5]:
dataset_names = ['velocity_desc','velocity_asc',
                 'std_asc','std_desc',
                 'temporalCoherence_desc','temporalCoherence_asc']
years = [2018,2019,2020,2021]

In [6]:
datasets = {}
for d in dataset_names:
    years_dic = {}
    
    for y in years:
        name = f'{d}{y}.tif'
        print(f'Reading {name} ...')
        years_dic[y] = read_raster(name)[0]

    datasets[d] = years_dic

print('Done!')

Reading velocity_desc2018.tif ...
Reading velocity_desc2019.tif ...
Reading velocity_desc2020.tif ...
Reading velocity_desc2021.tif ...
Reading velocity_asc2018.tif ...
Reading velocity_asc2019.tif ...
Reading velocity_asc2020.tif ...
Reading velocity_asc2021.tif ...
Reading std_asc2018.tif ...
Reading std_asc2019.tif ...
Reading std_asc2020.tif ...
Reading std_asc2021.tif ...
Reading std_desc2018.tif ...
Reading std_desc2019.tif ...
Reading std_desc2020.tif ...
Reading std_desc2021.tif ...
Reading temporalCoherence_desc2018.tif ...
Reading temporalCoherence_desc2019.tif ...
Reading temporalCoherence_desc2020.tif ...
Reading temporalCoherence_desc2021.tif ...
Reading temporalCoherence_asc2018.tif ...
Reading temporalCoherence_asc2019.tif ...
Reading temporalCoherence_asc2020.tif ...
Reading temporalCoherence_asc2021.tif ...
Done!


In [7]:
profile = read_raster('velocity_desc2019.tif')[1]
affine = profile['transform']

## Data exploration

In [None]:
test_set.shape

In [None]:
test_set[['active','area_ha']].groupby('active').describe()

In [None]:
coh = datasets['temporalCoherence_desc']['2019']
vel = datasets['velocity_desc']['2019']

In [None]:
vmin, vmax = (0.5,1)
f, (ax1,ax2) = plt.subplots(1,2,figsize=(20,5))
ax1.imshow(coh,vmin=vmin,vmax=vmax)
h=ax2.hist(coh.flatten(),range=(vmin,vmax),bins=100)

In [None]:
vmin, vmax = (-0.05,0.05)
f, (ax1,ax2) = plt.subplots(1,2,figsize=(20,5))
ax1.imshow(vel,vmin=vmin,vmax=vmax)
h=ax2.hist(vel.flatten(),range=(vmin,vmax),bins=50)

## Filter Trials

### Bekaert Desc

In [None]:
# best value is n_std = 11
name = f'std_desc_2019'
data, mask = bekaert_filter(datasets['velocity_desc']['2019'], datasets['std_desc']['2019'], min_size=min_size)
test_set = add_trial_column(test_set,mask,affine,f'bekaert_desc_2019')
write_raster(data,profile,name+'.tif',dtype=rio.float32)

### Bekaert Asc

In [None]:
# best value is n_std = 11
name = f'std_asc_2019'
data, mask = bekaert_filter(datasets['velocity_asc']['2019'], datasets['std_asc']['2019'], min_size=min_size)
test_set = add_trial_column(test_set,mask,affine,f'bekaert_asc_2019')
write_raster(data,profile,name+'.tif',dtype=rio.float32)

In [28]:
tcoh_asc = datasets['temporalCoherence_asc']
tcoh_desc = datasets['temporalCoherence_desc']

asc_means = [np.nanmean(tcoh_asc[x]) for x in tcoh_asc]
desc_means = [np.nanmean(tcoh_desc[x]) for x in tcoh_desc]

tcoh_mean = (np.mean(asc_means) + np.mean(desc_means)) / 2

del tcoh_asc, tcoh_desc

print(tcoh_mean)

0.9047293663024902


### Set Params

In [8]:
(19*40*40)/10000

3.04

In [9]:
min_size = 19
coh_thresh = 0.905
n_std = 2

### Vel_coh

In [10]:
result_bitwise = np.zeros(datasets['velocity_desc'][2019].shape)
result_count = np.zeros(datasets['velocity_desc'][2019].shape)
for i,y in enumerate(years):
    print(y)
    #descending
    desc, mask_desc = vel_coh_filter_v2(datasets['velocity_desc'][y],
                                     datasets['temporalCoherence_desc'][y],
                                     coh_thresh=coh_thresh, n_std=n_std, min_size=min_size)

    test_set = add_trial_column(test_set,mask_desc,affine,f'velcoh_desc_{y}')

    #ascending
    asc, mask_asc = vel_coh_filter_v2(datasets['velocity_asc'][y],
                                     datasets['temporalCoherence_asc'][y],
                                     coh_thresh=coh_thresh, n_std=n_std, min_size=min_size)

    test_set = add_trial_column(test_set,mask_desc,affine,f'velcoh_asc_{y}')

    #both
    mask = mask_asc | mask_desc
    test_set = add_trial_column(test_set,mask,affine,f'velcoh_both_{y}')
    
    # combine all results into a bitwise mask and activity count
    result_bitwise += mask.astype(int) * (2**i)
    result_count += mask.astype(int)
    
write_raster(result_bitwise,profile,'activity_bitwise_2018_2021.tif',dtype=rio.int8)
write_raster(result_count,profile,'activity_count_2018_2021.tif',dtype=rio.int8)

2018
2019
2020
2021


'activity_count_2018_2021.tif'

### Save Results

In [11]:
name = 'activity_measures_c905_ms19_std2_cohfirst.geojson'
test_set.to_file(name)

## Stats

In [12]:
gdf = test_set.copy()

In [13]:
gdf['velcoh_count'] = 0
for y in years:
    gdf['velcoh_count'] += gdf[f'velcoh_both_{y}']

for y in years:
    gdf[f'velcoh_count2_{y}'] = (gdf['velcoh_count'] >= 2).astype(int)
    gdf[f'velcoh_count3_{y}'] = (gdf['velcoh_count'] >= 3).astype(int)

gdf = gdf.drop(columns=['velcoh_count'])

In [14]:
trials = [x for x in gdf.columns if ('bekaert' in x) | ('velcoh' in x)]

tmp = gdf[['name','active'] + trials]
cols = tmp.columns

# Desired architecture:
# landslide year active type value

df = tmp.melt(id_vars=cols[0:2],value_vars=cols[2:],value_name='prediction')
df[['method','direction','year']] = df.variable.str.split('_',expand=True)
df['type'] = df.method + '_' + df.direction
df['year'] = df.year.astype(int)
df = df.drop(['method','direction','variable'],axis=1)

In [15]:
# from sklearn.metrics import cohen_kappa_score, accuracy_score, confusion_matrix
cols = ['type','year','true_neg','true_pos','kappa','acc']
metrics_yearly = pd.DataFrame(columns=cols)

for t,y in product(df.type.unique(),df.year.unique()):
    sel = df.loc[(df.type == t) & (df.year == y)].copy()

    conf = confusion_matrix(sel.active,sel.prediction)
    kappa = cohen_kappa_score(sel.active,sel.prediction)
    acc = accuracy_score(sel.active,sel.prediction)
    
    metrics_yearly = metrics_yearly.append(dict(zip(cols,[t,y,conf[0,0],conf[1,1],kappa,acc])),ignore_index=True)

In [16]:
metrics_yearly.sort_values(['year','type']).reset_index(drop=True)

Unnamed: 0,type,year,true_neg,true_pos,kappa,acc
0,velcoh_asc,2018,47,9,0.777778,0.933333
1,velcoh_both,2018,42,11,0.684685,0.883333
2,velcoh_count2,2018,42,11,0.684685,0.883333
3,velcoh_count3,2018,46,10,0.791667,0.933333
4,velcoh_desc,2018,47,9,0.777778,0.933333
5,velcoh_asc,2019,43,11,0.722222,0.9
6,velcoh_both,2019,42,11,0.684685,0.883333
7,velcoh_count2,2019,42,11,0.684685,0.883333
8,velcoh_count3,2019,46,10,0.791667,0.933333
9,velcoh_desc,2019,43,11,0.722222,0.9


## Scrap

In [127]:
# from sklearn.metrics import cohen_kappa_score, accuracy_score, confusion_matrix
cols = ['type','true_neg','true_pos','kappa','acc']
metrics = pd.DataFrame(columns=cols)

for t in df.type.unique():
    sel = df.loc[df.type == t].copy()

    conf = confusion_matrix(sel.active,sel.prediction)
    kappa = cohen_kappa_score(sel.active,sel.prediction)
    acc = accuracy_score(sel.active,sel.prediction)
    
    metrics = metrics.append(dict(zip(cols,[t,conf[0,0],conf[1,1],kappa,acc])),ignore_index=True)

In [128]:
metrics.sort_values('type')

Unnamed: 0,type,true_neg,true_pos,kappa,acc
1,velcoh_asc,180,31,0.607046,0.879167
2,velcoh_both,167,38,0.592075,0.854167
3,velcoh_count2,168,44,0.684685,0.883333
4,velcoh_count3,184,40,0.791667,0.933333
0,velcoh_desc,180,31,0.607046,0.879167


## Done!