In [3]:
import geopandas as gpd
import shapely
import shapely
import rasterio
import glob
import pandas as pd
import numpy as np 
from pathlib import Path
import sys
sys.path.append("..")
import os
import datetime
import profiling_tools
import altair as alt
from altair import datum
import json
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

# Inputs
Provide:
- Input file path to file with cross section lines/polygons to extract low points/stream profile from
- Output file path where low points will be saved
- Input directory path to location of DEMs
- Parameter `LINE_COMPLEXITY` which is the number of points that each cross-section line is split into. `LINE_COMPLEXITY` elevation points will be extracted from the DEM for each cross section line

If you use the arg, you must run from CLI like this

```
HSFM_GEOMORPH_INPUT_FILE='inputs/mazama_inputs.json' jupyter nbconvert --execute --to html dem-analysis/mt_baker_mass_wasted/transects.ipynb  --output outputs/transects_mazama.html
```

In [4]:

# Or set an env arg:
if os.environ.get('HSFM_GEOMORPH_INPUT_FILE'):
    json_file_path = os.environ['HSFM_GEOMORPH_INPUT_FILE']
else:
    json_file_path = 'inputs/deming_inputs.json'

In [5]:
with open(json_file_path, 'r') as j:
     params = json.loads(j.read())

In [6]:
TO_DROP = params['inputs']['TO_DROP']
input_transects_file = params['transects']['input_transects_file']
input_dems_path = params['inputs']['dems_path']
glacier_polygons_file = params['inputs']['glacier_polygons_file']
LINE_COMPLEXITY = params['transects']['line_complexity']
raster_fns = glob.glob(os.path.join(input_dems_path, "*.tif"))

strip_time_format = params['inputs']['strip_time_format']

reference_dem_date = datetime.datetime.strptime(
    params['inputs']['reference_dem_date'], 
    strip_time_format
)

In [7]:

raster_fns = [fn for fn in raster_fns if Path(fn).stem not in TO_DROP]
raster_fns

['/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/1970_09_29.tif',
 '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/1979_10_06.tif',
 '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/1991_09_09.tif',
 '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/2013_09_13.tif',
 '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/2015_09_01.tif',
 '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/2019_10_11.tif',
 '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/1947_09_14.tif']

In [8]:
input_transects_file = "/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/whole_mountain/rockfall.shp"
LINE_COMPLEXITY = 20

raster_fns = [fn for fn in raster_fns if '2013' not in fn and '2019' not in fn]

# read cross sections file into GeoDataframe
valley = params['inputs']['valley_name'].lower()
gdf = gpd.read_file(input_transects_file).to_crs("EPSG:32610").query(f"valley == '{valley}'")

# Extract profiles from DEMs 

Along each cross-section, extract point with lowest elevation and calculate "path distance", the distance from the furthest downstream cross section line.

In [9]:

# Increase the number of points in each line
gdf.geometry = gdf.geometry.apply(lambda g: profiling_tools.increase_line_complexity(g, LINE_COMPLEXITY))
# Get all points from the cross section lines and create a row for each point. 
gdf['coords'] = gdf.geometry.apply(lambda x: list(x.coords))
crs = gdf.crs
gdf = gpd.GeoDataFrame(pd.DataFrame(gdf).explode('coords', ignore_index=True))
# Make the coords column a shapely.geometry.Point type and drop the cross section geometries which we no longer need.
gdf['coords'] = gdf['coords'].apply(shapely.geometry.Point)
gdf.drop(columns=["geometry"])

combined_gdf = gpd.GeoDataFrame(crs=crs)

for raster in raster_fns:
    print(raster)
    # Extract an elevation value for each point
    with rasterio.open(raster) as src:
        new_gdf = gdf.copy()
        new_gdf['elevation'] = pd.Series([sample[0] for sample in src.sample(new_gdf["coords"].apply(lambda x: (x.xy[0][0], x.xy[1][0])))])
        new_gdf['elevation'] = new_gdf['elevation'].apply(lambda x: np.nan if x == src.nodata else x)

    # Convert file name to datetime as per the provided format
    date = datetime.datetime.strptime(Path(raster).stem, strip_time_format)
    new_gdf['time'] = date

    # Set the geometry to the coords to calculate "path distance"    
    combined_gdf = combined_gdf.append(new_gdf)

combined_gdf

/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/1970_09_29.tif
/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/1979_10_06.tif
/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/1991_09_09.tif
/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/2015_09_01.tif
/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/deming/dems/1947_09_14.tif


  combined_gdf = gpd.GeoDataFrame(crs=crs)


Unnamed: 0,id,valley,area,geometry,coords,elevation,time
0,1,deming,1,"LINESTRING (582220.482 5399304.091, 582221.536...",POINT (582220.4821996631 5399304.090624962),1176.612061,1970-09-29
1,1,deming,1,"LINESTRING (582220.482 5399304.091, 582221.536...",POINT (582221.5364975275 5399300.873198721),1174.911865,1970-09-29
2,1,deming,1,"LINESTRING (582220.482 5399304.091, 582221.536...",POINT (582222.5907953921 5399297.655772479),,1970-09-29
3,1,deming,1,"LINESTRING (582220.482 5399304.091, 582221.536...",POINT (582223.6450932565 5399294.438346238),1173.879639,1970-09-29
4,1,deming,1,"LINESTRING (582220.482 5399304.091, 582221.536...",POINT (582224.6993911209 5399291.220919996),1164.362061,1970-09-29
...,...,...,...,...,...,...,...
175,9,deming,1,"LINESTRING (582417.102 5399366.525, 582416.288...",POINT (582404.8918109176 5399322.916077691),1173.451538,1947-09-14
176,9,deming,1,"LINESTRING (582417.102 5399366.525, 582416.288...",POINT (582404.0777715684 5399320.008794301),1166.526733,1947-09-14
177,9,deming,1,"LINESTRING (582417.102 5399366.525, 582416.288...",POINT (582403.2637322192 5399317.101510911),1166.526733,1947-09-14
178,9,deming,1,"LINESTRING (582417.102 5399366.525, 582416.288...",POINT (582402.4496928699 5399314.19422752),1155.710327,1947-09-14


### Add path_distance value 

In [10]:
new_gdf = gpd.GeoDataFrame(crs=crs)
for key, group in combined_gdf.groupby(["id", "time"]):
    group.geometry = group['coords']
    group['path_distance'] = pd.Series(group
    .distance(group.shift(1)).fillna(0)).cumsum()
    new_gdf = new_gdf.append(group)

  new_gdf = gpd.GeoDataFrame(crs=crs)


# Calculate Rates

In [11]:
gb = new_gdf.sort_values(by=["valley", "area", "id", "path_distance", "time"]).drop(columns=['geometry', 'coords']).groupby(["valley", "area", "id", "path_distance"])

diff_df = pd.DataFrame()
for group_key in list(gb.groups):
    group = gb.get_group(group_key)
    new_df = group.reset_index(drop=True).join(
        group[['elevation', 'time']].diff().reset_index(drop=True).rename(columns={'elevation': 'elevation_diff', "time": "time_diff"})
    )
    diff_df = diff_df.append(new_df)

## PROBLEMATIC CODE:
## I think the second line is not assigning columns correctly...shitty way above
# new_gdf = new_gdf.sort_values(by=["valley", "area", "id", "path_distance", "time"])
# new_gdf[['elevation_diff', 'time diff']] = new_gdf.drop(columns=['geometry', 'coords']).groupby(["valley", "area", "id", "path_distance"])[['elevation', 'time']].diff().reset_index(drop=True)

In [12]:
diff_df.loc[(diff_df.id==1) & (diff_df['area']==1) & (diff_df['time'] == datetime.datetime(1947, 9, 14))]

Unnamed: 0,id,valley,area,elevation,time,path_distance,elevation_diff,time_diff
0,1,deming,1,1179.540405,1947-09-14,0.0,,NaT
0,1,deming,1,1178.282593,1947-09-14,3.385761,,NaT
0,1,deming,1,1176.463257,1947-09-14,6.771521,,NaT
0,1,deming,1,1176.583374,1947-09-14,10.157282,,NaT
0,1,deming,1,1175.559937,1947-09-14,13.543043,,NaT
0,1,deming,1,1171.579468,1947-09-14,16.928804,,NaT
0,1,deming,1,1166.792358,1947-09-14,20.314564,,NaT
0,1,deming,1,1160.707397,1947-09-14,23.700325,,NaT
0,1,deming,1,1156.90564,1947-09-14,27.086086,,NaT
0,1,deming,1,1155.214233,1947-09-14,30.471846,,NaT


In [13]:
# mean_elevations = new_gdf.groupby(["valley", "id", "time"]).mean().reset_index()
# diff_results = mean_elevations.groupby(["valley", "id"]).diff()
# diff_results['annual elevation change rate'] = diff_results['elevation'] / diff_results['time'].apply(lambda t: t.days/365.25)
# rates = mean_elevations.join(diff_results, rsuffix='_diff')
# rates['start date'] = rates['time'].shift(1)
# rates['end date'] = rates['time']
# rates = rates.dropna()
# valley_avg_rates = rates.groupby(['valley', 'start date', 'end date']).mean()

In [14]:
diff_df['incision rate'] = diff_df['elevation_diff'] / diff_df['time_diff'].apply(lambda x: x.days/265.25)
def nan_greater_than(n, limit):
    if np.isnan(n):
        return n
    elif n < limit:
        return n
    else:
        return 0
diff_df['incision rate'] = diff_df['incision rate'].apply(
    lambda x: nan_greater_than(x, 0)
)

In [15]:
diff_df.head()

Unnamed: 0,id,valley,area,elevation,time,path_distance,elevation_diff,time_diff,incision rate
0,1,deming,1,1179.540405,1947-09-14,0.0,,NaT,
1,1,deming,1,1176.612061,1970-09-29,0.0,-2.928345,8416 days,-0.092294
2,1,deming,1,1178.981812,1979-10-06,0.0,2.369751,3294 days,0.0
3,1,deming,1,1180.980957,1991-09-09,0.0,1.999146,4356 days,0.0
4,1,deming,1,1161.552124,2015-09-01,0.0,-19.428833,8758 days,-0.588433


In [18]:
src = diff_df[[ "time", "path_distance", "incision rate", "id", "area"]].reset_index().dropna()
src['time'] = src['time'].apply(lambda x: x.strftime("%Y-%m-%d"))
alt.Chart(
    src
).mark_line().encode(
    alt.X("path_distance:Q", scale=alt.Scale(zero=False)),
    alt.Y("incision rate:Q", scale=alt.Scale(zero=False), title="Annual Incision Rate (m/yr)"),
    alt.Color("time:O", scale=alt.Scale(scheme='viridis')),
).facet(
    row="id:O",
    column="area:O"
).resolve_scale(
    x="independent",
    y="independent"
).properties(
    # width = 1400,
    # height = 600
)

In [80]:
src = new_gdf[[ "time", "path_distance", "elevation", "id"]].reset_index()
src['time'] = src['time'].apply(lambda x: x.strftime("%Y-%m-%d"))

alt.Chart(
    src
).mark_line().encode(
    alt.X("path_distance:Q", scale=alt.Scale(zero=False)),
    alt.Y("elevation:Q", scale=alt.Scale(zero=False)),
    alt.Color("time:O", scale=alt.Scale(scheme='turbo')),
).facet(
    row="id:O"
).resolve_scale(
    x="independent",
    y="independent"
).properties(
    # width = 1400,
    # height = 600
).configure_legend(
    titleColor='black', 
    titleFontSize=12, 
    labelFontSize=16, 
    symbolStrokeWidth=4
)

# Plot

In [53]:
src = valley_avg_rates.reset_index()
alt.Chart(src).mark_line().encode(
    alt.X('start date:T'),
    alt.X2('end date:T'),
    alt.Y('annual elevation change rate:Q')
)

In [54]:
src = rates.drop(columns = ['time_diff'])
alt.Chart(src).mark_line().encode(
    alt.X('start date:T'),
    alt.X2('end date:T'),
    alt.Y('annual elevation change rate:Q')
).facet(row='id')


In [56]:
src = new_gdf[[ "time", "path_distance", "elevation change", "id"]].reset_index()
src['time'] = src['time'].apply(lambda x: x.strftime("%Y-%m-%d"))
alt.Chart(
    src
).mark_point().encode(
    alt.X("path_distance:Q", scale=alt.Scale(zero=False)),
    alt.Y("elevation change:Q", scale=alt.Scale(zero=False)),
    alt.Color("time:O", scale=alt.Scale(scheme='turbo')),
).facet(
    row="id:O"
).resolve_scale(
    x="independent",
    y="independent"
).properties(
    # width = 1400,
    # height = 600
).configure_legend(
    titleColor='black', 
    titleFontSize=12, 
    labelFontSize=16, 
    symbolStrokeWidth=4
)

KeyError: "['elevation change'] not in index"

In [25]:
src = new_gdf[[ "time", "path_distance", "elevation", "id"]].reset_index()
src['time'] = src['time'].apply(lambda x: x.strftime("%Y-%m-%d"))
alt.Chart(
    src
).mark_line().encode(
    alt.X("path_distance:Q", scale=alt.Scale(zero=False)),
    alt.Y("elevation:Q", scale=alt.Scale(zero=False)),
    alt.Color("time:O", scale=alt.Scale(scheme='turbo')),
).facet(
    row="id:O"
).resolve_scale(
    x="independent",
    y="independent"
).properties(
    # width = 1400,
    # height = 600
).configure_legend(
    titleColor='black', 
    titleFontSize=12, 
    labelFontSize=16, 
    symbolStrokeWidth=4
)