In [1]:
import geopandas as gpd
import shapely
import shapely
import rasterio
import glob
import pandas as pd
import numpy as np
from pathlib import Path
import sys
sys.path.append("..")
import datetime
import profiling_tools
import altair as alt
from altair import datum
import os
import json
import math

# Inputs

Provide:

* Input file path to file with cross section lines/polygons to extract low points/stream profile from
* Output file path where low points will be saved
* Input directory path to location of DEMs
* Parameter LINE_COMPLEXITY which is the number of points that each cross-section line is split into. LINE_COMPLEXITY elevation points will be extracted from the DEM for each cross section line

If you use the arg, you must run from CLI like this

```
HSFM_GEOMORPH_INPUT_FILE='inputs/mazama_inputs.json' jupyter nbconvert --execute --to html dem-analysis/mt_baker_mass_wasted/xsections.ipynb  --output outputs/xsections_mazama.html
```

In [2]:
# Or set an env arg:
if os.environ.get('HSFM_GEOMORPH_INPUT_FILE'):
    json_file_path = os.environ['HSFM_GEOMORPH_INPUT_FILE']
else:
    json_file_path = 'inputs/mazama_inputs.json'

In [3]:
with open(json_file_path, 'r') as j:
     params = json.loads(j.read())

In [4]:
params

{'inputs': {'valley_name': 'Mazama',
  'TO_DROP': ['1970_09_09',
   '1974_08_10',
   '1977_09_27',
   '1987_08_21',
   '1990_09_05',
   '1991_09_09',
   '1992_09_15',
   '2013_09_13',
   '2019_10_11'],
  'TO_DROP_LARGER_AREA': ['1970_09_09',
   '1970_09_29',
   '1977_09_27',
   '1992_09_18',
   '1974_08_10',
   '1987_08_21',
   '1990_09_05',
   '1991_09_09',
   '1992_09_15',
   '2013_09_13',
   '2019_10_11'],
  'XSECTIONS_INCLUDE': None,
  'erosion_polygon_file': '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/erosion.geojson',
  'erosion_by_date_polygon_file': None,
  'glacier_polygons_file': '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/glaciers.geojson',
  'dems_path': '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/dems',
  'valley_bounds_file': '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/valley_bounds.geojson',
  'plot_output_dir': '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/plots/',
  'uncer

In [5]:
TO_DROP = params['inputs']['TO_DROP']
valley_name = params['inputs']['valley_name']
# if this is defined, only data from these dates are analyed
XSECTIONS_INCLUDE = params["inputs"]["XSECTIONS_INCLUDE"]
input_xsections_file = params['xsections']['input_xsections_file']
output_lowpoints_file = params['xsections']['output_lowpoints_file']
output_streamlines_file = params['xsections']['output_streamlines_file']
input_dems_path = params['inputs']['dems_path']
glacier_polygons_file = params['inputs']['glacier_polygons_file']
LINE_COMPLEXITY = params['xsections']['line_complexity']

group_slope_meters = params['xsections']['group_slope_meters']

# Used to strip date from dem file names
strip_time_format = params['inputs']['strip_time_format']

reference_dem_date = datetime.datetime.strptime(
    params['inputs']['reference_dem_date'], 
    strip_time_format
)

In [6]:
raster_fns = glob.glob(os.path.join(input_dems_path, "*.tif"))
if XSECTIONS_INCLUDE:
    raster_fns = [fn for fn in raster_fns if Path(fn).stem in XSECTIONS_INCLUDE]
else:
    raster_fns = [fn for fn in raster_fns if Path(fn).stem not in TO_DROP]
raster_fns

['/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/dems/1970_09_29.tif',
 '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/dems/1979_10_06.tif',
 '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/dems/1992_09_18.tif',
 '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/dems/2015_09_01.tif',
 '/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/dems/1947_09_14.tif']

# Extract profiles from DEMs

Along each cross-section, extract point with lowest elevation and calculate "path distance", the distance from the furthest downstream cross section line.

In [7]:
# read cross sections file into GeoDataframe
gdf = gpd.read_file(input_xsections_file)
# Increase the number of points in each line
gdf.geometry = gdf.geometry.apply(lambda g: profiling_tools.increase_line_complexity(g, LINE_COMPLEXITY))
# Find the centroid of each line
gdf['centroid'] = gdf.geometry.apply(lambda x: x.centroid)
# Get all points from the cross section lines and create a row for each point. 
gdf['coords'] = gdf.geometry.apply(lambda x: list(x.coords))
gdf = gdf.explode('coords', ignore_index=True)
# Make the coords column a shapely.geometry.Point type and drop the cross section geometries which we no longer need.
gdf['coords'] = gdf['coords'].apply(shapely.geometry.Point)
gdf.drop(columns=["geometry"])

combined_gdf = gpd.GeoDataFrame()

for raster in raster_fns:
    print(raster)
    # Extract an elevation value for each point
    with rasterio.open(raster) as src:
        new_gdf = gdf.copy()
        # pnts['values'] = [sample[0] for sample in src.sample(coords)]
        # gdf['elevation'] = gdf['coords'].apply(lambda x: [sample for sample in src.sample(x)])
        new_gdf['elevation'] = pd.Series([sample[0] for sample in src.sample(new_gdf["coords"].apply(lambda x: x.xy))])
        new_gdf['elevation'] = new_gdf['elevation'].apply(lambda x: np.nan if x == src.nodata else x)
        
    # Convert file name to datetime as per the provided format
    date = datetime.datetime.strptime(Path(raster).stem, strip_time_format)
    new_gdf['time'] = date

    # Find the point in each cross section line (identified by the ID column, with 0 meaning furthest downstream) with the lowest elevation
    new_gdf = new_gdf.sort_values('elevation').groupby('id').apply(pd.DataFrame.head, n=1)
    new_gdf['low_point_coords'] = new_gdf.apply(lambda row: None if np.isnan(row['elevation']) else row['coords'], axis=1)

    # Set the geometry to the centroid (of the cross-section lines) to calculate "path distance", distance upstream from the furthest downstream cross-section
    new_gdf.geometry = new_gdf["centroid"]
    new_gdf['path_distance'] = pd.Series(new_gdf.distance(
            gpd.GeoDataFrame(new_gdf.shift(1), crs=new_gdf.crs)
        ).fillna(0)).cumsum()
    
    combined_gdf = combined_gdf.append(new_gdf)

combined_gdf = combined_gdf.set_crs(crs=gdf.crs)

/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/dems/1970_09_29.tif


  combined_gdf = combined_gdf.append(new_gdf)


/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/dems/1979_10_06.tif


  combined_gdf = combined_gdf.append(new_gdf)


/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/dems/1992_09_18.tif


  combined_gdf = combined_gdf.append(new_gdf)


/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/dems/2015_09_01.tif


  combined_gdf = combined_gdf.append(new_gdf)


/data2/elilouis/hsfm-geomorph/data/mt_baker_mass_wasted/mazama/dems/1947_09_14.tif


  combined_gdf = combined_gdf.append(new_gdf)


In [8]:
def create_path_dist_from_glaciers(df):
    path_distance_at_glacier = df.loc[df['n_from_glacial_max']==0, 'path_distance'].iloc[0]
    df['path_distance_from_glacier'] = path_distance_at_glacier - df['path_distance']
    return df
combined_gdf = combined_gdf.groupby('time').apply(create_path_dist_from_glaciers)

In [9]:
combined_gdf.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,id,n_from_glacial_max,geometry,centroid,coords,elevation,time,low_point_coords,path_distance,path_distance_from_glacier
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,0,131,POINT (588753.434 5410881.893),POINT (588753.434 5410881.893),POINT (588708.9868223293 5410897.313375371),,1947-09-14,,0.0,2361.600181
0,0,0,131,POINT (588753.434 5410881.893),POINT (588753.434 5410881.893),POINT (588708.9868223293 5410897.313375371),,1970-09-29,,0.0,2361.600181
0,0,0,131,POINT (588753.434 5410881.893),POINT (588753.434 5410881.893),POINT (588708.9868223293 5410897.313375371),,1979-10-06,,0.0,2361.600181
0,0,0,131,POINT (588753.434 5410881.893),POINT (588753.434 5410881.893),POINT (588708.9868223293 5410897.313375371),,1992-09-18,,0.0,2361.600181
1,150,1,130,POINT (588744.549 5410867.664),POINT (588744.549 5410867.664),POINT (588702.8253882559 5410882.017171524),,1947-09-14,,16.775012,2344.825169


# Mark points as (non)glacial

In [10]:
glaciers_gdf = gpd.read_file(glacier_polygons_file)
glaciers_gdf = glaciers_gdf.to_crs(combined_gdf.crs)
glaciers_gdf['time'] = glaciers_gdf['year'].apply(lambda d: datetime.datetime.strptime(d, strip_time_format))


In [11]:
glaciers_gdf

Unnamed: 0,RGIId,GLIMSId,BgnDate,EndDate,CenLon,CenLat,O1Region,O2Region,Zmin,Zmax,...,Form,TermType,Surging,Linkages,Name,year,area,label,geometry,time
0,RGI60-02.18775,G238196E48802N,19759999,-9999999,-121.80382,48.802,2,4,1234.0,2880.0,...,0.0,0.0,0.0,9.0,Mazama Glacier WA,1979_10_06,,,"MULTIPOLYGON (((589670.548 5407388.051, 589562...",1979-10-06
1,RGI60-02.18775,G238196E48802N,19759999,-9999999,-121.80382,48.802,2,4,1234.0,2880.0,...,,,,9.0,Mazama Glacier WA,1977_09_27,,,"MULTIPOLYGON (((588807.312 5408253.091, 588815...",1977-09-27
2,RGI60-02.18775,G238196E48802N,19759999,-9999999,-121.80382,48.802,2,4,1234.0,2880.0,...,,,,9.0,Mazama Glacier WA,1992_09_18,,,"MULTIPOLYGON (((588758.414 5408334.739, 588758...",1992-09-18
3,RGI60-02.18775,G238196E48802N,19759999,-9999999,-121.80382,48.802,2,4,1234.0,2880.0,...,0.0,0.0,0.0,9.0,Mazama Glacier WA,2013_09_13,,,"MULTIPOLYGON (((588875.411 5406802.874, 588870...",2013-09-13
4,RGI60-02.18775,G238196E48802N,19759999,-9999999,-121.80382,48.802,2,4,1234.0,2880.0,...,0.0,0.0,0.0,9.0,Mazama Glacier WA,2015_09_01,1270.0,Mazama Glacier,"MULTIPOLYGON (((588748.259 5406675.629, 588748...",2015-09-01
5,RGI60-02.18775,G238196E48802N,19759999,-9999999,-121.80382,48.802,2,4,1234.0,2880.0,...,0.0,0.0,0.0,9.0,Mazama Glacier WA,2019_10_11,,,"MULTIPOLYGON (((588875.411 5406802.874, 588870...",2019-10-11
6,RGI60-02.18775,G238196E48802N,19759999,-9999999,-121.80382,48.802,2,4,1234.0,2880.0,...,,,,9.0,Mazama Glacier WA,1970_09_29,1356.0,,"MULTIPOLYGON (((587787.599 5406793.125, 587780...",1970-09-29
7,RGI60-02.18775,G238196E48802N,19759999,-9999999,-121.80382,48.802,2,4,1234.0,2880.0,...,0.0,0.0,0.0,9.0,Mazama Glacier WA,1947_09_14,,,"MULTIPOLYGON (((588888.239 5407846.745, 588881...",1947-09-14
8,RGI60-02.18775,G238196E48802N,19759999,-9999999,-121.80382,48.802,2,4,1234.0,2880.0,...,0.0,0.0,0.0,9.0,Mazama Glacier WA,2013_09_13,,,"MULTIPOLYGON (((588748.259 5406675.629, 588748...",2013-09-13
9,RGI60-02.18775,G238196E48802N,19759999,-9999999,-121.80382,48.802,2,4,1234.0,2880.0,...,0.0,0.0,0.0,9.0,Mazama Glacier WA,2019_10_11,,,"MULTIPOLYGON (((588748.259 5406675.629, 588748...",2019-10-11


In [12]:

combined_gdf['glacial'] = combined_gdf.apply(
    lambda row: any(glaciers_gdf.loc[glaciers_gdf['time'] == row["time"], 'geometry'].apply(lambda g: g.contains(row['coords']))),
    axis=1
)

Plot elevation profiles (small)

In [13]:
src = combined_gdf[[ "time", "path_distance_from_glacier", "elevation", "glacial"]].reset_index()
src['time'] = src['time'].apply(lambda x: x.strftime(strip_time_format))

alt.Chart(
    src
).mark_line().encode(
    alt.X("path_distance_from_glacier:Q", title="Distance downstream from observed glacial maximum"),
    alt.Y("elevation:Q", scale=alt.Scale(zero=False), impute=alt.ImputeParams(value=None), title="Valley floor elevation, in meters"),
    alt.Color("time:O"),
    alt.StrokeDash('glacial:N')
).properties(
    width = 600,
    # height = 600
)

In [14]:
src = combined_gdf[[ "time", "path_distance_from_glacier", "elevation", "glacial"]].reset_index()
src['time'] = src['time'].apply(lambda x: x.strftime(strip_time_format))
alt.Chart(
    src
).mark_line().transform_filter(
    (datum.glacial == False)
).encode(
    alt.X("path_distance_from_glacier:Q", title="Distance downstream from observed glacial maximum"),
    alt.Y("elevation:Q", scale=alt.Scale(zero=False), impute=alt.ImputeParams(value=None), title="Valley floor elevation, in meters"),
    alt.Color("time:O"),
    alt.StrokeDash('glacial:N')
).properties(
    width = 600,
    # height = 600
)


# Calculate Residuals

Calculate so accumulation is always positive with time, erosion is negative with time.

In [15]:
diff_df = pd.DataFrame()

combined_gdf_grouped = combined_gdf.groupby("time")
reference_group = combined_gdf_grouped.get_group(reference_dem_date)

for timestamp, df in combined_gdf_grouped:    
    if timestamp != reference_dem_date:
        print(timestamp)
        this_diff_df = df.copy()
        merged = df.merge(reference_group, on='path_distance_from_glacier')
        if timestamp > reference_dem_date:
            residual_values = merged['elevation_x'] - merged['elevation_y']
        else:
            residual_values = merged['elevation_y'] - merged['elevation_x']
        assert len(this_diff_df) == len(residual_values)
        this_diff_df['elevation_residual'] = list(residual_values)
        diff_df = diff_df.append(this_diff_df)

1947-09-14 00:00:00
1970-09-29 00:00:00
1979-10-06 00:00:00
1992-09-18 00:00:00


  diff_df = diff_df.append(this_diff_df)
  diff_df = diff_df.append(this_diff_df)
  diff_df = diff_df.append(this_diff_df)
  diff_df = diff_df.append(this_diff_df)


Plot elevation residuals, exclude glacier signals (large)

In [16]:
src = diff_df[['elevation', 'time', 'path_distance_from_glacier', 'glacial', 'elevation_residual', 'n_from_glacial_max']].reset_index().dropna()
src['time'] = src['time'].apply(lambda x: x.strftime(strip_time_format))
alt.Chart(
    src
).transform_filter(
    (datum.glacial == False)
).mark_circle().encode(
    alt.X("path_distance_from_glacier:Q"),
    alt.Y("elevation_residual:Q", scale=alt.Scale(zero=False), title='Elevation Residuals (rolling mean, 10 meter window', impute=alt.ImputeParams(value=None)),
    alt.Color("time:O", scale=alt.Scale(scheme='viridis')),
    alt.StrokeDash('glacial:N'),
    tooltip=['n_from_glacial_max', 'time']

).properties(
    width = 1400,
    height = 600,
    title="Elevation Residuals, relative to 2015 data."
).configure_legend(
    titleColor='black', 
    titleFontSize=12, 
    labelFontSize=16, 
    symbolStrokeWidth=4
).interactive()

Plot elevation residuals, exclude glacier signals (small)

In [17]:
src = diff_df[['elevation', 'time', 'path_distance_from_glacier', 'glacial', 'elevation_residual']].reset_index().dropna()
src['time'] = src['time'].apply(lambda x: x.strftime(strip_time_format))
alt.Chart(
    src
).transform_filter(
    (datum.glacial == False)
).mark_line().encode(
    alt.X("path_distance_from_glacier:Q", title="Distance downstream from observed glacial maximum"),
    alt.Y("elevation_residual:Q", scale=alt.Scale(zero=False), title="Valley floor elevation residuals relative to 2015 data, in meters", impute=alt.ImputeParams(value=None)),
    
    alt.Color("time:O", scale=alt.Scale(scheme='viridis')),
    alt.StrokeDash('glacial:N')
).properties(
    width = 600,
    # height = 600,
    title="Valley floor elevation residuals relative to 2015 data, in meters"
).configure_legend(
    titleColor='black', 
    titleFontSize=12, 
    labelFontSize=16, 
    symbolStrokeWidth=4
)

Plot elevation residuals, exclude glacier signals, rolling mean (small)

In [18]:
src = diff_df[['elevation', 'time', 'path_distance_from_glacier', 'glacial', 'elevation_residual']].reset_index().dropna()
src['time'] = src['time'].apply(lambda x: x.strftime(strip_time_format))
alt.Chart(
    src
).transform_filter(
    (datum.glacial == False)
).transform_window(
    rolling_mean='mean(elevation_residual)',
    groupby=['time'],
    frame=[-5,5]
).mark_line().encode(
    alt.X("path_distance_from_glacier:Q", title="Distance downstream from observed glacial maximum"),
    alt.Y(
        "rolling_mean:Q", 
        scale=alt.Scale(zero=False), 
        title=['Elevation residuals relative to 2015 data, in meters,',  '(rolling mean, 10 meter window)'],
        impute=alt.ImputeParams(value=None)
    ),
    alt.Color("time:O", scale=alt.Scale(scheme='viridis')),
    alt.StrokeDash('glacial:N')
).properties(
    width = 600,
    # height = 600,
    title="Elevation Residuals, relative to 2015 data."
).configure_legend(
    titleColor='black', 
    titleFontSize=12, 
    labelFontSize=16, 
    symbolStrokeWidth=4
)

Plot elevation residuals, include glacier signals (small)

In [19]:
src = diff_df[['elevation', 'time', 'path_distance_from_glacier', 'glacial', 'elevation_residual']].reset_index().dropna()
src['time'] = src['time'].apply(lambda x: x.strftime(strip_time_format))
alt.Chart(
    src
).mark_line().encode(
    alt.X("path_distance_from_glacier:Q", title="Distance downstream from observed glacial maximum"),
    alt.Y(
        "elevation_residual:Q", 
        scale=alt.Scale(zero=False), 
        title=['Elevation residuals relative to 2015 data, in meters'],
        impute=alt.ImputeParams(value=None)
    ),
    alt.Color("time:O", scale=alt.Scale(scheme='viridis')),
    alt.StrokeDash('glacial:N')
).properties(
    width = 600,
    # height = 600,
    title="Elevation Residuals, relative to 2015 data."
).configure_legend(
    titleColor='black', 
    titleFontSize=12, 
    labelFontSize=16, 
    symbolStrokeWidth=4
)

Plot elevation residuals, include glacier signals, rolling mean (small)

In [20]:
src = diff_df[['elevation', 'time', 'path_distance_from_glacier', 'glacial', 'elevation_residual']].reset_index().dropna()
src['time'] = src['time'].apply(lambda x: x.strftime(strip_time_format))
alt.Chart(
    src
).transform_window(
    rolling_mean='mean(elevation_residual)',
    groupby=['time'],
    frame=[-5,5]
).mark_line().encode(
    alt.X("path_distance_from_glacier:Q", title="Distance downstream from observed glacial maximum"),
        alt.Y(
        "rolling_mean:Q", 
        scale=alt.Scale(zero=False), 
        title=['Elevation residuals relative to 2015 data, in meters,',  '(rolling mean, 10 meter window)'],
        impute=alt.ImputeParams(value=None)
    ),
    
    alt.Color("time:O", scale=alt.Scale(scheme='viridis')),
    alt.StrokeDash('glacial:N')
).properties(
    width = 600,
    # height = 600,
    title="Elevation Residuals, relative to 2015 data."
).configure_legend(
    titleColor='black', 
    titleFontSize=12, 
    labelFontSize=16, 
    symbolStrokeWidth=4
)

# Calculate slope (negative)

In [21]:
def calculate_gradient(df):
    df['slope'] = - np.gradient(df['elevation'], df['path_distance_from_glacier'])
    return df

# slope_df = combined_gdf.groupby('time').apply(lambda df: calculate_gradient(df))
slope_df = combined_gdf.query('glacial == False').groupby('time').apply(lambda df: calculate_gradient(df))


Plot slope (small)

In [22]:
src = slope_df[['elevation', 'time', 'path_distance_from_glacier', 'glacial', 'slope']].reset_index().dropna()
src['time'] = src['time'].apply(lambda x: x.strftime(strip_time_format))
alt.Chart(
    src
).transform_filter(
    (datum.glacial == False)
).mark_line().encode(
    alt.X("path_distance_from_glacier:Q", title="Distance downstream from observed glacial maximum"),
    alt.Y("slope:Q", scale=alt.Scale(zero=False), title='Valley floor slope', impute=alt.ImputeParams(value=None)),
    alt.Color("time:O", scale=alt.Scale(scheme='viridis')),
    alt.StrokeDash('glacial:N')
).properties(
    width = 600,
    # height = 600,
    title="Valley floor gradient"
).configure_legend(
    titleColor='black', 
    titleFontSize=12, 
    labelFontSize=16, 
    symbolStrokeWidth=4
)

Plot slope, rolling mean (small)

In [23]:
src = slope_df[['elevation', 'time', 'path_distance_from_glacier', 'glacial', 'slope']].reset_index().dropna()
src['time'] = src['time'].apply(lambda x: x.strftime(strip_time_format))
alt.Chart(
    src
).transform_filter(
    (datum.glacial == False)
).transform_window(
    rolling_mean='mean(slope)',
    frame=[-5, 5],
    groupby=['time']
).mark_line().encode(
    alt.X("path_distance_from_glacier:Q", title="Distance downstream from observed glacial maximum"),
    alt.Y("rolling_mean:Q", scale=alt.Scale(zero=False), title='Valley floor slope (rolling mean, 10 meter window)', impute=alt.ImputeParams(value=None)),
    alt.Color("time:O", scale=alt.Scale(scheme='viridis')),
    alt.StrokeDash('glacial:N')
).properties(
    width = 600,
    # height = 600,
    title="Valley floor gradient"
).configure_legend(
    titleColor='black', 
    titleFontSize=12, 
    labelFontSize=16, 
    symbolStrokeWidth=4
)

In [24]:
src = slope_df[['elevation', 'time', 'path_distance_from_glacier', 'glacial', 'slope']].reset_index().dropna()
src['time'] = src['time'].apply(lambda x: x.strftime(strip_time_format))
alt.Chart(
    src
).transform_filter(
    (datum.glacial == False)
).transform_window(
    rolling_mean='mean(slope)',
    frame=[-5, 5],
    groupby=['time']
).mark_line().encode(
    alt.X("path_distance_from_glacier:Q", title="Distance downstream from observed glacial maximum"),
    alt.Y("rolling_mean:Q", scale=alt.Scale(zero=False), title='Valley floor slope (rolling mean, 10 meter window)', impute=alt.ImputeParams(value=None)),
    alt.Color("time:O", scale=alt.Scale(scheme='viridis')),
    alt.StrokeDash('glacial:N')
).properties(
    width = 600,
    # height = 600,
    title="Valley floor gradient"
).configure_legend(
    titleColor='black', 
    titleFontSize=12, 
    labelFontSize=16, 
    symbolStrokeWidth=4
)

## Group by kilometer upslope/downslope from glacier

In [25]:
grouped_km = slope_df[['elevation', 'time', 'path_distance_from_glacier', 'glacial', 'slope']].reset_index().dropna()
grouped_km = grouped_km[~grouped_km.glacial]
grouped_km['Kilometer downstream from glacier'] = grouped_km['path_distance_from_glacier'].apply(lambda x: math.floor(x/1000))
groups = grouped_km.groupby(['time', 'Kilometer downstream from glacier'])
# remove data points if you weren't able to average slope over more than 500 meters
grouped_km = groups.filter(lambda df: 
    (df['path_distance_from_glacier'].max() - df['path_distance_from_glacier'].min()) > 1000*(2/3)
)
grouped_km = grouped_km.groupby(['time', 'Kilometer downstream from glacier']).mean().reset_index()

In [26]:
alt.Chart(grouped_km).mark_line(point=True).encode(
    alt.X('time:T', title=""),
    alt.Y('slope:Q', title="Valley floor slope"),
    alt.Facet('Kilometer downstream from glacier:O', title='Kilometer downstream from glacier')
).properties(width=200)

## Group by provided distance upslope/downslope from glacier

In [27]:
grouped_halfkm = slope_df[['elevation', 'time', 'path_distance_from_glacier', 'glacial', 'slope']].reset_index().dropna()
grouped_halfkm = grouped_halfkm[~grouped_halfkm.glacial]
grouped_halfkm['Half kilometer downstream from glacier'] = grouped_halfkm['path_distance_from_glacier'].apply(lambda x: math.floor(x/group_slope_meters))
groups = grouped_halfkm.groupby(['time', 'Half kilometer downstream from glacier'])
# remove data points if you weren't able to average slope over more than half of the averaging distance
grouped_halfkm = groups.filter(lambda df: 
    (df['path_distance_from_glacier'].max() - df['path_distance_from_glacier'].min()) > group_slope_meters*(2/3)
)
grouped_halfkm = grouped_halfkm.groupby(['time', 'Half kilometer downstream from glacier']).mean().reset_index()

In [28]:
alt.Chart(grouped_halfkm).mark_line(point=True).encode(
    alt.X('time:T', title=""),
    alt.Y('slope:Q', title="Valley floor slope"),
    alt.Facet('Half kilometer downstream from glacier:O', title='Half kilometer downstream from glacier')
).properties(width=200)

# Export low points


In [29]:
combined_gdf.geometry = combined_gdf['low_point_coords']

combined_gdf_noglacial = combined_gdf.query("not glacial")

combined_gdf_noglacial[
    ['geometry', 'path_distance_from_glacier', 'elevation', 'id', 'time']
].reset_index(drop=True).to_file(
    output_lowpoints_file,
    driver="GeoJSON"
)

  pd.Int64Index,


# Create streamlines from low points

In [30]:
from shapely.geometry import Point, LineString
streamlines = combined_gdf_noglacial.groupby("time").apply(lambda df: LineString([point for point in df.geometry.tolist() if point]))

In [31]:
streamlines_gdf = gpd.GeoDataFrame(geometry=streamlines, crs=combined_gdf.crs)

In [32]:
streamlines_gdf.to_file(output_streamlines_file)


# Save dataframes

In [None]:
elevation_profiles = combined_gdf[[ "time", "path_distance_from_glacier", "elevation", "glacial", "n_from_glacial_max"]].reset_index()
elevation_profiles['time'] = elevation_profiles['time'].apply(lambda x: x.strftime(strip_time_format))


dfs = [
    grouped_km,
    grouped_halfkm,
    elevation_profiles
]
names = [
    'slope_grouped_km',
    'slope_grouped_halfkm',
    'elevation_profiles'
]

for df,name in zip(dfs, names):
    df['valley'] = valley_name
    outdir = os.path.join("outputs", name)
    outfile = os.path.join(outdir, valley_name + ".pickle")
    os.makedirs(outdir, exist_ok=True)
    print(outfile)
    df.to_pickle(outfile)

outputs/slope_grouped_km/Mazama.pickle
outputs/slope_grouped_halfkm/Mazama.pickle
outputs/elevation_profiles/Mazama.pickle
