In [1]:
#! /usr/bin/env python
"""
Compute elevation statistics for the debris-covered areas in each latitude and longitude
"""

import sys
import os
import re
import subprocess
from datetime import datetime, timedelta
import time
import pickle
from collections import OrderedDict

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from scipy import ndimage
import xarray as xr
from osgeo import gdal, ogr, osr

from pygeotools.lib import malib, warplib, geolib, iolib, timelib

import debrisglobal.globaldebris_input as debris_prms

In [2]:
# Read the debris-cover extent shapefile (with per-glacier statistics), ordered by RGIId
dc_shp = gpd.read_file(
    debris_prms.debriscover_fp + debris_prms.debriscover_fn_dict[debris_prms.roi]
).sort_values(by=['RGIId'])

print('All DC glaciers:', dc_shp.shape[0], 'All DC Area (km2):', dc_shp.DC_Area_v2.sum() / 1e6)

# Keep glaciers whose total area exceeds the minimum AND that exceed at least one of the
# two debris-cover thresholds ('DC_Area__1' threshold or 'DC_Area_v2' / 1e6 threshold)
exceeds_dc_perc = dc_shp['DC_Area__1'] > debris_prms.dc_percarea_threshold
exceeds_dc_area = dc_shp['DC_Area_v2'] / 1e6 > debris_prms.dc_area_threshold
exceeds_glac_area = dc_shp['Area'] > debris_prms.min_glac_area
dc_shp_subset = dc_shp[(exceeds_dc_perc | exceeds_dc_area) & exceeds_glac_area].reset_index(drop=True)

# Glacier number strings: the part of each RGIId after the dash (e.g. 'RGI60-10.00001' -> '10.00001')
rgino_str_list_subset = [rgiid.split('-')[1] for rgiid in dc_shp_subset['RGIId'].values]

print('Subset DC glaciers:', dc_shp_subset.shape[0], 'Subset DC Area (km2):', dc_shp_subset.DC_Area_v2.sum() / 1e6)

dc_shp_subset

All DC glaciers: 341 All DC Area (km2): 51.655069
Subset DC glaciers: 21 Subset DC Area (km2): 28.536687


Unnamed: 0,RGIId,GLIMSId,BgnDate,EndDate,CenLon,CenLat,O1Region,O2Region,Area,Zmin,...,Name,DC_Area,DC_BgnDate,DC_EndDate,DC_CTSmean,DC_Area_%,area_singl,DC_Area_v2,DC_Area__1,geometry
0,RGI60-10.00001,G149170E76720N,19560599,19560999,149.17118,76.719993,10,2,3.92,99,...,SU5E17701001 Maliy Ice Cap,2733300,2013,2017,7.251092,69.727,2785157,2785157,71.05,"POLYGON ((149.12695 76.72222, 149.12812 76.722..."
1,RGI60-10.00002,G148920E76680N,19560599,19560999,148.920503,76.679101,10,2,48.144,0,...,SU5E17701002 Toll Ice Cap,5647500,2013,2017,7.661007,11.73,25219,5661734,11.76,"MULTIPOLYGON (((148.78120 76.66595, 148.78236 ..."
2,RGI60-10.00005,G148850E76700N,19560599,19560999,148.8489,76.700106,10,2,3.819,63,...,SU5E17701005 5,451800,2013,2017,7.383365,11.83,84663,435925,11.415,"MULTIPOLYGON (((148.81200 76.69586, 148.81317 ..."
3,RGI60-10.00006,G148650E76650N,19560599,19560999,148.646362,76.649529,10,2,12.966,0,...,SU5E17701006 De Long Ice Cap,6819300,2013,2017,9.795027,52.594,69352,6931987,53.463,"MULTIPOLYGON (((148.66874 76.63309, 148.67107 ..."
4,RGI60-10.01106,G086557E49836N,20110906,-9999999,86.566474,49.820984,10,4,13.712,2301,...,,1417500,2013,2017,57.303606,10.338,1298018,1334046,9.729,"MULTIPOLYGON (((86.55759 49.82646, 86.55758 49..."
5,RGI60-10.01114,G086625E49788N,20110906,-9999999,86.619087,49.790594,10,4,9.426,2075,...,,1729800,2013,2017,50.15174,18.351,126099,1699970,18.035,"MULTIPOLYGON (((86.62726 49.77112, 86.62767 49..."
6,RGI60-10.01120,G086544E49802N,20110906,-9999999,86.52052,49.792999,10,4,13.232,2077,...,,2533500,2013,2017,53.827958,19.147,2530849,2530849,19.127,"POLYGON ((86.44433 49.80951, 86.44475 49.80952..."
7,RGI60-10.01729,G087691E50105N,20130904,-9999999,87.67588,50.10963,10,4,5.806,2339,...,SU5A15106133 Korumdu Glacier,458100,2013,2017,28.785325,7.89,411604,411604,7.089,"POLYGON ((87.68211 50.12292, 87.68295 50.12291..."
8,RGI60-10.01730,G087712E50096N,20130904,-9999999,87.701364,50.09509,10,4,3.137,2641,...,,737100,2013,2017,20.810658,23.497,629338,629338,20.062,"POLYGON ((87.70568 50.09903, 87.70694 50.09902..."
9,RGI60-10.01989,G087960E49080N,20110823,-9999999,87.93986,49.074756,10,4,4.313,2864,...,,288900,2013,2017,40.35134,6.698,241364,241364,5.596,"POLYGON ((87.95571 49.08019, 87.95571 49.08046..."


In [3]:
# Pull the RGI table rows for the selected glaciers and work out which met-data grid cells they fall in
#  (CenLat/CenLon come from the RGI table; the Scherler DC shapefiles do not carry the same values)
main_glac_rgi_subset = debris_prms.selectglaciersrgitable(rgino_str_list_subset)

# Longitude on a 0-360 convention: negative longitudes are shifted up by 360
main_glac_rgi_subset['CenLon_360'] = main_glac_rgi_subset['CenLon']
is_neg_lon = main_glac_rgi_subset['CenLon_360'] < 0
main_glac_rgi_subset.loc[is_neg_lon, 'CenLon_360'] = 360 + main_glac_rgi_subset.loc[is_neg_lon, 'CenLon_360']

# Open the met-data elevation file; its latitude/longitude axes define the grid
ds = xr.open_dataset(debris_prms.metdata_fp + '../' + debris_prms.metdata_elev_fn)

# Nearest grid index per glacier: argmin over |glacier coordinate - grid coordinate|
glac_lat_col = main_glac_rgi_subset['CenLat'].values[:, np.newaxis]
glac_lon_col = main_glac_rgi_subset['CenLon_360'].values[:, np.newaxis]
lat_nearidx = np.abs(glac_lat_col - ds['latitude'].values).argmin(axis=1)
lon_nearidx = np.abs(glac_lon_col - ds['longitude'].values).argmin(axis=1)

# (lat index, lon index) pairs, their sorted unique set, and a running number for each unique pair
latlon_nearidx = list(zip(lat_nearidx, lon_nearidx))
latlon_nearidx_unique = sorted(set(latlon_nearidx))
main_glac_rgi_subset['latlon_nearidx'] = latlon_nearidx
unique_nos = np.arange(0, len(latlon_nearidx_unique))
latlon_unique_dict = dict(zip(latlon_nearidx_unique, unique_nos))
latlon_unique_dict_reversed = dict(zip(unique_nos, latlon_nearidx_unique))
main_glac_rgi_subset['latlon_unique_no'] = main_glac_rgi_subset['latlon_nearidx'].map(latlon_unique_dict)

print('unique lat/lons:', len(np.unique(main_glac_rgi_subset['latlon_unique_no'])), '\n\n')

# Delete me
latlon_nearidx_unique_v1 = latlon_nearidx_unique.copy()

# Grid coordinates (degrees, as stored in the dataset) of every unique index pair
lat_list = np.array([ds.latitude[lat_idx].values for lat_idx, _ in latlon_nearidx_unique])
lon_list = np.array([ds.longitude[lon_idx].values for _, lon_idx in latlon_nearidx_unique])
latlon_list = list(zip(lat_list, lon_list))

# Pickle unique lat/lons that will be used for melt model
latlon_unique_fullfn = debris_prms.latlon_unique_fp + debris_prms.latlon_unique_dict[debris_prms.roi]
with open(latlon_unique_fullfn, 'wb') as f:
    pickle.dump(latlon_list, f)

21 glaciers in region 10 are included in this model run: ['00001', '00002', '00005', '00006', '01106', '01114', '01120', '01729', '01730', '01989', '02127', '03488', '03752', '03876', '03878', '04017', '04019', '04051', '04162', '04856', '05151'] and more
This study is focusing on 21 glaciers in region [10]
unique lat/lons: 17 




In [4]:
# ===== LOAD GLACIERS WITH DATA =====
# Attach the binned mass-balance filename (column 'mb_fn') to every selected glacier.
# Glaciers without a matching file end up with NaN in 'mb_fn' (result of the final .map()).
mb_binned_fp = debris_prms.mb_binned_fp

# Zero-padded region numbers of the current region of interest, e.g. ['10']
regions_str = [str(x).zfill(2) for x in debris_prms.roi_rgidict[debris_prms.roi]]

# Collect (RGIId, filename) PAIRS so each filename stays attached to the RGIId derived from it.
# Sorting RGIIds and filenames independently can misalign them: filenames may lack the
# leading zero of the region ('2.00001_...' sorts after '10.00001_...') while the zero-padded
# RGIIds sort the other way round ('RGI60-02...' before 'RGI60-10...').
mb_pairs = []
for mb_fn in os.listdir(mb_binned_fp):
    rgiid_raw = mb_fn.split('_')[0]
    region_str = rgiid_raw.split('.')[0].zfill(2)
    if mb_fn.endswith('_mb_bins.csv') and region_str in regions_str:
        mb_pairs.append(('RGI60-' + region_str + '.' + rgiid_raw.split('.')[1], mb_fn))
mb_pairs.sort()

mb_rgiids = [pair[0] for pair in mb_pairs]
mb_fns = [pair[1] for pair in mb_pairs]
mb_fn_df = pd.DataFrame(mb_pairs, columns=['RGIId', 'mb_fn'])

# Find glaciers that are debris-covered (i.e. present in the selected RGI table)
mb_fn_df_dc = mb_fn_df[mb_fn_df['RGIId'].isin(main_glac_rgi_subset['RGIId'].values)]
mb_fn_df_dc = mb_fn_df_dc.sort_values('RGIId')
mb_dc_rgiid = list(mb_fn_df_dc['RGIId'].values)

print('Debris-covered glaciers:', mb_fn_df_dc.shape[0], '\n\n')

mb_fn_dict = dict(zip(mb_fn_df_dc['RGIId'].values, mb_fn_df_dc['mb_fn'].values))

main_glac_rgi_subset['mb_fn'] = main_glac_rgi_subset.RGIId.map(mb_fn_dict)

Debris-covered glaciers: 8 




In [5]:
# ===== SELECT GLACIERS WITH DATA ====
# Only glaciers that were matched to a binned mass-balance file ('mb_fn' not NaN) are kept
main_glac_rgi_wobs = main_glac_rgi_subset.dropna(subset=['mb_fn']).reset_index(drop=True)

# Renumber the unique (lat index, lon index) pairs for the reduced table
latlon_nearidx_unique = sorted(set(main_glac_rgi_wobs['latlon_nearidx'].values))
unique_nos = np.arange(0, len(latlon_nearidx_unique))
latlon_unique_dict = dict(zip(latlon_nearidx_unique, unique_nos))
latlon_unique_dict_reversed = dict(zip(unique_nos, latlon_nearidx_unique))
main_glac_rgi_wobs['latlon_unique_no'] = main_glac_rgi_wobs['latlon_nearidx'].map(latlon_unique_dict)

print('unique lat/lons:', len(np.unique(main_glac_rgi_wobs['latlon_unique_no'])), '\n\n')

# Grid coordinates (as stored in the dataset) of every remaining unique index pair
lat_list = np.array([ds.latitude[lat_idx].values for lat_idx, _ in latlon_nearidx_unique])
lon_list = np.array([ds.longitude[lon_idx].values for _, lon_idx in latlon_nearidx_unique])
latlon_list = list(zip(lat_list, lon_list))

# Overwrite the pickle of unique lat/lons that will be used for melt model
latlon_unique_fullfn = debris_prms.latlon_unique_fp + debris_prms.latlon_unique_dict[debris_prms.roi]
with open(latlon_unique_fullfn, 'wb') as f:
    pickle.dump(latlon_list, f)

main_glac_rgi_wobs

unique lat/lons: 6 




Unnamed: 0,O1Index,RGIId,CenLon,CenLat,O1Region,O2Region,Area,Zmin,Zmax,Zmed,...,TermType,Surging,RefDate,glacno,rgino_str,RGIId_float,CenLon_360,latlon_nearidx,latlon_unique_no,mb_fn
0,1105,RGI60-10.01106,86.557007,49.835735,10,4,13.712,2301,4371,2982,...,0,9,20110906,1106,10.01106,10.01106,86.557007,"(161, 346)",2,10.01106_mb_bins.csv
1,1113,RGI60-10.01114,86.625198,49.787872,10,4,9.426,2075,4373,2821,...,0,9,20110906,1114,10.01114,10.01114,86.625198,"(161, 347)",3,10.01114_mb_bins.csv
2,1119,RGI60-10.01120,86.543709,49.801971,10,4,13.232,2077,4380,3210,...,0,9,20110906,1120,10.0112,10.0112,86.543709,"(161, 346)",2,10.01120_mb_bins.csv
3,1728,RGI60-10.01729,87.691193,50.104927,10,4,5.806,2339,4000,3304,...,0,9,20130904,1729,10.01729,10.01729,87.691193,"(160, 351)",1,10.01729_mb_bins.csv
4,1729,RGI60-10.01730,87.711716,50.095829,10,4,3.137,2641,3990,3175,...,0,9,20130904,1730,10.0173,10.0173,87.711716,"(160, 351)",1,10.01730_mb_bins.csv
5,1988,RGI60-10.01989,87.95961,49.079823,10,4,4.313,2864,3765,3192,...,0,9,20110823,1989,10.01989,10.01989,87.95961,"(164, 352)",5,10.01989_mb_bins.csv
6,2126,RGI60-10.02127,87.600677,48.989388,10,4,3.782,2371,3696,3103,...,0,9,20110823,2127,10.02127,10.02127,87.600677,"(164, 350)",4,10.02127_mb_bins.csv
7,5150,RGI60-10.05151,87.584122,50.066093,10,4,8.369,2249,4082,3434,...,0,9,20130904,5151,10.05151,10.05151,87.584122,"(160, 350)",0,10.05151_mb_bins.csv


In [6]:
# print('DELETE ME - HACK FOR DEVELOPMENT')
# print(np.where(main_glac_rgi_wobs['latlon_unique_no'] == 172)[0])
# main_glac_rgi_wobs = main_glac_rgi_wobs.loc[372:373,:]
# main_glac_rgi_wobs['mb_fn'].values

In [7]:
# ===== DEBRIS ELEVATION STATS ====================================================================================
# CALCULATE DEBRIS ELEVATION STATS FOR GLACIERS WITH DATA FOR EACH UNIQUE LAT/LON
# For every unique lat/lon number, the elevation bins that pass the calibration criteria are
# pooled over all glaciers with that number (each bin elevation repeated 'dc_bin_count_valid'
# times) and the mean, standard deviation, median and median absolute deviation are stored.
elev_stats_latlon_dict = {}   # (lat_deg, lon_deg) -> [dc_zmean, dc_zstd, dc_zmed, dc_zmad]
latlon_list_updated = []      # (lat_deg, lon_deg) of every lat/lon with at least one passing bin
rgiid_4cal = []               # glacier number strings ('10.01106') of glaciers with passing bins

for latlon_unique in np.unique(main_glac_rgi_wobs.latlon_unique_no):

    # Glaciers sharing this lat/lon. A dedicated name is used so that the notebook-level
    # main_glac_rgi_subset (needed when re-running the earlier cells) is not overwritten.
    glac_rgi_latlon = (main_glac_rgi_wobs[main_glac_rgi_wobs['latlon_unique_no'] == latlon_unique]
                       .reset_index(drop=True))

    # Debris elevation stats should be done by lat/lon
    elev_list_all = []
    df_idx_count = 0
    for nglac, glac_fn in enumerate(glac_rgi_latlon.mb_fn.values):

        glac_fullfn = debris_prms.mb_binned_fp + glac_fn

        # Rebuild the RGIId from the filename (region zero-padded to two digits) as a sanity check
        glac_str_noleadzero = glac_fullfn.split('/')[-1].split('_')[0]
        rgiid = 'RGI60-' + glac_str_noleadzero.split('.')[0].zfill(2) + '.' + glac_str_noleadzero.split('.')[1]

        assert glac_rgi_latlon.loc[nglac,'RGIId'] == rgiid, 'RGIId does not matach mass balance filename'

        # Select bins that meet calibration criteria.
        # reset_index() after dropna() gives an independent frame with a clean 0..n-1 index, so
        # the column assignments below do not write into a slice of df_raw and row positions
        # match row labels.
        df_raw = pd.read_csv(glac_fullfn)
        df = df_raw.dropna(subset=['mb_bin_mean_mwea']).reset_index(drop=True)
        df['z1_bin_areas_perc_cum'] = (np.cumsum(df['z1_bin_area_valid_km2'])
                                       / df['z1_bin_area_valid_km2'].sum() * 100)

        # Add width to each elevation bin (width of the nearest elevation in the widths file)
        widths_fp = debris_prms.oggm_fp + 'widths/' + 'RGI60-' + rgiid.split('-')[1].split('.')[0] + '/'
        widths_fn = rgiid + '_widths_m.csv'
        try:
            widths_df = pd.read_csv(widths_fp + widths_fn)
            elev_nearidx = (np.abs(df['bin_center_elev_m'].values[:,np.newaxis] -
                                   widths_df['elev'].values).argmin(axis=1))
            df['width_m'] = widths_df['width_m'].values[elev_nearidx]
        except Exception:
            # Best effort: if the widths file is missing or unusable, fall back to a width of 0
            # (KeyboardInterrupt/SystemExit are deliberately no longer swallowed)
            df['width_m'] = 0

        # Boolean mask of bins passing all criteria. A mask (instead of positional indices
        # from np.where fed into label-based .loc) always selects exactly the passing rows.
        debris_mask = ((df['vm_med'] <= debris_prms.vel_threshold)
                       & (df['width_m'] >= debris_prms.width_min_dict[debris_prms.roi])
                       & (df['dc_bin_area_perc'] >= debris_prms.debrisperc_threshold)
                       & (df['dc_bin_count_valid'] >= 10)
                       & (df['z1_bin_areas_perc_cum'] <= debris_prms.term_area_perc))
        df_debris = df.loc[debris_mask].reset_index(drop=True)
        df_idx_count += len(df_debris)

        if len(df_debris) > 0:
            # Repeat each bin elevation by its valid debris pixel count. The counts are >= 10
            # (hence not NaN) after the mask; the int cast guards against a float-typed column.
            elev_list_all.extend(np.repeat(df_debris['bin_center_elev_m'].values,
                                           df_debris['dc_bin_count_valid'].values.astype(int)))

            rgiid_4cal.append(rgiid.split('-')[1])

    if df_idx_count > 0:
        dc_zmean = np.mean(elev_list_all)
        dc_zstd = np.std(elev_list_all)
        dc_zmed = malib.fast_median(elev_list_all)
        dc_zmad = malib.mad(elev_list_all)

        # Translate the unique lat/lon number back to grid indices, then to grid coordinates
        lat_idx, lon_idx = latlon_unique_dict_reversed[latlon_unique]
        lat_deg = float(ds.latitude[lat_idx].values)
        lon_deg = float(ds.longitude[lon_idx].values)
        elev_stats_latlon_dict[lat_deg,lon_deg] = [dc_zmean, dc_zstd, dc_zmed, dc_zmad]
        latlon_list_updated.append((lat_deg, lon_deg))

print('unique lat/lons updated:', len(latlon_list_updated))
# Update pickle of unique lat/lons that will be used for melt model
with open(debris_prms.latlon_unique_fp + debris_prms.latlon_unique_dict[debris_prms.roi], 'wb') as f:
    pickle.dump(latlon_list_updated, f)

unique lat/lons updated: 5


In [8]:
# Data-coverage summary: how many glaciers, and how much debris-covered area, feed the calibration
rgiid_4cal = sorted(rgiid_4cal)
main_glac_rgi_4cal = debris_prms.selectglaciersrgitable(rgiid_4cal)

# Look up each glacier's debris-covered area ('DC_Area_v2') from the full shapefile table by RGIId
dc_area_dict = dict(zip(dc_shp['RGIId'].values, dc_shp['DC_Area_v2'].values))
main_glac_rgi_4cal['DC_Area_v2'] = main_glac_rgi_4cal['RGIId'].map(dc_area_dict)

n_glac_4cal = main_glac_rgi_4cal.shape[0]
dc_area_4cal_km2 = main_glac_rgi_4cal['DC_Area_v2'].sum() / 1e6
print('\nDC glaciers (used for cal):', n_glac_4cal,
      'DC Area (used for cal, km2):', dc_area_4cal_km2)

7 glaciers in region 10 are included in this model run: ['01106', '01114', '01120', '01729', '01730', '01989', '05151']
This study is focusing on 7 glaciers in region [10]

DC glaciers (used for cal): 7 DC Area (used for cal, km2): 7.369565


In [9]:
# ===== ADD DEBRIS ELEVATION STATS TO MET DATA ======
# For every lat/lon with debris elevation statistics, write the four statistics as variables
# into the corresponding meteorological netcdf file (appending to / overwriting in that file).
overwrite_dc_stats = True

# (variable name, position in elev_stats_latlon_dict[latlon], long_name)
dc_stat_specs = [('dc_zmean', 0, 'Mean debris cover elevation'),
                 ('dc_zstd', 1, 'Standard deviation of debris cover elevation'),
                 ('dc_zmed', 2, 'Median debris cover elevation'),
                 ('dc_zmad', 3, 'Median absolute deviation of debris cover elevation')]
dc_stat_comment = 'converted from debris cover with data that will be used for subdebris melt inversion'

for nlatlon, latlon in enumerate(latlon_list_updated):

    lat_deg, lon_deg = latlon

    print(nlatlon, lat_deg, lon_deg)

    lat_str = 'S-' if lat_deg < 0 else 'N-'

    # ===== Meteorological data =====
    # File name: 'XXXX' in the sample name is replaced by <|lat|*100><N-/S-><lon*100>E-
    metdata_fn = debris_prms.metdata_fn_sample.replace(
        'XXXX', str(int(np.abs(lat_deg)*100)) + lat_str + str(int(lon_deg*100)) + 'E-')
    metdata_fullfn = debris_prms.metdata_fp + metdata_fn

    # Read the whole file into memory and release the file handle before writing back to the
    # same path. A separate name (ds_met) keeps the notebook-level `ds` (the elevation grid
    # used by the earlier cells) intact, so those cells can still be re-run afterwards.
    with xr.open_dataset(metdata_fullfn) as ds_file:
        ds_met = ds_file.load()

    has_dc_stats = 'dc_zmean' in ds_met.data_vars
    if has_dc_stats:
        print('  existed:', ds_met.dc_zmean.values, 'vs', elev_stats_latlon_dict[latlon][0])

    if not has_dc_stats or overwrite_dc_stats:
        # Add stats
        # NOTE(review): the same units string is used for all four variables, as before;
        #  'm' may be more appropriate for the std / MAD - confirm before changing file metadata.
        for stat_name, stat_idx, stat_long_name in dc_stat_specs:
            ds_met[stat_name] = elev_stats_latlon_dict[latlon][stat_idx]
            ds_met[stat_name].attrs = {'units':'m a.s.l.', 'long_name':stat_long_name,
                                       'comment':dc_stat_comment}

        # Export updated dataset (append mode: variables are added to / replaced in the file)
        ds_met.to_netcdf(metdata_fullfn, mode='a')
    else:
        print(lat_deg, lon_deg, 'exists')

0 50.0 87.5
  existed: 2375.0 vs 2375.0
1 50.0 87.75
  existed: 2641.6452304394425 vs 2641.6452304394425
2 49.75 86.5
  existed: 2269.083044982699 vs 2269.083044982699
3 49.75 86.75
  existed: 2163.9162561576354 vs 2163.9162561576354
4 49.0 88.0
  existed: 2904.567901234568 vs 2904.567901234568


In [10]:
# Completion marker: printed once every preceding cell has run
print('DONE!')

DONE!
