In [1]:
#! /usr/bin/env python
"""
Compute debris-cover elevation statistics (mean, standard deviation, median, and median absolute deviation) for each unique met-data latitude/longitude
"""

import sys
import os
import re
import subprocess
from datetime import datetime, timedelta
import time
import pickle
from collections import OrderedDict

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from scipy import ndimage
import xarray as xr
from osgeo import gdal, ogr, osr

from pygeotools.lib import malib, warplib, geolib, iolib, timelib

import debrisglobal.globaldebris_input as debris_prms

In [2]:
# Read the debris-cover extent shapefile (with statistics), ordered by RGI id
dc_shp = gpd.read_file(
    debris_prms.debriscover_fp + debris_prms.debriscover_fn_dict[debris_prms.roi]
).sort_values(by=['RGIId'])

print('All DC glaciers:', dc_shp.shape[0], 'All DC Area (km2):', dc_shp.DC_Area_v2.sum() / 1e6)

# Keep glaciers that exceed either the percent debris-covered threshold or the
# debris-covered area threshold, and that also exceed the minimum glacier area
exceeds_dc_perc = dc_shp['DC_Area__1'] > debris_prms.dc_percarea_threshold
exceeds_dc_area = dc_shp['DC_Area_v2'] / 1e6 > debris_prms.dc_area_threshold
exceeds_glac_area = dc_shp['Area'] > debris_prms.min_glac_area
dc_shp_subset = dc_shp[(exceeds_dc_perc | exceeds_dc_area) & exceeds_glac_area].copy()
dc_shp_subset = dc_shp_subset.reset_index(drop=True)

# Glacier numbers as strings (the part of the RGIId after 'RGI60-')
rgino_str_list_subset = [rgiid.split('-')[1] for rgiid in dc_shp_subset.RGIId.values]

print('Subset DC glaciers:', dc_shp_subset.shape[0], 'Subset DC Area (km2):', dc_shp_subset.DC_Area_v2.sum() / 1e6)

dc_shp_subset

All DC glaciers: 1433 All DC Area (km2): 358.379314
Subset DC glaciers: 209 Subset DC Area (km2): 164.320852


Unnamed: 0,RGIId,GLIMSId,BgnDate,EndDate,CenLon,CenLat,O1Region,O2Region,Area,Zmin,...,Name,DC_Area,DC_BgnDate,DC_EndDate,DC_CTSmean,DC_Area_%,area_singl,DC_Area_v2,DC_Area__1,geometry
0,RGI60-16.00080,G289192E15597S,20009999,20030531,-70.808481,-15.597843,16,1,2.350,5015,...,,1879200,2013,2017,13.879310,79.966,2000329,2000329,85.120,"POLYGON ((-70.82284 -15.60048, -70.82256 -15.6..."
1,RGI60-16.00141,G289250E15308S,20009999,20030531,-70.746611,-15.311424,16,1,2.558,4944,...,,2374200,2013,2017,19.909379,92.815,2451201,2451201,95.825,"POLYGON ((-70.76950 -15.31381, -70.76922 -15.3..."
2,RGI60-16.00163,G289304E15345S,20009999,20030531,-70.698399,-15.343793,16,1,2.305,4988,...,,1108800,2013,2017,15.175185,48.104,650462,1081444,46.917,"MULTIPOLYGON (((-70.70664 -15.34792, -70.70636..."
3,RGI60-16.00173,G289290E15376S,20009999,20030531,-70.707924,-15.374475,16,1,2.115,4952,...,,365400,2013,2017,21.383340,17.277,130158,302953,14.324,"MULTIPOLYGON (((-70.71444 -15.37876, -70.71416..."
4,RGI60-16.00176,G289280E15353S,20009999,20030531,-70.707928,-15.357731,16,1,2.546,4987,...,,903600,2013,2017,22.217345,35.491,50399,853055,33.506,"MULTIPOLYGON (((-70.70505 -15.35796, -70.70477..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
204,RGI60-16.02497,G282309E08904S,20031018,-9999999,-77.693319,-8.900989,16,1,3.398,4638,...,,522000,2013,2017,8.423199,15.362,508587,508587,14.967,"POLYGON ((-77.70315 -8.91299, -77.70287 -8.912..."
205,RGI60-16.02518,G282284E08882S,20031018,-9999999,-77.715349,-8.882387,16,1,3.588,4637,...,,318600,2013,2017,7.594425,8.880,315452,315452,8.792,"POLYGON ((-77.72666 -8.88517, -77.72611 -8.885..."
206,RGI60-16.02566,G282336E08900S,20031018,-9999999,-77.666773,-8.899181,16,1,2.068,4639,...,,336600,2013,2017,6.900700,16.277,324467,324467,15.690,"POLYGON ((-77.67363 -8.90290, -77.67336 -8.902..."
207,RGI60-16.02586,G282308E08800S,20031018,-9999999,-77.691744,-8.796642,16,1,2.136,4703,...,,452700,2013,2017,3.780303,21.194,128523,406914,19.050,"MULTIPOLYGON (((-77.69772 -8.80488, -77.69718 ..."


In [3]:
# Select glaciers using RGI and find unique latlons
#  (Scherler DC shapefiles do not have same CenLat and CenLon for some reason)
main_glac_rgi_subset = debris_prms.selectglaciersrgitable(rgino_str_list_subset)

# Shift negative longitudes by +360 so they can be compared with the met data longitudes
#  NOTE(review): assumes the met data longitudes are on a 0-360 grid - confirm
main_glac_rgi_subset['CenLon_360'] = main_glac_rgi_subset['CenLon']
is_west = main_glac_rgi_subset['CenLon_360'] < 0
main_glac_rgi_subset.loc[is_west, 'CenLon_360'] = 360 + main_glac_rgi_subset.loc[is_west, 'CenLon_360']

# Load met data and find nearest latlon indices
#  NOTE: `ds` is read again by later cells (lat/lon lookup), so it is intentionally left open
ds = xr.open_dataset(debris_prms.metdata_fp + '../' + debris_prms.metdata_elev_fn)
#  argmin() finds the minimum distance between the glacier lat/lon and the met data pixel
lat_nearidx = (np.abs(main_glac_rgi_subset['CenLat'].values[:,np.newaxis] -
                      ds['latitude'][:].values).argmin(axis=1))
lon_nearidx = (np.abs(main_glac_rgi_subset['CenLon_360'].values[:,np.newaxis] -
                      ds['longitude'][:].values).argmin(axis=1))
latlon_nearidx = list(zip(lat_nearidx, lon_nearidx))
latlon_nearidx_unique = sorted(set(latlon_nearidx))
main_glac_rgi_subset['latlon_nearidx'] = latlon_nearidx

# Two-way lookup between each unique (lat index, lon index) pair and its sequential number
latlon_unique_dict = dict(zip(latlon_nearidx_unique, np.arange(0,len(latlon_nearidx_unique))))
latlon_unique_dict_reversed = dict(zip(np.arange(0,len(latlon_nearidx_unique)), latlon_nearidx_unique))
main_glac_rgi_subset['latlon_unique_no'] = main_glac_rgi_subset['latlon_nearidx'].map(latlon_unique_dict)

print('unique lat/lons:', len(np.unique(main_glac_rgi_subset['latlon_unique_no'])), '\n\n')

# Coordinates (degrees) of every unique met data pixel
lat_list = np.array([ds.latitude[x[0]].values for x in latlon_nearidx_unique])
lon_list = np.array([ds.longitude[x[1]].values for x in latlon_nearidx_unique])
latlon_list = list(zip(list(lat_list), list(lon_list)))

# Pickle unique lat/lons that will be used for melt model
#  (later cells overwrite this same file with progressively filtered lists)
with open(debris_prms.latlon_unique_fp + debris_prms.latlon_unique_dict[debris_prms.roi], 'wb') as f:
    pickle.dump(latlon_list, f)

209 glaciers in region 16 are included in this model run: ['00080', '00141', '00163', '00173', '00176', '00177', '00205', '00213', '00214', '00216', '00228', '00244', '00248', '00256', '00261', '00274', '00285', '00287', '00288', '00289', '00299', '00331', '00332', '00337', '00360', '00361', '00362', '00363', '00366', '00368', '00370', '00372', '00373', '00410', '00413', '00417', '00427', '00428', '00433', '00485', '00486', '00493', '00496', '00500', '00516', '00540', '00543', '00560', '00566', '00582'] and more
This study is focusing on 209 glaciers in region [16]
unique lat/lons: 55 




In [4]:
# ===== LOAD GLACIERS WITH DATA =====
# Match each binned mass balance file to its RGIId and attach the filename to the glacier table.
mb_binned_fp = debris_prms.mb_binned_fp

# Zero-padded region numbers for the region of interest (e.g. '01', '16')
regions_str = [str(x).zfill(2) for x in debris_prms.roi_rgidict[debris_prms.roi]]

# Build (RGIId, filename) pairs together so they can never get out of step.
#  Sorting the ids and filenames as two independent lists misaligns them whenever the
#  filename prefix is not zero-padded (e.g. '10.00001' sorts before '2.00001' as a
#  filename, but 'RGI60-02...' sorts before 'RGI60-10...' as an RGIId).
mb_records = []
for mb_fn in sorted(os.listdir(mb_binned_fp)):
    if not mb_fn.endswith('_mb_bins.csv'):
        continue
    rgiid_raw = mb_fn.split('_')[0]
    region_str = rgiid_raw.split('.')[0].zfill(2)
    if region_str not in regions_str:
        continue
    mb_records.append({'RGIId': 'RGI60-' + region_str + '.' + rgiid_raw.split('.')[1],
                       'mb_fn': mb_fn})

mb_fn_df = (pd.DataFrame(mb_records, columns=['RGIId', 'mb_fn'])
            .sort_values('RGIId')
            .reset_index(drop=True))
mb_rgiids = mb_fn_df['RGIId'].tolist()
mb_fns = mb_fn_df['mb_fn'].tolist()

# Find glaciers that are debris-covered
mb_fn_df_dc = mb_fn_df[mb_fn_df['RGIId'].isin(main_glac_rgi_subset['RGIId'])]
mb_dc_rgiid = mb_fn_df_dc['RGIId'].tolist()

print('Debris-covered glaciers:', mb_fn_df_dc.shape[0], '\n\n')

mb_fn_dict = dict(zip(mb_fn_df_dc['RGIId'].values, mb_fn_df_dc['mb_fn'].values))

# Glaciers without a mass balance file get NaN here and are dropped in the next step
main_glac_rgi_subset['mb_fn'] = main_glac_rgi_subset.RGIId.map(mb_fn_dict)

Debris-covered glaciers: 20 




In [5]:
# ===== SELECT GLACIERS WITH DATA ====
# Only glaciers that were matched to a mass balance file are kept
main_glac_rgi_wobs = main_glac_rgi_subset.dropna(subset=['mb_fn']).reset_index(drop=True)

# Rebuild the unique lat/lon numbering for the reduced set of glaciers
latlon_nearidx_unique = sorted(set(main_glac_rgi_wobs['latlon_nearidx'].values))
latlon_unique_nos = np.arange(0, len(latlon_nearidx_unique))
latlon_unique_dict = dict(zip(latlon_nearidx_unique, latlon_unique_nos))
latlon_unique_dict_reversed = dict(zip(latlon_unique_nos, latlon_nearidx_unique))
main_glac_rgi_wobs['latlon_unique_no'] = main_glac_rgi_wobs['latlon_nearidx'].map(latlon_unique_dict)

print('unique lat/lons:', len(np.unique(main_glac_rgi_wobs['latlon_unique_no'])), '\n\n')

# Look up the coordinates of each unique (lat index, lon index) pair in the met data
lat_list = np.array([ds.latitude[lat_idx].values for lat_idx, _ in latlon_nearidx_unique])
lon_list = np.array([ds.longitude[lon_idx].values for _, lon_idx in latlon_nearidx_unique])
latlon_list = list(zip(list(lat_list), list(lon_list)))

# Overwrite the pickle of unique lat/lons that will be used for melt model
latlon_unique_fullfn = debris_prms.latlon_unique_fp + debris_prms.latlon_unique_dict[debris_prms.roi]
with open(latlon_unique_fullfn, 'wb') as f:
    pickle.dump(latlon_list, f)

main_glac_rgi_wobs

unique lat/lons: 10 




Unnamed: 0,O1Index,RGIId,CenLon,CenLat,O1Region,O2Region,Area,Zmin,Zmax,Zmed,...,TermType,Surging,RefDate,glacno,rgino_str,RGIId_float,CenLon_360,latlon_nearidx,latlon_unique_no,mb_fn
0,1333,RGI60-16.01339,-78.1446,-0.489511,16,1,9.487,4484,5692,5042,...,0,9,20019999,1339,16.01339,16.01339,281.8554,"(362, 1127)",2,16.01339_mb_bins.csv
1,1340,RGI60-16.01346,-77.9845,0.033719,16,1,2.617,4380,5773,5181,...,0,9,20019999,1346,16.01346,16.01346,282.0155,"(360, 1128)",1,16.01346_mb_bins.csv
2,1344,RGI60-16.01350,-76.0329,2.91366,16,1,2.26,4493,5370,4856,...,0,9,20019999,1350,16.0135,16.0135,283.9671,"(348, 1136)",0,16.01350_mb_bins.csv
3,1903,RGI60-16.01909,-76.9059,-10.2915,16,1,6.223,4485,6317,5221,...,0,9,20080812,1909,16.01909,16.01909,283.0941,"(401, 1132)",9,16.01909_mb_bins.csv
4,1957,RGI60-16.01963,-77.2433,-9.96037,16,1,2.705,4713,5578,5073,...,0,9,20080812,1963,16.01963,16.01963,282.7567,"(400, 1131)",8,16.01963_mb_bins.csv
5,2130,RGI60-16.02137,-77.4339,-9.41648,16,1,5.09,4464,6090,5258,...,0,9,20080812,2137,16.02137,16.02137,282.5661,"(398, 1130)",6,16.02137_mb_bins.csv
6,2165,RGI60-16.02172,-77.3129,-9.5512,16,1,5.165,4520,5652,5107,...,0,9,20080812,2172,16.02172,16.02172,282.6871,"(398, 1131)",7,16.02172_mb_bins.csv
7,2315,RGI60-16.02322,-77.5191,-9.19229,16,1,4.05,4341,6153,5140,...,0,9,20031018,2322,16.02322,16.02322,282.4809,"(397, 1130)",5,16.02322_mb_bins.csv
8,2341,RGI60-16.02348,-77.4705,-9.2756,16,1,3.038,4349,6081,5104,...,0,9,20031018,2348,16.02348,16.02348,282.5295,"(397, 1130)",5,16.02348_mb_bins.csv
9,2387,RGI60-16.02394,-77.582,-9.11097,16,1,9.852,4552,6573,5239,...,0,9,20031018,2394,16.02394,16.02394,282.418,"(396, 1130)",4,16.02394_mb_bins.csv


In [6]:
# print('DELETE ME - HACK FOR DEVELOPMENT')
# print(np.where(main_glac_rgi_wobs['latlon_unique_no'] == 172)[0])
# main_glac_rgi_wobs = main_glac_rgi_wobs.loc[372:373,:]
# main_glac_rgi_wobs['mb_fn'].values

In [7]:
# ===== DEBRIS ELEVATION STATS ====================================================================================
# CALCULATE DEBRIS ELEVATION STATS FOR GLACIERS WITH DATA FOR EACH UNIQUE LAT/LON
#  Outputs:
#    elev_stats_latlon_dict : {(lat_deg, lon_deg): [mean, std, median, MAD]} of debris-cover elevations
#    latlon_list_updated    : (lat_deg, lon_deg) pairs that have at least one bin meeting the criteria
#    rgiid_4cal             : glacier strings ('RR.NNNNN') that contributed at least one bin
#  NOTE: `ds` must still be the met elevation dataset used to find the nearest lat/lon indices;
#        re-run that cell first if `ds` has since been reassigned.
elev_stats_latlon_dict = {}
latlon_list_updated = []
rgiid_4cal = []

for latlon_unique in np.unique(main_glac_rgi_wobs.latlon_unique_no):

    # Glaciers that share this met data pixel. A loop-local name is used on purpose: reusing
    # `main_glac_rgi_subset` here would clobber the table that earlier cells build and read.
    glac_rgi_latlon = main_glac_rgi_wobs[
        main_glac_rgi_wobs['latlon_unique_no'] == latlon_unique].reset_index(drop=True)

    # Debris elevation stats should be done by lat/lon
    elev_list_all = []
    for nglac, glac_fn in enumerate(glac_rgi_latlon.mb_fn.values):

        glac_fullfn = debris_prms.mb_binned_fp + glac_fn

        glac_str_noleadzero = glac_fullfn.split('/')[-1].split('_')[0]
        rgiid = 'RGI60-' + glac_str_noleadzero.split('.')[0].zfill(2) + '.' + glac_str_noleadzero.split('.')[1]

        assert glac_rgi_latlon.loc[nglac,'RGIId'] == rgiid, 'RGIId does not matach mass balance filename'

        # Select bins that meet calibration criteria
        df_raw = pd.read_csv(glac_fullfn)
        # reset_index gives an independent frame (no SettingWithCopyWarning on the column
        # assignments below) whose labels equal row positions
        df = df_raw.dropna(subset=['mb_bin_mean_mwea']).reset_index(drop=True)
        df['z1_bin_areas_perc_cum'] = (df['z1_bin_area_valid_km2'].cumsum()
                                       / df['z1_bin_area_valid_km2'].sum() * 100)

        # Add width to each elevation bin (nearest elevation in the widths table)
        widths_fp = debris_prms.oggm_fp + 'widths/' + 'RGI60-' + rgiid.split('-')[1].split('.')[0] + '/'
        widths_fn = rgiid + '_widths_m.csv'
        try:
            widths_df = pd.read_csv(widths_fp + widths_fn)
            elev_nearidx = (np.abs(df['bin_center_elev_m'].values[:,np.newaxis] -
                                   widths_df['elev'].values).argmin(axis=1))
            df['width_m'] = widths_df['width_m'].values[elev_nearidx]
        except (OSError, KeyError, ValueError):
            # Missing/unreadable widths file, missing column, or empty table:
            # a width of 0 means no bin of this glacier can pass the width criterion
            df['width_m'] = 0

        # Boolean mask instead of np.where(...) + .loc: the positional indices returned by
        # np.where do not match the index labels once dropna has removed rows
        meets_criteria = ((df['vm_med'] <= debris_prms.vel_threshold)
                          & (df['width_m'] >= debris_prms.width_min_dict[debris_prms.roi])
                          & (df['dc_bin_area_perc'] >= debris_prms.debrisperc_threshold)
                          & (df['dc_bin_count_valid'] >= 10)
                          & (df['z1_bin_areas_perc_cum'] <= debris_prms.term_area_perc))
        df_debris = df[meets_criteria]

        if len(df_debris) > 0:
            # Weight each bin elevation by its number of valid debris-covered pixels
            elev_list_all.extend(np.repeat(df_debris['bin_center_elev_m'].values,
                                           df_debris['dc_bin_count_valid'].values.astype(int)))
            rgiid_4cal.append(rgiid.split('-')[1])

    # Every selected bin has at least 10 valid pixels, so the list is non-empty
    # exactly when at least one bin met the criteria
    if len(elev_list_all) > 0:
        dc_zmean = np.mean(elev_list_all)
        dc_zstd = np.std(elev_list_all)
        dc_zmed = malib.fast_median(elev_list_all)
        dc_zmad = malib.mad(elev_list_all)

        lat_idx, lon_idx = latlon_unique_dict_reversed[latlon_unique]
        lat_deg = float(ds.latitude[lat_idx].values)
        lon_deg = float(ds.longitude[lon_idx].values)
        elev_stats_latlon_dict[lat_deg,lon_deg] = [dc_zmean, dc_zstd, dc_zmed, dc_zmad]
        latlon_list_updated.append((lat_deg, lon_deg))

print('unique lat/lons updated:', len(latlon_list_updated))
# Update pickle of unique lat/lons that will be used for melt model
with open(debris_prms.latlon_unique_fp + debris_prms.latlon_unique_dict[debris_prms.roi], 'wb') as f:
    pickle.dump(latlon_list_updated, f)

unique lat/lons updated: 9


In [8]:
# Data coverage: number of glaciers used for calibration and their debris-covered area
rgiid_4cal.sort()
main_glac_rgi_4cal = debris_prms.selectglaciersrgitable(rgiid_4cal)

# Look up each calibration glacier's debris-covered area from the shapefile attributes
dc_area_dict = {rgiid: dc_area
                for rgiid, dc_area in zip(dc_shp['RGIId'].values, dc_shp['DC_Area_v2'].values)}
main_glac_rgi_4cal['DC_Area_v2'] = main_glac_rgi_4cal['RGIId'].map(dc_area_dict)

n_glac_4cal = main_glac_rgi_4cal.shape[0]
dc_area_4cal_km2 = main_glac_rgi_4cal['DC_Area_v2'].sum() / 1e6
print('\nDC glaciers (used for cal):', n_glac_4cal,
      'DC Area (used for cal, km2):', dc_area_4cal_km2)

19 glaciers in region 16 are included in this model run: ['01339', '01350', '01909', '01963', '02137', '02172', '02322', '02348', '02394', '02410', '02412', '02420', '02433', '02457', '02482', '02483', '02497', '02518', '02566']
This study is focusing on 19 glaciers in region [16]

DC glaciers (used for cal): 19 DC Area (used for cal, km2): 16.551373


In [9]:
# ===== ADD DEBRIS ELEVATION STATS TO MET DATA ======
# For every lat/lon with debris elevation statistics, store the four statistics as variables
# in the matching met data netCDF file.
overwrite_dc_stats = True

# (variable name, long_name) in the same order as the values in elev_stats_latlon_dict
dc_stats_vars = [('dc_zmean', 'Mean debris cover elevation'),
                 ('dc_zstd', 'Standard deviation of debris cover elevation'),
                 ('dc_zmed', 'Median debris cover elevation'),
                 ('dc_zmad', 'Median absolute deviation of debris cover elevation')]
dc_stats_comment = 'converted from debris cover with data that will be used for subdebris melt inversion'

for nlatlon, latlon in enumerate(latlon_list_updated):

    lat_deg, lon_deg = latlon

    print(nlatlon, lat_deg, lon_deg)

    lat_str = 'S-' if lat_deg < 0 else 'N-'

    # ===== Meteorological data =====
    metdata_fn = debris_prms.metdata_fn_sample.replace(
        'XXXX', str(int(np.abs(lat_deg)*100)) + lat_str + str(int(lon_deg*100)) + 'E-')
    metdata_fullfn = debris_prms.metdata_fp + metdata_fn

    # Read the file fully into memory and release the handle before writing back to the same
    # path. A separate name (`ds_met`) keeps the global `ds` pointing at the met elevation
    # dataset that the lat/lon lookups in earlier cells rely on.
    with xr.open_dataset(metdata_fullfn) as ds_file:
        ds_met = ds_file.load()

    if 'dc_zmean' in ds_met:
        print('  existed:', ds_met.dc_zmean.values, 'vs', elev_stats_latlon_dict[latlon][0])

    if 'dc_zmean' not in ds_met or overwrite_dc_stats:
        # Add stats
        for (var_name, long_name), stat_value in zip(dc_stats_vars, elev_stats_latlon_dict[latlon]):
            ds_met[var_name] = stat_value
            ds_met[var_name].attrs = {'units':'m a.s.l.', 'long_name':long_name,
                                      'comment':dc_stats_comment}

        # Export updated dataset (mode='a' overwrites existing variables in the file)
        ds_met.to_netcdf(metdata_fullfn, mode='a')
    else:
        print(lat_deg, lon_deg, 'exists')

0 3.0 284.0
1 -0.5 281.75
2 -9.0 282.25
3 -9.0 282.5
4 -9.25 282.5
5 -9.5 282.5
6 -9.5 282.75
7 -10.0 282.75
8 -10.25 283.0


In [10]:
# Print a completion marker once this final cell is reached
print('DONE!')

DONE!
