In [1]:
#! /usr/bin/env python
"""
Resample debris thickness data to enable regional stats to be computed
"""
import sys
import os
import re
import subprocess
from datetime import datetime, timedelta
import time
import pickle
from collections import OrderedDict

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from rasterio.merge import merge
from rasterio.warp import calculate_default_transform, reproject, Resampling
from scipy import ndimage
from scipy.optimize import curve_fit
from scipy.optimize import minimize
from scipy.stats import median_absolute_deviation
import xarray as xr
from osgeo import gdal, ogr, osr

from pygeotools.lib import malib, warplib, geolib, iolib, timelib


import debrisglobal.globaldebris_input as debris_prms
from debrisglobal.glacfeat import GlacFeat, create_glacfeat
from meltcurves import melt_fromdebris_func
from meltcurves import debris_frommelt_func
from spc_split_lists import split_list


# Notebook-wide switches.
# debug: combined with the per-glacier `show_plots` flag to enable the optional diagnostic plots.
debug=False
# verbose: enables extra diagnostic printing in the processing loop and is passed on to the
# raster warping call.
verbose=False

In [2]:
def weighted_avg_and_std(values, weights):
    """
    Compute the weighted mean and the weighted (population) standard deviation.

    Parameters
    ----------
    values : array-like
        Samples to summarize.
    weights : array-like
        Weight of each sample; must be compatible in shape with ``values``.

    Returns
    -------
    tuple
        ``(weighted mean, weighted standard deviation)``.
    """
    mean_w = np.average(values, weights=weights)
    # Weighted mean of the squared deviations about the weighted mean
    squared_dev = (values - mean_w)**2
    var_w = np.average(squared_dev, weights=weights)
    return mean_w, var_w**0.5


def weighted_percentile(sorted_list, weights, percentile):
    """
    Weighted percentile of values that are already sorted in ascending order.

    Each sample i occupies the interval (cum_low[i], cum_high[i]] of the normalized
    cumulative weight. The result is the value of the sample whose interval contains
    ``percentile``; when ``percentile`` falls exactly on the boundary between two
    samples, the mean of those two values is returned.

    Parameters
    ----------
    sorted_list : sequence
        Values sorted in ascending order.
    weights : array-like
        Weights in the same order as ``sorted_list``.
    percentile : float
        Target percentile expressed as a fraction (e.g. 0.5 for the median).
    """
    total_weight = np.sum(weights)
    # Upper and lower edge of each sample's slice of the normalized cumulative weight
    cum_high = np.cumsum(weights) / total_weight
    cum_low = cum_high - weights / np.sum(weights)

    # First sample whose upper edge reaches the percentile
    idx_high = np.where(cum_high >= percentile)[0][0]
    # Last sample whose lower edge does not exceed the percentile
    idx_low = np.where(cum_low <= percentile)[0][-1]

    if idx_low != idx_high:
        return np.mean([sorted_list[idx_low], sorted_list[idx_high]])
    return sorted_list[idx_low]


def pickle_data(fn, data):
    """Serialize an object to disk with pickle.

    Parameters
    ----------
    fn : str
        Destination filename, including the filepath; an existing file is overwritten.
    data : object
        Any picklable object (list, dict, ...).

    Returns
    -------
    None
        The only effect is the .pkl file written at ``fn``.
    """
    with open(fn, mode='wb') as pkl_file:
        pickle.dump(data, pkl_file)

# Plot a single 2-D array (optionally over a gray background image) with a colorbar
def plot_array(dem, clim=None, titles=None, cmap='inferno', label=None, overlay=None, fn=None, close_fig=True):
    """Draw one array as a single-panel image with a colorbar.

    Parameters
    ----------
    dem : 2-D array
        Array to display.
    clim : tuple, optional
        Color limits passed to ``imshow``.
    titles : sequence of str, optional
        Only the first entry is used, as the axes title.
    cmap : str
        Colormap name for ``dem``.
    label : str, optional
        Colorbar label.
    overlay : 2-D array, optional
        Background image drawn in gray beneath ``dem``; when given, ``dem`` is drawn
        with alpha 0.7 instead of 1.0.
    fn : str, optional
        If given, the figure is saved to this path.
    close_fig : bool
        Close the figure before returning.
    """
    fig, ax = plt.subplots(1,1, sharex=True, sharey=True, figsize=(10,5))
    # Gray background, image-like aspect ratio, no axis labels/ticks
    ax.set_facecolor('0.5')
    ax.set(aspect='equal')
    for hidden_axis in (ax.get_xaxis(), ax.get_yaxis()):
        hidden_axis.set_visible(False)
    if titles is not None:
        ax.set_title(titles[0])
    # Optional background image; the data layer becomes semi-transparent on top of it
    has_overlay = overlay is not None
    if has_overlay:
        ax.imshow(overlay, cmap='gray', clim=(1,255))
    layer_alpha = 0.7 if has_overlay else 1.0
    image = ax.imshow(dem, clim=clim, cmap=cmap, alpha=layer_alpha)
    fig.tight_layout()
    fig.colorbar(image, label=label, extend='both', shrink=0.5)
    if fn is not None:
        fig.savefig(fn, bbox_inches='tight', pad_inches=0, dpi=150)
    if close_fig:
        plt.close(fig)

In [3]:
# ----- Run configuration -----
# overwrite=False reuses the per-region pickled lists when they already exist
overwrite = False
rois = ['01','02','03','04','05','06','07','08','09','10','11','12','13','14', '15', '16','17','18']
# rois = ['15']

# Percentiles (as fractions) reported for every variable
percentiles = [0.025, 0.05, 0.16, 0.25, 0.5, 0.75, 0.84, 0.95, 0.975]

# ----- Debris thickness uncertainty bounds -----
# Lookup tables keyed by debris thickness in whole centimeters -> lower / upper bound
hd_uncertainty_fullfn = debris_prms.output_fp + 'hd_uncertainty_bnds-1std.csv'
hd_uncertainty_df = pd.read_csv(hd_uncertainty_fullfn)
hd_uncertainty_keys_cm = [int(np.round(x*100)) for x in hd_uncertainty_df['hd_m']]

hd_uncertainty_dict_low = dict(zip(hd_uncertainty_keys_cm,
                                   list(hd_uncertainty_df['hd_bndlow_both'].values)))
# Thinnest classes (0 and 1 cm): lower bound is forced to zero
for hd_cm_key in (0, 1):
    hd_uncertainty_dict_low[hd_cm_key] = 0

hd_uncertainty_dict_high = dict(zip(hd_uncertainty_keys_cm,
                                    list(hd_uncertainty_df['hd_bndhigh_both'].values)))
# Thinnest classes (0 and 1 cm): upper bound is taken from the first row of the table
for hd_cm_key in (0, 1):
    hd_uncertainty_dict_high[hd_cm_key] = hd_uncertainty_df.loc[0,'hd_bndhigh_both']


# ----- Regional stats dataframe -----
# One row per region plus one final row for the aggregate of all regions
reg_stats_fullfn = debris_prms.output_fp + 'reg_stats_hd_mf.csv'
reg_stats_cns = [
    'roi', 'dc_area_km2', 'dc_area_km2_lt_10cm', 'dc_area_km2_lt_50cm', 'dc_area_km2_lt_1m',
    # debris thickness
    'hd_mean', 'hd_std',
    'hd_025', 'hd_05', 'hd_16', 'hd_25', 'hd_med', 'hd_75', 'hd_84', 'hd_95', 'hd_975',
    # debris thickness, lower bound
    'hd_low_mean', 'hd_low_std',
    'hd_low_025', 'hd_low_05', 'hd_low_16', 'hd_low_25', 'hd_low_med',
    'hd_low_75', 'hd_low_84', 'hd_low_95', 'hd_low_975',
    # debris thickness, upper bound
    'hd_high_mean', 'hd_high_std',
    'hd_high_025', 'hd_high_05', 'hd_high_16', 'hd_high_25', 'hd_high_med',
    'hd_high_75', 'hd_high_84', 'hd_high_95', 'hd_high_975',
    # melt factor
    'mf_mean', 'mf_std',
    'mf_025', 'mf_05', 'mf_16', 'mf_25', 'mf_med', 'mf_75', 'mf_84', 'mf_95', 'mf_975',
    # melt factor computed from the lower thickness bound
    'mf_low_mean', 'mf_low_std',
    'mf_low_025', 'mf_low_05', 'mf_low_16', 'mf_low_25', 'mf_low_med',
    'mf_low_75', 'mf_low_84', 'mf_low_95', 'mf_low_975',
    # melt factor computed from the upper thickness bound
    'mf_high_mean', 'mf_high_std',
    'mf_high_025', 'mf_high_05', 'mf_high_16', 'mf_high_25', 'mf_high_med',
    'mf_high_75', 'mf_high_84', 'mf_high_95', 'mf_high_975',
]
reg_stats_shape = (len(rois)+1, len(reg_stats_cns))
reg_stats_df = pd.DataFrame(np.zeros(reg_stats_shape), columns=reg_stats_cns)


## ===== REGIONAL DEBRIS THICKNESS AND MELT FACTOR STATISTICS =====
# For every region: load (or build and cache) per-pixel lists of debris thickness (hd),
# melt factor (mf), their low/high uncertainty variants and pixel areas, then append them
# to the *_global lists used for the all-region statistics.
hd_list_all_global = []
hd_list_all_low_global = []
hd_list_all_high_global = []
mf_list_all_global = []
mf_list_all_low_global = []
mf_list_all_high_global = []
area_m2_list_all_global = []
for nroi, roi in enumerate(rois):

    print('roi:', roi)

    # Load file if it already exists
    list_fp = debris_prms.output_fp + 'pickle_datasets/'
    if not os.path.exists(list_fp):
        os.makedirs(list_fp)
    hd_list_all_fullfn = list_fp + roi + '_hd_list_all.pkl'
    mf_list_all_fullfn = list_fp + roi + '_mf_list_all.pkl'
    area_m2_list_all_fullfn = list_fp + roi + '_area_m2_list_all.pkl'


    # NOTE(review): only the '_low' thickness pickle is checked for existence; the other six
    # files are opened unconditionally below and will raise if any of them is missing.
    # The pickles read here are the ones written by pickle_data() at the end of the else-branch;
    # pickle.load must not be pointed at untrusted files.
    if os.path.exists(hd_list_all_fullfn.replace('.pkl','_low.pkl')) and not overwrite:
        # Debris thickness
        with open(hd_list_all_fullfn, 'rb') as f:
            hd_list_all = pickle.load(f)
        with open(hd_list_all_fullfn.replace('.pkl','_low.pkl'), 'rb') as f:
            hd_list_all_low = pickle.load(f)
        with open(hd_list_all_fullfn.replace('.pkl','_high.pkl'), 'rb') as f:
            hd_list_all_high = pickle.load(f)
        # Melt factor
        with open(mf_list_all_fullfn, 'rb') as f:
            mf_list_all = pickle.load(f)
        with open(mf_list_all_fullfn.replace('.pkl','_low.pkl'), 'rb') as f:
            mf_list_all_low = pickle.load(f)
        with open(mf_list_all_fullfn.replace('.pkl','_high.pkl'), 'rb') as f:
            mf_list_all_high = pickle.load(f)
        # Area
        with open(area_m2_list_all_fullfn, 'rb') as f:
            area_m2_list_all = pickle.load(f)
    else:

        rgiids = []
        hd_fns = []
        # Filepaths (regions 13, 14 and 15 share the combined 'HMA' directories)
        if roi in ['13', '14', '15']:
            hd_fp = debris_prms.output_fp + 'ts_tif/hd_tifs/HMA/'
            hdopt_prms_fp = debris_prms.output_fp + 'hd_opt_prms/HMA/'
        else:
            hd_fp = debris_prms.output_fp + 'ts_tif/hd_tifs/' + roi + '/'
            hdopt_prms_fp = debris_prms.output_fp + 'hd_opt_prms/' + roi + '/'
        hd_fp_extrap = hd_fp + 'extrap/'
        # NOTE(review): hdopt_prms_fp already ends with '/', so this yields '.../<roi>//_extrap/';
        # confirm the directory is really named '_extrap' (the tif directory above is 'extrap').
        hdopt_prms_fp_extrap = hdopt_prms_fp + '/_extrap/'
        mf_fp = hd_fp + 'meltfactor/'
        mf_fp_extrap = hd_fp_extrap + 'meltfactor/'

        # Glaciers optimized
        # NOTE(review): glac_hd_fullfns is never used in the visible code.
        glac_hd_fullfns = []
        for i in os.listdir(hd_fp):
            if i.endswith('hdts_m.tif'):
                # Region number is the part of the filename before the first '.'
                reg_str = str(int(i.split('.')[0])).zfill(2)
                if reg_str == roi:
                    hd_fns.append(i)
                    rgiids.append(i.split('_')[0])

        # Glaciers extrapolated
        for i in os.listdir(hd_fp_extrap):
            if i.endswith('hdts_m_extrap.tif'):
                reg_str = str(int(i.split('.')[0])).zfill(2)
                if reg_str == roi:
                    hd_fns.append(i)
                    rgiids.append(i.split('_')[0])

        # Sorted files (filenames are reordered with the same key as the ids so they stay paired)
        hd_fns = [x for _,x in sorted(zip(rgiids, hd_fns))]
        rgiids = sorted(rgiids)

        main_glac_rgi = debris_prms.selectglaciersrgitable(rgiids)
        # Longitude converted from [-180, 180] to [0, 360]
        main_glac_rgi['CenLon_360'] = main_glac_rgi['CenLon']
        main_glac_rgi.loc[main_glac_rgi['CenLon_360'] < 0, 'CenLon_360'] = (
            360 + main_glac_rgi.loc[main_glac_rgi['CenLon_360'] < 0, 'CenLon_360'])
        # assumes selectglaciersrgitable returns rows in the same order as the sorted rgiids — TODO confirm
        main_glac_rgi['hd_fn'] = hd_fns

        hd_list_all = []
        hd_list_all_low = []
        hd_list_all_high = []
        mf_list_all = []
        mf_list_all_low = []
        mf_list_all_high = []
        area_m2_list_all = []
        for nglac, glac_idx in enumerate(main_glac_rgi.index.values):
#         for nglac, glac_idx in enumerate(main_glac_rgi.index.values[613:614]):
            glac_str = main_glac_rgi.loc[glac_idx,'rgino_str']
            rgiid = main_glac_rgi.loc[glac_idx,'RGIId']
            region = glac_str.split('.')[0]

            # Glacier string without the leading zero of the region (e.g. '01.x' -> '1.x')
            if int(region) < 10:
                glac_str_noleadzero = str(int(glac_str.split('.')[0])) + '.' + glac_str.split('.')[1]
            else:
                glac_str_noleadzero = glac_str

            # Progress indicator every 1000 glaciers
            if nglac%1000 == 0:
#             if nglac%1 == 0:
                print(nglac, glac_str)

            # Create glacier feature from ice thickness raster
            thick_dir = debris_prms.oggm_fp + 'thickness/RGI60-' + str(region.zfill(2)) + '/'
            thick_fn = 'RGI60-' + str(region.zfill(2)) + '.' + rgiid.split('.')[1] + '_thickness.tif'

            gf = create_glacfeat(thick_dir, thick_fn)

            # =====FILENAMES =====
            # Add the filenames (insertion order matters: it is zipped with the warped datasets below)
            fn_dict = OrderedDict()
            # DEM
            z1_fp = debris_prms.oggm_fp + 'dems/RGI60-' + str(region.zfill(2)) + '/'
            z1_fn = 'RGI60-' + str(region.zfill(2)) + '.' + rgiid.split('.')[1] + '_dem.tif'
            fn_dict['z1'] = z1_fp + z1_fn

            # Debris thickness and melt factors
            # (optimized glaciers use the no-leading-zero id for the parameter file,
            #  extrapolated glaciers use the id as stored in 'rgino_str')
            hd_fn = main_glac_rgi.loc[glac_idx, 'hd_fn']
            if '_extrap' not in hd_fn:
                hd_fullfn = hd_fp + hd_fn
                mf_fullfn = mf_fp + hd_fn.replace('hdts_m', 'meltfactor')
                hdopt_prms_fullfn = hdopt_prms_fp + glac_str_noleadzero + '_hdopt_prms.csv'
            else:
                hd_fullfn = hd_fp_extrap + hd_fn
                mf_fullfn = mf_fp_extrap + hd_fn.replace('hdts_m', 'meltfactor')
                hdopt_prms_fullfn = hdopt_prms_fp_extrap + glac_str + '_hdopt_prms_extrap.csv'

            fn_dict['debris_thick_ts'] = hd_fullfn
            fn_dict['meltfactor_ts'] = mf_fullfn

            # Ice thickness (same directory/filename as computed above for create_glacfeat)
            thick_dir = debris_prms.oggm_fp + 'thickness/RGI60-' + str(region.zfill(2)) + '/'
            thick_fn = 'RGI60-' + str(region.zfill(2)) + '.' + rgiid.split('.')[1] + '_thickness.tif'
            fn_dict['ice_thick'] = thick_dir + thick_fn

            # ===== PROCESS THE DATA =====
            #Expand extent to include buffered region around glacier polygon
            warp_extent = geolib.pad_extent(gf.glac_geom_extent, width=debris_prms.buff_dist)
            if verbose:
                print("Expanding extent")
                print(gf.glac_geom_extent)
                print(warp_extent)
                print(gf.aea_srs)

            #Warp everything to common res/extent/proj
            # Target resolution: the smaller of the DEM pixel width and pixel height
            z1_gt = gdal.Open(fn_dict['z1']).GetGeoTransform()
            z1_res = np.min([z1_gt[1], -z1_gt[5]])
            # resampling algorithm
            r_resampling = 'cubic'
            ds_list = warplib.memwarp_multi_fn(fn_dict.values(), res=z1_res, extent=warp_extent, 
                                               t_srs=gf.aea_srs, verbose=verbose, r=r_resampling)
            ds_dict = dict(zip(fn_dict.keys(), ds_list))
            gf.ds_dict = ds_dict

            if verbose:
                print(ds_list)
                print(fn_dict.keys())

            glac_geom_mask = geolib.geom2mask(gf.glac_geom, ds_dict['z1'])
            gf.z1 = np.ma.array(iolib.ds_getma(ds_dict['z1']), mask=glac_geom_mask)

            # Debris thickness values of 0 are masked (use meltfactor mask instead)
            # i.e. the thickness array is rebuilt from its raw data with the melt factor mask,
            # so both arrays compress to the same pixels.
            gf.meltfactor_ts = np.ma.array(iolib.ds_getma(ds_dict['meltfactor_ts']), mask=glac_geom_mask)
            gf.debris_thick_ts = np.ma.array(iolib.ds_getma(ds_dict['debris_thick_ts']), mask=glac_geom_mask)
            gf.debris_thick_ts = np.ma.array(gf.debris_thick_ts.data, mask=gf.meltfactor_ts.mask)

#             # Melt factors are masked so only calculate over areas with debris > 0
#             gf.debris_thick_ts = np.ma.array(iolib.ds_getma(ds_dict['debris_thick_ts']), mask=glac_geom_mask)
#             gf.meltfactor_ts = np.ma.array(iolib.ds_getma(ds_dict['meltfactor_ts']), mask=glac_geom_mask)       
#             gf.meltfactor_ts = np.ma.array(gf.meltfactor_ts.data, mask=gf.debris_thick_ts.mask)

            gf.res = geolib.get_res(ds_dict['z1'])

            if verbose:
                print('\n\n# z1 pixels:', gf.z1.count(), '\n')



            # ===== PLOTS =====
            # Plots are only drawn when both the notebook-level debug flag and show_plots are True
            show_plots = False
            if debug and show_plots:
                # DEM
                var_full2plot = gf.z1.copy()
                clim = malib.calcperc(var_full2plot, (2,98))
                plot_array(var_full2plot, clim, [glac_str + ' DEM'], 'inferno', 'elev (masl)', close_fig=False)
                # Debris thickness
                var_full2plot = gf.debris_thick_ts.copy()
                clim = (0,1)
                plot_array(var_full2plot, clim, [gf.glacnum + ' hd (from ts)'], 'inferno', 'hd (m)', 
                           close_fig=False)
                # Melt factor
                var_full2plot = gf.meltfactor_ts.copy()
                clim = (0,1)
                plot_array(var_full2plot, clim, [gf.glacnum + ' meltfactor'], 'inferno', 'mf (-)',
                           close_fig=False)
    #             # Surface temperature
    #             var_full2plot = gf.ts.copy()
    #             clim = malib.calcperc(var_full2plot, (2,98))
    #             plot_array(var_full2plot, clim, [glac_str + ' Ts'], 'inferno', 'ts (degC)', close_fig=False)

            # Get list of values (unmasked pixels only)
            hd_list = list(gf.debris_thick_ts.compressed())
            mf_list = list(gf.meltfactor_ts.compressed())

            # Replace nan debris thickness values with 0
            # NOTE(review): because NaNs are replaced here, the NaN-removal step below never
            # finds any NaN in hd_list; mf_list is not checked for NaN at all.
            hd_list = [0 if np.isnan(x) else x for x in hd_list]

            if len(hd_list) > 0:
                # Remove nan values (drops the same indices from both lists to keep them paired)
                hd_array_nonan = np.array(hd_list)
                nan_idx_list = [x[0] for x in list(np.argwhere(np.isnan(hd_array_nonan)))]
                if len(nan_idx_list) > 0:
                    hd_list = [y for x,y in enumerate(hd_list) if x not in nan_idx_list]
                    mf_list = [y for x,y in enumerate(mf_list) if x not in nan_idx_list]

                # Sanity checks on the extracted values
                assert len(hd_list) == len(mf_list), 'hd_list and mf_list differ; NEED TO MASK THESE VALUES OR RE-PROCESS'
                rounding_err = 1e-6
                assert np.max(hd_list) <= debris_prms.hd_max + rounding_err and np.min(hd_list) >= -rounding_err, 'hd outside of bounds' # rounding error may give -1e-12 for some values
                assert np.min(mf_list) >= -rounding_err, 'negative melt factor' 
                assert np.max(mf_list) <= 10, 'melt factor greater than 10!'

                # Every pixel of this glacier carries the same area weight
                pixel_m2 = abs(gf.res[0] * gf.res[1])
                area_m2_list = [pixel_m2] * len(hd_list)

                # Append to existing
                hd_list_all.extend(hd_list)
                mf_list_all.extend(mf_list)
                area_m2_list_all.extend(area_m2_list)


                # ----- Uncertainty: hd_list and mf_list -----
                # Uncertainty for lower and upper bounds
                # Lookup key is the thickness rounded to whole centimeters; a thickness whose key is
                # missing from the uncertainty table raises KeyError.
                hd_list_low = [hd_uncertainty_dict_low[x] for x in list(np.round(np.array(hd_list)*100,0).astype(int))]
                hd_list_high = [hd_uncertainty_dict_high[x] for x in list(np.round(np.array(hd_list)*100,0).astype(int))]

                # Optimized parameters for melt factor uncertainties
                df_opt = pd.read_csv(hdopt_prms_fullfn)
                melt_2cm = df_opt.loc[0,'melt_mwea_2cm']
                melt_cleanice = df_opt.loc[0,'melt_mwea_clean']
                func_coeff = [df_opt.loc[0,'b0'], df_opt.loc[0,'k']]

                # Melt factor from the lower-bound debris thickness
                mf_array_low = melt_fromdebris_func(np.array(hd_list_low), func_coeff[0], func_coeff[1]) / melt_cleanice
                # limit melt rates to modeled 2 cm rate
                mf_array_low[mf_array_low > melt_2cm / melt_cleanice] = melt_2cm / melt_cleanice
                # Linearly interpolate between 0 cm and 2 cm for the melt rate
                # (helper is re-defined on every glacier iteration)
                def meltfactor_0to2cm_adjustment(mf, melt_clean, melt_2cm, hd):
                    """ Linearly interpolate melt factors between 0 and 2 cm 
                        based on clean ice and 2 cm sub-debris melt """
                    # NOTE(review): the second positional argument of np.nan_to_num is `copy`,
                    # not the fill value; passing 0 requests in-place replacement and NaNs are
                    # filled with the default 0.0 — confirm this is the intent.
                    mf = np.nan_to_num(mf,0)
                    mf[(hd >= 0) & (hd < 0.02)] = (
                        1 + hd[(hd >= 0) & (hd < 0.02)] / 0.02 * (melt_2cm - melt_clean) / melt_clean)
                    return mf
                mf_array_low = meltfactor_0to2cm_adjustment(mf_array_low, melt_cleanice, melt_2cm, np.array(hd_list_low))

                # Melt factor from the upper-bound debris thickness
                mf_array_high = melt_fromdebris_func(np.array(hd_list_high), func_coeff[0], func_coeff[1]) / melt_cleanice
                mf_array_high[mf_array_high > melt_2cm / melt_cleanice] = melt_2cm / melt_cleanice
                mf_array_high = meltfactor_0to2cm_adjustment(mf_array_high, melt_cleanice, melt_2cm, np.array(hd_list_high))

                # Append lists
                hd_list_all_low.extend(hd_list_low)
                hd_list_all_high.extend(hd_list_high)
                mf_list_all_low.extend(list(mf_array_low))
                mf_list_all_high.extend(list(mf_array_high))

        # ===== EXPORT LISTS =====
        # Cache the regional lists so later runs can skip the raster processing
        pickle_data(hd_list_all_fullfn, hd_list_all)
        pickle_data(hd_list_all_fullfn.replace('.pkl','_low.pkl'), hd_list_all_low)
        pickle_data(hd_list_all_fullfn.replace('.pkl','_high.pkl'), hd_list_all_high)
        pickle_data(mf_list_all_fullfn, mf_list_all)
        pickle_data(mf_list_all_fullfn.replace('.pkl','_low.pkl'), mf_list_all_low)
        pickle_data(mf_list_all_fullfn.replace('.pkl','_high.pkl'), mf_list_all_high)
        pickle_data(area_m2_list_all_fullfn, area_m2_list_all)


    # Aggregate global data
    hd_list_all_global.extend(hd_list_all)
    hd_list_all_low_global.extend(hd_list_all_low)
    hd_list_all_high_global.extend(hd_list_all_high)
    mf_list_all_global.extend(mf_list_all)
    mf_list_all_low_global.extend(mf_list_all_low)
    mf_list_all_high_global.extend(mf_list_all_high)
    area_m2_list_all_global.extend(area_m2_list_all)
    
    
    def reg_stats_weighted_fromlist(list_all, area_m2_list_all, percentiles, print_name=None):
        """ Compute weighted regional stats based on list of debris thickness or melt factors and area

        Parameters
        ----------
        list_all : list
            Per-pixel values (debris thickness or melt factor).
        area_m2_list_all : list
            Per-pixel areas used as weights, in the same order as ``list_all``.
        percentiles : list of float
            Percentiles to evaluate, expressed as fractions (e.g. 0.5).
        print_name : str, optional
            Label used in the progress printout; when None nothing is printed.

        Returns
        -------
        list
            ``[weighted mean, weighted std, percentile_1, ..., percentile_n]``.
        """
        # Sort (value, weight) pairs once so values and weights stay aligned
        sorted_pairs = sorted(zip(list_all, area_m2_list_all))
        sorted_list = [value for value, _ in sorted_pairs]
        sorted_area_m2 = [area for _, area in sorted_pairs]

        # Regional statistics
        list_mean, list_std = weighted_avg_and_std(sorted_list, weights=sorted_area_m2)
        if print_name is not None:
            print(print_name + '_mean (+/- std): ' + str(np.round(list_mean,2)) + ' +/- ' + str(np.round(list_std,2)))
        reg_stats_values = [list_mean, list_std]
        for percentile in percentiles:
            value_percentile = weighted_percentile(sorted_list, sorted_area_m2, percentile)
            reg_stats_values.append(value_percentile)
            # Guarded like the mean printout: concatenating a None label would raise TypeError
            if print_name is not None:
                print('  ' + print_name + ' percentile (' + str(percentile) + '): ' +  str(np.round(value_percentile,2)))
        return reg_stats_values
    
    # ----- Build the statistics row for this region -----
    # Row starts with the region id and the total area of all pixels (m2 -> km2)
    reg_stats_values = [roi, np.sum(area_m2_list_all) / 1e6]

    # Area with debris thinner than each threshold (thresholds in m, area in km2)
    hd_list_all_array = np.array(hd_list_all)
    area_km2_list_all_array = np.array(area_m2_list_all) / 1e6
    for hd_threshold in [0.1, 0.5, 1.]:
        hd_idxs = np.flatnonzero(hd_list_all_array < hd_threshold)
        dc_area_km2_lt_threshold = area_km2_list_all_array[hd_idxs].sum() if len(hd_idxs) > 0 else 0
        reg_stats_values.append(dc_area_km2_lt_threshold)

    # Weighted mean/std/percentiles, in the column order of the stats dataframe:
    # debris thickness (best, low, high) followed by melt factor (best, low, high)
    for stats_label, stats_list in [('hd', hd_list_all),
                                    ('hd_low', hd_list_all_low),
                                    ('hd_high', hd_list_all_high),
                                    ('mf', mf_list_all),
                                    ('mf_low', mf_list_all_low),
                                    ('mf_high', mf_list_all_high)]:
        reg_stats_subset = reg_stats_weighted_fromlist(stats_list, area_m2_list_all, percentiles,
                                                       print_name=stats_label)
        reg_stats_values.extend(reg_stats_subset)

    # Record regional stats
    reg_stats_df.loc[nroi,:] = reg_stats_values

    print('\n')
    
    
    
# ===== GLOBAL STATISTICS =====
# The aggregate row goes one past the last regional row
nroi += 1
# Point the working names at the lists accumulated over all regions
hd_list_all = hd_list_all_global
hd_list_all_low = hd_list_all_low_global
hd_list_all_high = hd_list_all_high_global
mf_list_all = mf_list_all_global
mf_list_all_low = mf_list_all_low_global
mf_list_all_high = mf_list_all_high_global
area_m2_list_all = area_m2_list_all_global

# Row starts with the label 'all' and the total area of all pixels (m2 -> km2)
reg_stats_values = ['all', np.sum(area_m2_list_all) / 1e6]

# Area with debris thinner than each threshold (thresholds in m, area in km2)
hd_list_all_array = np.array(hd_list_all)
area_km2_list_all_array = np.array(area_m2_list_all) / 1e6
for hd_threshold in [0.1, 0.5, 1.]:
    hd_idxs = np.flatnonzero(hd_list_all_array < hd_threshold)
    dc_area_km2_lt_threshold = area_km2_list_all_array[hd_idxs].sum() if len(hd_idxs) > 0 else 0
    reg_stats_values.append(dc_area_km2_lt_threshold)

# Weighted mean/std/percentiles, in the column order of the stats dataframe:
# debris thickness (best, low, high) followed by melt factor (best, low, high)
for stats_label, stats_list in [('hd', hd_list_all),
                                ('hd_low', hd_list_all_low),
                                ('hd_high', hd_list_all_high),
                                ('mf', mf_list_all),
                                ('mf_low', mf_list_all_low),
                                ('mf_high', mf_list_all_high)]:
    reg_stats_subset = reg_stats_weighted_fromlist(stats_list, area_m2_list_all, percentiles,
                                                   print_name=stats_label)
    reg_stats_values.extend(reg_stats_subset)

reg_stats_df.loc[nroi,:] = reg_stats_values

# ==== Export regional stats =====
reg_stats_df.to_csv(reg_stats_fullfn, index=False)

roi: 01
hd_mean (+/- std): 0.4 +/- 0.61
  hd percentile (0.025): 0.0
  hd percentile (0.05): 0.0
  hd percentile (0.16): 0.03
  hd percentile (0.25): 0.05
  hd percentile (0.5): 0.18
  hd percentile (0.75): 0.46
  hd percentile (0.84): 0.66
  hd percentile (0.95): 1.73
  hd percentile (0.975): 3.0
hd_low_mean (+/- std): 0.24 +/- 0.33
  hd_low percentile (0.025): 0
  hd_low percentile (0.05): 0
  hd_low percentile (0.16): 0.01
  hd_low percentile (0.25): 0.03
  hd_low percentile (0.5): 0.12
  hd_low percentile (0.75): 0.3
  hd_low percentile (0.84): 0.42
  hd_low percentile (0.95): 1.01
  hd_low percentile (0.975): 1.51
hd_high_mean (+/- std): 0.7 +/- 1.41
  hd_high percentile (0.025): 0.04
  hd_high percentile (0.05): 0.04
  hd_high percentile (0.16): 0.06
  hd_high percentile (0.25): 0.08
  hd_high percentile (0.5): 0.25
  hd_high percentile (0.75): 0.63
  hd_high percentile (0.84): 0.92
  hd_high percentile (0.95): 2.87
  hd_high percentile (0.975): 7.66
mf_mean (+/- std): 0.6 +/- 0.

  hd_low percentile (0.25): 0.01
  hd_low percentile (0.5): 0.06
  hd_low percentile (0.75): 0.14
  hd_low percentile (0.84): 0.22
  hd_low percentile (0.95): 0.5
  hd_low percentile (0.975): 0.79
hd_high_mean (+/- std): 0.36 +/- 0.89
  hd_high percentile (0.025): 0.04
  hd_high percentile (0.05): 0.04
  hd_high percentile (0.16): 0.04
  hd_high percentile (0.25): 0.06
  hd_high percentile (0.5): 0.15
  hd_high percentile (0.75): 0.31
  hd_high percentile (0.84): 0.45
  hd_high percentile (0.95): 1.11
  hd_high percentile (0.975): 1.97
mf_mean (+/- std): 0.79 +/- 0.43
  mf percentile (0.025): 0.09
  mf percentile (0.05): 0.15
  mf percentile (0.16): 0.31
  mf percentile (0.25): 0.43
  mf percentile (0.5): 0.79
  mf percentile (0.75): 1.01
  mf percentile (0.84): 1.19
  mf percentile (0.95): 1.53
  mf percentile (0.975): 1.67
mf_low_mean (+/- std): 0.9 +/- 0.4
  mf_low percentile (0.025): 0.15
  mf_low percentile (0.05): 0.22
  mf_low percentile (0.16): 0.44
  mf_low percentile (0.25): 

mf_mean (+/- std): 0.7 +/- 0.37
  mf percentile (0.025): 0.06
  mf percentile (0.05): 0.09
  mf percentile (0.16): 0.23
  mf percentile (0.25): 0.35
  mf percentile (0.5): 0.78
  mf percentile (0.75): 1.02
  mf percentile (0.84): 1.05
  mf percentile (0.95): 1.18
  mf percentile (0.975): 1.29
mf_low_mean (+/- std): 0.79 +/- 0.34
  mf_low percentile (0.025): 0.09
  mf_low percentile (0.05): 0.14
  mf_low percentile (0.16): 0.33
  mf_low percentile (0.25): 0.51
  mf_low percentile (0.5): 0.99
  mf_low percentile (0.75): 1.01
  mf_low percentile (0.84): 1.03
  mf_low percentile (0.95): 1.18
  mf_low percentile (0.975): 1.25
mf_high_mean (+/- std): 0.52 +/- 0.29
  mf_high percentile (0.025): 0.04
  mf_high percentile (0.05): 0.07
  mf_high percentile (0.16): 0.17
  mf_high percentile (0.25): 0.26
  mf_high percentile (0.5): 0.55
  mf_high percentile (0.75): 0.77
  mf_high percentile (0.84): 0.84
  mf_high percentile (0.95): 0.97
  mf_high percentile (0.975): 1.07


roi: 10
hd_mean (+/- std

  mf_low percentile (0.25): 0.3
  mf_low percentile (0.5): 0.64
  mf_low percentile (0.75): 1.07
  mf_low percentile (0.84): 1.21
  mf_low percentile (0.95): 1.42
  mf_low percentile (0.975): 1.51
mf_high_mean (+/- std): 0.47 +/- 0.4
  mf_high percentile (0.025): 0.02
  mf_high percentile (0.05): 0.03
  mf_high percentile (0.16): 0.1
  mf_high percentile (0.25): 0.15
  mf_high percentile (0.5): 0.33
  mf_high percentile (0.75): 0.72
  mf_high percentile (0.84): 0.99
  mf_high percentile (0.95): 1.29
  mf_high percentile (0.975): 1.38


roi: 14
hd_mean (+/- std): 0.36 +/- 0.57
  hd percentile (0.025): 0.01
  hd percentile (0.05): 0.01
  hd percentile (0.16): 0.04
  hd percentile (0.25): 0.06
  hd percentile (0.5): 0.16
  hd percentile (0.75): 0.4
  hd percentile (0.84): 0.59
  hd percentile (0.95): 1.4
  hd percentile (0.975): 2.83
hd_low_mean (+/- std): 0.21 +/- 0.31
  hd_low percentile (0.025): 0
  hd_low percentile (0.05): 0
  hd_low percentile (0.16): 0.02
  hd_low percentile (0.25)

  mf_high percentile (0.84): 0.78
  mf_high percentile (0.95): 1.12
  mf_high percentile (0.975): 1.33


roi: 18
hd_mean (+/- std): 0.29 +/- 0.4
  hd percentile (0.025): 0.01
  hd percentile (0.05): 0.02
  hd percentile (0.16): 0.04
  hd percentile (0.25): 0.06
  hd percentile (0.5): 0.17
  hd percentile (0.75): 0.37
  hd percentile (0.84): 0.5
  hd percentile (0.95): 0.94
  hd percentile (0.975): 1.34
hd_low_mean (+/- std): 0.18 +/- 0.23
  hd_low percentile (0.025): 0
  hd_low percentile (0.05): 0.0
  hd_low percentile (0.16): 0.02
  hd_low percentile (0.25): 0.03
  hd_low percentile (0.5): 0.11
  hd_low percentile (0.75): 0.24
  hd_low percentile (0.84): 0.32
  hd_low percentile (0.95): 0.59
  hd_low percentile (0.975): 0.81
hd_high_mean (+/- std): 0.45 +/- 0.82
  hd_high percentile (0.025): 0.04
  hd_high percentile (0.05): 0.04
  hd_high percentile (0.16): 0.07
  hd_high percentile (0.25): 0.09
  hd_high percentile (0.5): 0.24
  hd_high percentile (0.75): 0.51
  hd_high percentile 

In [5]:
# # Melt factor stats
#     mf_mean, mf_std = weighted_avg_and_std(mf_list_all, weights=area_m2_list_all)
#     print('hd_mean (+/- std): ' + str(np.round(mf_mean,2)) + ' +/- ' + str(np.round(mf_std,2)))

#     percentiles = [0.025, 0.05, 0.16, 0.25, 0.5, 0.75, 0.84, 0.95, 0.975]
#     for percentile in percentiles:
#         value_percentile = weighted_percentile(sorted_mf, sorted_area_m2_4mf)
#         print('  mf percentile (' + str(percentile) + '): ' +  str(np.round(value_percentile,2)))
# batman = []
# for nweight, weight in enumerate(area_m2_list_all):
#     batman.extend([hd_list_all[nweight]] * int(weight))
# print(len(batman))
# np.median(batman)
# print(np.median(batman), np.mean(batman))

In [None]:
# Per-region summary of debris-covered glaciers: all glaciers in the debris-cover shapefile,
# the subset passing the area thresholds, and the subset with binned mass-balance data
rois = ['01','02','03','04','05','06','07','08','09','10','11','12','HMA','16','17','18']
for roi in rois:
    dc_shp_fullfn = debris_prms.debriscover_fp + debris_prms.debriscover_fn_dict[roi]
    dc_shp = gpd.read_file(dc_shp_fullfn).sort_values(by=['RGIId'])

    # Glacier ids as '<region without leading zero>.<glacier number>', taken from the text
    # after the '-' in each RGIId
    dc_rgiids = []
    for dc_rgiid_full in dc_shp.RGIId:
        dc_rgiid_parts = dc_rgiid_full.split('-')[1].split('.')
        dc_rgiids.append(str(int(dc_rgiid_parts[0])) + '.' + dc_rgiid_parts[1])

    # Glaciers that also have a file in the binned mass-balance directory
    mb_bin_all_fp = debris_prms.output_fp + 'mb_bins_all/csv/' + roi + '/'
    dc_rgiids_wdata = []
    for i in os.listdir(mb_bin_all_fp):
        glac_str = i.split('_')[0]
        if glac_str not in dc_rgiids:
            continue
        dc_rgiids_wdata.append(glac_str)

    # Select glaciers
    main_glac_rgi_dc = debris_prms.selectglaciersrgitable(dc_rgiids)
    main_glac_rgi_dc_wdata = debris_prms.selectglaciersrgitable(dc_rgiids_wdata)

    # Lookups from RGIId to debris-cover attributes of the shapefile
    dc_areaperc_dict = dict(zip(dc_shp.RGIId.values,dc_shp['DC_Area__1'].values))
    dc_area_dict = dict(zip(dc_shp.RGIId.values,dc_shp['DC_Area_v2'].values))

    # Debris-covered percentage (0 where the glacier has no entry), then debris-covered area
    # recomputed as glacier area times that percentage
    for rgi_table in (main_glac_rgi_dc, main_glac_rgi_dc_wdata):
        rgi_table['DC_Area_%'] = rgi_table.RGIId.map(dc_areaperc_dict).fillna(0)
    for rgi_table in (main_glac_rgi_dc, main_glac_rgi_dc_wdata):
        rgi_table['DC_Area_v2'] = rgi_table['Area'] * rgi_table['DC_Area_%'] / 100

    # Subset of glaciers: (percentage OR absolute debris-covered area above threshold)
    # AND glacier area above the minimum
    # NOTE(review): 'DC_Area_v2' was just recomputed from 'Area'; the '/ 1e6' below looks like a
    # m2 -> km2 conversion — confirm the units of 'Area' and of dc_area_threshold.
    exceeds_dc_perc = main_glac_rgi_dc['DC_Area_%'] > debris_prms.dc_percarea_threshold
    exceeds_dc_area = main_glac_rgi_dc['DC_Area_v2'] / 1e6 > debris_prms.dc_area_threshold
    exceeds_min_area = main_glac_rgi_dc['Area'] > debris_prms.min_glac_area
    main_glac_rgi_dc_gt2km2 = (
        main_glac_rgi_dc[(exceeds_dc_perc | exceeds_dc_area) & exceeds_min_area].copy())

    # Statistics of interest
    dc_area_total = str(np.round(main_glac_rgi_dc['DC_Area_v2'].sum(),1))
    print('\n', roi + ': ', main_glac_rgi_dc.shape[0], 'glaciers -', dc_area_total, 'km2')

    dc_area_gt2km2 = str(np.round(main_glac_rgi_dc_gt2km2['DC_Area_v2'].sum(),1))
    print('  (> 2 km2): ', main_glac_rgi_dc_gt2km2.shape[0], 'glaciers -', dc_area_gt2km2, 'km2')

    dc_area_wdata = str(np.round(main_glac_rgi_dc_wdata['DC_Area_v2'].sum(),1))
    print('  (w data): ', main_glac_rgi_dc_wdata.shape[0], 'glaciers -', dc_area_wdata, 'km2\n\n')

In [5]:
# Completion marker: prints 'DONE' padded with blank lines so it stands out in the cell output
print('\n\nDONE\n\n')



DONE




In [27]:
# Compute statistics for individual glaciers
# For each glacier string below, load its debris thickness and melt factor rasters, warp them onto
# the glacier DEM grid, mask to the glacier polygon, and print how much of the debris-covered
# area falls below/above selected melt factor values.
# rgiids = ['1.15645', '11.03005', '14.06794']
rgiids = ['13.43232','15.03473']

# Diagnostic plots are only drawn when both switches are True
show_plots = False
debug = False

## ===== REGIONAL MELT FACTOR STATISTICS =====
for nglacier, rgiid in enumerate(rgiids):

    print('rgiid:', rgiid)

    # rgiid is '<region>.<glacier number>'; file paths use the zero-padded two-digit region
    roi = rgiid.split('.')[0].zfill(2)
    glacno = rgiid.split('.')[1]
    glac_str = rgiid

    # Filepaths (regions 13-15 share the HMA directory)
    if roi in ['13', '14', '15']:
        hd_fp = debris_prms.output_fp + 'ts_tif/hd_tifs/HMA/'
    else:
        hd_fp = debris_prms.output_fp + 'ts_tif/hd_tifs/' + roi + '/'
    hd_fp_extrap = hd_fp + 'extrap/'
    mf_fp = hd_fp + 'meltfactor/'
    mf_fp_extrap = hd_fp_extrap + 'meltfactor/'

    # Glaciers optimized: prefer the '_hdts_m.tif' raster, fall back to the '_extrap' one
    if os.path.exists(hd_fp + rgiid + '_hdts_m.tif'):
        hd_fullfn = hd_fp + rgiid + '_hdts_m.tif'
        mf_fullfn = mf_fp + rgiid + '_meltfactor.tif'
    elif os.path.exists(hd_fp_extrap + rgiid + '_hdts_m_extrap.tif'):
        hd_fullfn = hd_fp_extrap + rgiid + '_hdts_m_extrap.tif'
        mf_fullfn = mf_fp_extrap + rgiid + '_meltfactor_extrap.tif'
    else:
        # Without this branch the filenames would be undefined (first glacier) or silently
        # reused from the previous glacier, producing statistics for the wrong rasters.
        print('  no debris thickness raster found for ' + rgiid + '; skipping')
        continue

    # Create glacier feature from ice thickness raster
    rgi_region_str = 'RGI60-' + roi
    thick_dir = debris_prms.oggm_fp + 'thickness/' + rgi_region_str + '/'
    thick_fn = rgi_region_str + '.' + glacno + '_thickness.tif'

    gf = create_glacfeat(thick_dir, thick_fn)

    # =====FILENAMES =====
    # Add the filenames (insertion order defines the order of the warped datasets below)
    fn_dict = OrderedDict()
    # DEM
    z1_fp = debris_prms.oggm_fp + 'dems/' + rgi_region_str + '/'
    z1_fn = rgi_region_str + '.' + glacno + '_dem.tif'
    fn_dict['z1'] = z1_fp + z1_fn

    # Debris thickness and melt factors
    fn_dict['debris_thick_ts'] = hd_fullfn
    fn_dict['meltfactor_ts'] = mf_fullfn

    # Ice thickness
    fn_dict['ice_thick'] = thick_dir + thick_fn

    # ===== PROCESS THE DATA =====
    #Expand extent to include buffered region around glacier polygon
    warp_extent = geolib.pad_extent(gf.glac_geom_extent, width=debris_prms.buff_dist)
    if verbose:
        print("Expanding extent")
        print(gf.glac_geom_extent)
        print(warp_extent)
        print(gf.aea_srs)

    #Warp everything to common res/extent/proj
    # Target resolution is the smaller of the DEM's x and y pixel sizes
    z1_gt = gdal.Open(fn_dict['z1']).GetGeoTransform()
    z1_res = np.min([z1_gt[1], -z1_gt[5]])
    # resampling algorithm
    r_resampling = 'cubic'
    ds_list = warplib.memwarp_multi_fn(fn_dict.values(), res=z1_res, extent=warp_extent,
                                       t_srs=gf.aea_srs, verbose=verbose, r=r_resampling)
    ds_dict = dict(zip(fn_dict.keys(), ds_list))
    gf.ds_dict = ds_dict

    if verbose:
        print(ds_list)
        print(fn_dict.keys())

    glac_geom_mask = geolib.geom2mask(gf.glac_geom, ds_dict['z1'])
    gf.z1 = np.ma.array(iolib.ds_getma(ds_dict['z1']), mask=glac_geom_mask)

    # Debris thickness values of 0 are masked (use meltfactor mask instead)
    gf.meltfactor_ts = np.ma.array(iolib.ds_getma(ds_dict['meltfactor_ts']), mask=glac_geom_mask)
    gf.debris_thick_ts = np.ma.array(iolib.ds_getma(ds_dict['debris_thick_ts']), mask=glac_geom_mask)
    gf.debris_thick_ts = np.ma.array(gf.debris_thick_ts.data, mask=gf.meltfactor_ts.mask)

    gf.res = geolib.get_res(ds_dict['z1'])

    if verbose:
        print('\n\n# z1 pixels:', gf.z1.count(), '\n')


    # ===== PLOTS =====
    if debug and show_plots:
        # DEM
        var_full2plot = gf.z1.copy()
        clim = malib.calcperc(var_full2plot, (2,98))
        plot_array(var_full2plot, clim, [glac_str + ' DEM'], 'inferno', 'elev (masl)', close_fig=False)
        # Debris thickness
        var_full2plot = gf.debris_thick_ts.copy()
        clim = (0,1)
        plot_array(var_full2plot, clim, [gf.glacnum + ' hd (from ts)'], 'inferno', 'hd (m)',
                   close_fig=False)
        # Melt factor
        var_full2plot = gf.meltfactor_ts.copy()
        clim = (0,1)
        plot_array(var_full2plot, clim, [gf.glacnum + ' meltfactor'], 'inferno', 'mf (-)',
                   close_fig=False)

    # Get list of values (unmasked pixels only; both arrays share the melt factor mask)
    hd_list = list(gf.debris_thick_ts.compressed())
    mf_list = list(gf.meltfactor_ts.compressed())

    # NaN debris thickness values are replaced by 0, which keeps hd_list and mf_list the same
    # length. (A later NaN-removal pass in the previous version could never find a NaN after
    # this replacement and has been dropped.)
    hd_list = [0 if np.isnan(x) else x for x in hd_list]

    if len(hd_list) > 0:
        # Sanity checks on the extracted values
        assert len(hd_list) == len(mf_list), 'hd_list and mf_list differ; NEED TO MASK THESE VALUES OR RE-PROCESS'
        rounding_err = 1e-6
        assert np.max(hd_list) <= debris_prms.hd_max + rounding_err and np.min(hd_list) >= -rounding_err, 'hd outside of bounds' # rounding error may give -1e-12 for some values
        assert np.min(mf_list) >= -rounding_err, 'negative melt factor'
        assert np.max(mf_list) <= 10, 'melt factor greater than 10!'

        # Every pixel contributes the same area: |res_x * res_y| converted from m2 to km2
        pixel_m2 = abs(gf.res[0] * gf.res[1])
        area_m2_list = [pixel_m2] * len(hd_list)

        mf_array = np.array(mf_list)
        area_km2_array = np.array(area_m2_list) / 1e6
        dc_area_km2 = area_km2_array.sum()
        print('  total dc_area (km2):', np.round(dc_area_km2,2))

        # Area (km2 and % of total) of the pixels falling in each melt factor class
        mf_classes = [('  dc_area (mf < 0.1):', mf_array < 0.1),
                      ('  dc_area (mf < 0.5):', mf_array < 0.5),
                      ('  dc_area (mf > 1):', mf_array > 1)]
        for class_label, class_mask in mf_classes:
            mf_idxs = np.where(class_mask)[0]
            class_area_km2 = area_km2_array[mf_idxs].sum()
            print(class_label,
                  np.round(class_area_km2,2),
                  '(' + str(np.round(class_area_km2 / dc_area_km2 * 100,1)) + '%)')
        print('  mf_min:', np.round(mf_array.min(),2), 'mf_max:', np.round(mf_array.max(),2))
    

rgiid: 13.43232
  total dc_area (km2): 20.95
  dc_area (mf < 0.1): 3.53 (16.8%)
  dc_area (mf < 0.5): 16.38 (78.2%)
  dc_area (mf > 1): 2.27 (10.8%)
  mf_min: 0.03 mf_max: 1.26
rgiid: 15.03473
  total dc_area (km2): 19.09
  dc_area (mf < 0.1): 6.57 (34.4%)
  dc_area (mf < 0.5): 16.1 (84.4%)
  dc_area (mf > 1): 1.67 (8.7%)
  mf_min: 0.03 mf_max: 1.31


In [12]:
# Summary of data that has elevation change and velocity data
# For each region, compare the debris-covered area of the glaciers listed in the region's
# dhdt_vel_fns csv against the debris-covered area of all glaciers in the debris-cover
# shapefile, and write the resulting table to csv.
rois = ['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18']
# rois = ['15']


def _rgiid_to_glac_str(rgiid_full):
    """Convert an id of the form '<prefix>-<region>.<number>' to '<region without leading zero>.<number>'."""
    region_str, glacno_str = rgiid_full.split('-')[1].split('.')[:2]
    return str(int(region_str)) + '.' + glacno_str


output_cns = ['roi', 'dc_km2_wdata', 'dc_km2_total','mb_vel_perc']
# Rows are collected and the table is built once at the end. Pre-allocating a float table and
# writing the string roi into it relies on silent dtype upcasting, which pandas has deprecated.
output_rows = []
for nroi, roi in enumerate(rois):
    # Regions 13-15 share the 'HMA' input files and are split by O1Region below
    if roi in ['13','14','15']:
        roi_4dict = 'HMA'
    else:
        roi_4dict = roi

    dhdt_vel_df = pd.read_csv(debris_prms.output_fp + 'dhdt_vel_fns/' + roi_4dict + '-dhdt_vel_fns.csv')
    dhdt_vel_df['O1Region'] = [x.split('-')[1].split('.')[0] for x in dhdt_vel_df.RGIId.values]

    dc_shp = gpd.read_file(debris_prms.debriscover_fp + debris_prms.debriscover_fn_dict[roi_4dict])
    dc_shp = dc_shp.sort_values(by=['RGIId'])

    if roi in ['13','14','15']:
        dhdt_vel_df = dhdt_vel_df[dhdt_vel_df.O1Region == roi]
        dc_shp = dc_shp[dc_shp.O1Region == roi]

    dc_rgiids = [_rgiid_to_glac_str(x) for x in dc_shp.RGIId]
    dhdt_vel_df_rgiids = [_rgiid_to_glac_str(x) for x in dhdt_vel_df.RGIId]

    # Select glaciers
    main_glac_rgi_dc = debris_prms.selectglaciersrgitable(dc_rgiids)
    main_glac_rgi_wdata = debris_prms.selectglaciersrgitable(dhdt_vel_df_rgiids)

    # Add debris stats to area
    #  DC_Area_% comes from the shapefile (0 when a glacier is missing from it);
    #  DC_Area_v2 is recomputed from the table's own 'Area' column.
    dc_areaperc_dict = dict(zip(dc_shp.RGIId.values, dc_shp['DC_Area__1'].values))
    for rgi_table in (main_glac_rgi_dc, main_glac_rgi_wdata):
        rgi_table['DC_Area_%'] = rgi_table.RGIId.map(dc_areaperc_dict).fillna(0)
        rgi_table['DC_Area_v2'] = rgi_table['Area'] * rgi_table['DC_Area_%'] / 100

    # Record data
    dc_km2_wdata = main_glac_rgi_wdata['DC_Area_v2'].sum()
    dc_km2_total = main_glac_rgi_dc['DC_Area_v2'].sum()
    both_data_perc =  dc_km2_wdata / dc_km2_total * 100

    print(roi, np.round(both_data_perc))
    output_rows.append([roi, dc_km2_wdata, dc_km2_total, both_data_perc])

output_df = pd.DataFrame(output_rows, columns=output_cns)
output_df.to_csv(debris_prms.output_fp + 'dhdt_vel_data_percent_table.csv', index=False)

6834 glaciers in region 1 are included in this model run: ['00006', '00010', '00012', '00013', '00016', '00017', '00020', '00021', '00022', '00023', '00025', '00027', '00029', '00030', '00032', '00033', '00035', '00036', '00037', '00038', '00040', '00041', '00042', '00044', '00045', '00046', '00068', '00080', '00112', '00118', '00130', '00135', '00138', '00140', '00148', '00151', '00152', '00154', '00159', '00170', '00176', '00187', '00189', '00191', '00194', '00196', '00202', '00224', '00225', '00232'] and more
This study is focusing on 6834 glaciers in region [1]
1109 glaciers in region 1 are included in this model run: ['00006', '00013', '00027', '00033', '00035', '00037', '00038', '00040', '00041', '00042', '00044', '00045', '00046', '00140', '00148', '00187', '00242', '00312', '00336', '00348', '00351', '00399', '00409', '00426', '00434', '00436', '00537', '00544', '00556', '00557', '00558', '00560', '00561', '00565', '00566', '00569', '00570', '00571', '00572', '00574', '00576', 

This study is focusing on 621 glaciers in region [8]
25 glaciers in region 8 are included in this model run: ['00006', '00015', '00024', '00040', '00072', '00075', '00090', '00105', '00126', '00137', '00199', '00213', '00245', '00767', '01482', '01483', '01485', '01486', '01558', '01607', '01623', '01650', '01658', '02256', '02392'] and more
This study is focusing on 25 glaciers in region [8]
08 15.0
527 glaciers in region 9 are included in this model run: ['00014', '00016', '00025', '00027', '00028', '00029', '00031', '00033', '00034', '00035', '00038', '00040', '00051', '00053', '00055', '00056', '00057', '00058', '00060', '00061', '00062', '00064', '00065', '00066', '00067', '00069', '00070', '00071', '00072', '00073', '00077', '00079', '00080', '00081', '00083', '00086', '00087', '00088', '00090', '00091', '00092', '00093', '00094', '00095', '00096', '00097', '00099', '00101', '00102', '00103'] and more
This study is focusing on 527 glaciers in region [9]
69 glaciers in region 9 ar

1433 glaciers in region 16 are included in this model run: ['00001', '00002', '00003', '00004', '00005', '00006', '00007', '00008', '00009', '00010', '00011', '00012', '00014', '00015', '00018', '00019', '00020', '00021', '00022', '00023', '00025', '00026', '00027', '00028', '00029', '00030', '00031', '00032', '00033', '00034', '00035', '00036', '00037', '00038', '00039', '00040', '00041', '00042', '00043', '00044', '00045', '00046', '00047', '00048', '00049', '00050', '00051', '00052', '00054', '00056'] and more
This study is focusing on 1433 glaciers in region [16]
209 glaciers in region 16 are included in this model run: ['00080', '00141', '00163', '00173', '00176', '00177', '00205', '00213', '00214', '00216', '00228', '00244', '00248', '00256', '00261', '00274', '00285', '00287', '00288', '00289', '00299', '00331', '00332', '00337', '00360', '00361', '00362', '00363', '00366', '00368', '00370', '00372', '00373', '00410', '00413', '00417', '00427', '00428', '00433', '00485', '00486'

In [68]:
# Summary DEMs used for each region
# For each region, report which DEM sources (from the OGGM statistics csv) were used for the
# debris-covered glaciers and the percentage of debris-covered area each source accounts for.
rois = ['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18']
# rois = ['15']

output_cns = ['roi', 'dem sources']
# Rows are collected and the table is built once at the end. Pre-allocating a float table and
# writing strings into it relies on silent dtype upcasting, which pandas has deprecated.
output_rows = []
for nroi, roi in enumerate(rois):
    # Regions 13-15 share the 'HMA' shapefile and are split by O1Region below
    if roi in ['13','14','15']:
        roi_4dict = 'HMA'
    else:
        roi_4dict = roi
    dc_shp = gpd.read_file(debris_prms.debriscover_fp + debris_prms.debriscover_fn_dict[roi_4dict])
    dc_shp = dc_shp.sort_values(by=['RGIId'])

    if roi in ['13','14','15']:
        dc_shp = dc_shp[dc_shp.O1Region == roi]

    # Convert shapefile ids to '<region without leading zero>.<glacier number>' strings
    dc_rgiids = [str(int(x.split('-')[1].split('.')[0])) + '.' + x.split('-')[1].split('.')[1] for x in dc_shp.RGIId]

    # Select glaciers
    main_glac_rgi_dc = debris_prms.selectglaciersrgitable(dc_rgiids)

    # Add debris stats to area
    #  DC_Area_% comes from the shapefile (0 when a glacier is missing from it);
    #  DC_Area_v2 is recomputed from the table's own 'Area' column.
    dc_areaperc_dict = dict(zip(dc_shp.RGIId.values, dc_shp['DC_Area__1'].values))
    main_glac_rgi_dc['DC_Area_%'] = main_glac_rgi_dc.RGIId.map(dc_areaperc_dict).fillna(0)
    main_glac_rgi_dc['DC_Area_v2'] = main_glac_rgi_dc['Area'] * main_glac_rgi_dc['DC_Area_%'] / 100

    # Load the DEM sources
    dem_df_all = pd.read_csv(debris_prms.oggm_fp + 'statistics/RGI60-' + roi + '.csv')
    dem_df_all = dem_df_all.sort_values('rgi_id').reset_index(drop=True)

    # Row position of every selected glacier in the DEM table. A dict replaces repeated
    # list.index() scans; setdefault keeps the first occurrence, as list.index() did.
    dem_idx_by_rgiid = {}
    for dem_idx, dem_rgiid in enumerate(dem_df_all.rgi_id.values):
        dem_idx_by_rgiid.setdefault(dem_rgiid, dem_idx)
    # A glacier missing from the DEM table raises KeyError here (no silent skipping)
    dem_idx_all = [dem_idx_by_rgiid[rgiid] for rgiid in main_glac_rgi_dc.RGIId.values]

    # Subset of sources for the region (rows are now in the same order as main_glac_rgi_dc)
    dem_df_roi = dem_df_all.loc[dem_idx_all].reset_index(drop=True)

    # Copy by position (.values): the rows were matched positionally above, so the result
    # must not depend on what index main_glac_rgi_dc happens to carry.
    dem_df_roi['DC_Area_v2'] = main_glac_rgi_dc['DC_Area_v2'].values
    dem_df_roi['Area'] = main_glac_rgi_dc['Area'].values

    # Distinct DEM sources in order of first appearance; dropna() removes missing sources
    # reliably, whereas 'np.nan in list' depends on object identity and can miss a NaN.
    unique_dems = list(dem_df_roi.dem_source.dropna().unique())
    dc_area_total = main_glac_rgi_dc['DC_Area_v2'].sum()
    area_total = main_glac_rgi_dc['Area'].sum()

    print(dc_area_total, area_total)

    # '<source> (<% of debris-covered area>)' entries joined by ', '; None when no source is known
    dem_strs = []
    for dem in unique_dems:
        dem_df_roi_subset = dem_df_roi[dem_df_roi['dem_source'] == dem]
        dc_area_perc = dem_df_roi_subset.DC_Area_v2.sum() / dc_area_total * 100
        dem_strs.append(dem + ' (' + str(np.round(dc_area_perc,1)) + ')')
    output_str = ', '.join(dem_strs) if dem_strs else None

    print(roi, output_str)
    output_rows.append([roi, output_str])

output_df = pd.DataFrame(output_rows, columns=output_cns)
# NOTE(review): 'ogggm' (three g's) looks like a typo of 'oggm'; the filename is kept unchanged
# because other code may read this path - confirm before renaming.
output_df.to_csv(debris_prms.output_fp + 'ogggm_dem_source_table.csv', index=False)

6834 glaciers in region 1 are included in this model run: ['00006', '00010', '00012', '00013', '00016', '00017', '00020', '00021', '00022', '00023', '00025', '00027', '00029', '00030', '00032', '00033', '00035', '00036', '00037', '00038', '00040', '00041', '00042', '00044', '00045', '00046', '00068', '00080', '00112', '00118', '00130', '00135', '00138', '00140', '00148', '00151', '00152', '00154', '00159', '00170', '00176', '00187', '00189', '00191', '00194', '00196', '00202', '00224', '00225', '00232'] and more
This study is focusing on 6834 glaciers in region [1]
6959.984815399999 75552.992
01 ALASKA (19.5), ARCTICDEM (56.7), SRTM (23.8)
3313 glaciers in region 2 are included in this model run: ['00006', '00016', '00018', '00020', '00039', '00079', '00097', '00098', '00118', '00126', '00134', '00147', '00157', '00173', '00174', '00180', '00208', '00212', '00226', '00227', '00242', '00252', '00255', '00256', '00259', '00280', '00288', '00289', '00291', '00296', '00305', '00317', '0033

2499.2898779399998 27288.634000000002
13 SRTM (100.0)
5873 glaciers in region 14 are included in this model run: ['00005', '00018', '00020', '00026', '00028', '00029', '00032', '00033', '00036', '00043', '00056', '00057', '00063', '00065', '00072', '00075', '00079', '00097', '00101', '00104', '00122', '00127', '00131', '00142', '00145', '00146', '00154', '00155', '00163', '00213', '00219', '00222', '00225', '00243', '00251', '00271', '00287', '00288', '00323', '00332', '00342', '00346', '00347', '00350', '00352', '00353', '00363', '00366', '00367', '00370'] and more
This study is focusing on 5873 glaciers in region [14]
2918.4395827099997 25430.577
14 SRTM (100.0)
5584 glaciers in region 15 are included in this model run: ['00001', '00007', '00008', '00017', '00018', '00019', '00020', '00021', '00022', '00023', '00024', '00026', '00027', '00028', '00029', '00030', '00031', '00032', '00033', '00034', '00035', '00037', '00038', '00039', '00040', '00042', '00043', '00044', '00045', '00047