In [1]:
#! /usr/bin/env python
"""
Compute elevation statistics of the debris-covered glacier area for each unique latitude/longitude
(i.e., for each nearest meteorological data pixel)
"""

import sys
import os
import re
import subprocess
from datetime import datetime, timedelta
import time
import pickle
from collections import OrderedDict

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from scipy import ndimage
import xarray as xr
from osgeo import gdal, ogr, osr

from pygeotools.lib import malib, warplib, geolib, iolib, timelib

import debrisglobal.globaldebris_input as debris_prms

In [2]:
# Load the debris cover extent shapefile (with statistics) for the region of interest, ordered by RGIId
dc_shp_fullfn = debris_prms.debriscover_fp + debris_prms.debriscover_fn_dict[debris_prms.roi]
dc_shp = gpd.read_file(dc_shp_fullfn).sort_values(by=['RGIId'])

# 'DC_Area_v2' is divided by 1e6 before being reported as km2 (presumably stored in m2 - confirm)
print('All DC glaciers:', len(dc_shp), 'All DC Area (km2):', dc_shp['DC_Area_v2'].sum() / 1e6)

# A glacier is kept when it is debris-covered enough (by percent OR by absolute area)
# AND the glacier itself is larger than the minimum glacier area
meets_dc_threshold = ((dc_shp['DC_Area__1'] > debris_prms.dc_percarea_threshold) |
                      (dc_shp['DC_Area_v2'] / 1e6 > debris_prms.dc_area_threshold))
meets_glac_area = dc_shp['Area'] > debris_prms.min_glac_area
dc_shp_subset = dc_shp[meets_dc_threshold & meets_glac_area].reset_index(drop=True)

# Glacier numbers without the 'RGI60-' prefix (e.g., '11.00002')
rgino_str_list_subset = [rgiid.split('-')[1] for rgiid in dc_shp_subset['RGIId'].values]

print('Subset DC glaciers:', len(dc_shp_subset), 'Subset DC Area (km2):', dc_shp_subset['DC_Area_v2'].sum() / 1e6)

dc_shp_subset

All DC glaciers: 2919 All DC Area (km2): 220.176821
Subset DC glaciers: 156 Subset DC Area (km2): 126.509554


Unnamed: 0,RGIId,GLIMSId,BgnDate,EndDate,CenLon,CenLat,O1Region,O2Region,Area,Zmin,...,Name,DC_Area,DC_BgnDate,DC_EndDate,DC_CTSmean,DC_Area_%,area_singl,DC_Area_v2,DC_Area__1,geometry
0,RGI60-11.00002,G013614E47485N,20030799,20030999,13.614373,47.483905,11,1,2.292,2203,...,,186300,2013,2017,29.698587,8.128,900,186399,8.133,"MULTIPOLYGON (((13.60653 47.47813, 13.60692 47..."
1,RGI60-11.00047,G012719E47139N,20030799,20030999,12.718158,47.139499,11,1,2.273,2307,...,,152100,2013,2017,20.334521,6.692,900,156609,6.890,"MULTIPOLYGON (((12.70776 47.13999, 12.70815 47..."
2,RGI60-11.00054,G012372E47149N,20030799,20030999,12.370435,47.149801,11,1,2.274,2359,...,,143100,2013,2017,17.590514,6.293,900,143075,6.292,"MULTIPOLYGON (((12.35997 47.14789, 12.36037 47..."
3,RGI60-11.00068,G012345E47132N,20030799,20030999,12.343717,47.135779,11,1,2.738,2162,...,,176400,2013,2017,18.493408,6.443,900,176364,6.441,"MULTIPOLYGON (((12.35453 47.12994, 12.35493 47..."
4,RGI60-11.00106,G012697E47099N,20030799,20030999,12.698172,47.094468,11,1,17.774,2086,...,Pasterze,2984400,2013,2017,23.575460,16.791,4500,3036802,17.086,"MULTIPOLYGON (((12.70993 47.08010, 12.71112 47..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151,RGI60-11.03694,G006263E44892N,20030813,-9999999,6.259492,44.893911,11,1,2.242,2405,...,FR4N01166E06 du Vallon des Etages,813600,2013,2017,42.305511,36.289,900,814213,36.316,"MULTIPOLYGON (((6.25929 44.88545, 6.25967 44.8..."
152,RGI60-11.03698,G006988E45987N,20030813,-9999999,6.995296,45.985482,11,1,8.124,2193,...,FR4N01234A01 du Tour,573300,2013,2017,38.465296,7.057,5401,577015,7.103,"MULTIPOLYGON (((7.00593 45.96921, 7.00709 45.9..."
153,RGI60-11.03701,G007144E45371N,20030813,-9999999,7.145103,45.375305,11,1,2.373,2761,...,FR4N01153B19 du Mulinet,403200,2013,2017,38.446785,16.991,900,404213,17.034,"MULTIPOLYGON (((7.15623 45.36957, 7.15661 45.3..."
154,RGI60-11.03740,G007123E45253N,20030813,-9999999,7.118216,45.254020,11,1,2.067,2773,...,FR4N01153C19 du Baounet 1,453600,2013,2017,41.382340,21.945,1800,464972,22.495,"MULTIPOLYGON (((7.12013 45.24476, 7.12090 45.2..."


In [3]:
# Select glaciers using RGI and find unique latlons
#  (Scherler DC shapefiles do not have same CenLat and CenLon for some reason, so the values from the
#   RGI table returned by selectglaciersrgitable are used for the nearest-pixel search)
main_glac_rgi_subset = debris_prms.selectglaciersrgitable(rgino_str_list_subset)
# Shift negative longitudes by +360 (presumably the met data grid uses 0-360 longitudes - confirm)
cenlon = main_glac_rgi_subset['CenLon']
main_glac_rgi_subset['CenLon_360'] = cenlon.where(~(cenlon < 0), cenlon + 360)

# Load met data and find nearest latlon indices
#  `ds` is intentionally left open: later cells index ds.latitude / ds.longitude with the indices found here
ds = xr.open_dataset(debris_prms.metdata_fp + '../' + debris_prms.metdata_elev_fn)
ds_lats = ds['latitude'].values
ds_lons = ds['longitude'].values
#  argmin() finds the minimum distance between the glacier lat/lon and the GCM pixel
#  (broadcast: one row per glacier, one column per grid coordinate)
lat_nearidx = np.abs(main_glac_rgi_subset['CenLat'].values[:, np.newaxis] - ds_lats).argmin(axis=1)
lon_nearidx = np.abs(main_glac_rgi_subset['CenLon_360'].values[:, np.newaxis] - ds_lons).argmin(axis=1)
latlon_nearidx = list(zip(lat_nearidx, lon_nearidx))
latlon_nearidx_unique = sorted(set(latlon_nearidx))
main_glac_rgi_subset['latlon_nearidx'] = latlon_nearidx

# Number every unique (lat index, lon index) pair; keep both lookup directions
latlon_unique_nos = np.arange(0, len(latlon_nearidx_unique))
latlon_unique_dict = dict(zip(latlon_nearidx_unique, latlon_unique_nos))
latlon_unique_dict_reversed = dict(zip(latlon_unique_nos, latlon_nearidx_unique))
main_glac_rgi_subset['latlon_unique_no'] = main_glac_rgi_subset['latlon_nearidx'].map(latlon_unique_dict)

print('unique lat/lons:', len(np.unique(main_glac_rgi_subset['latlon_unique_no'])), '\n\n')

# Grid coordinates (degrees) of every unique pixel, in the same order as latlon_nearidx_unique
lat_list = np.array([ds_lats[lat_idx] for lat_idx, lon_idx in latlon_nearidx_unique])
lon_list = np.array([ds_lons[lon_idx] for lat_idx, lon_idx in latlon_nearidx_unique])
latlon_list = list(zip(lat_list, lon_list))

# Pickle unique lat/lons that will be used for melt model
with open(debris_prms.latlon_unique_fp + debris_prms.latlon_unique_dict[debris_prms.roi], 'wb') as f:
    pickle.dump(latlon_list, f)

156 glaciers in region 11 are included in this model run: ['00002', '00047', '00054', '00068', '00106', '00110', '00116', '00135', '00141', '00190', '00199', '00233', '00278', '00376', '00415', '00459', '00469', '00487', '00524', '00541', '00597', '00719', '00781', '00797', '00830', '00846', '00871', '00886', '00887', '00897', '00918', '00929', '00932', '00943', '00945', '00950', '00957', '00958', '01144', '01187', '01193', '01246', '01275', '01296', '01328', '01346', '01450', '01478', '01509', '01550'] and more
This study is focusing on 156 glaciers in region [11]
unique lat/lons: 47 




In [4]:
# ===== LOAD GLACIERS WITH DATA =====
# Find the binned mass balance file ('<region>.<glacier number>_mb_bins.csv') of every glacier in the
# regions of interest and attach its filename to the debris-covered glacier table as column 'mb_fn'.
mb_binned_fp = debris_prms.mb_binned_fp

regions_str = [str(x).zfill(2) for x in debris_prms.roi_rgidict[debris_prms.roi]]

# Collect (RGIId, filename) as pairs so they can never get out of step with each other.
#  Sorting the RGIIds and the filenames independently is unsafe: RGIIds are zero-padded ('RGI60-02...')
#  while filenames are not ('2....'), so the two orderings differ for mixed 1- and 2-digit regions.
mb_rgiid_fn_pairs = []
for mb_fn in os.listdir(mb_binned_fp):
    if not mb_fn.endswith('_mb_bins.csv'):
        continue
    rgiid_raw = mb_fn.split('_')[0]
    region_str = rgiid_raw.split('.')[0].zfill(2)
    if region_str in regions_str:
        rgiid = 'RGI60-' + region_str + '.' + rgiid_raw.split('.')[1]
        mb_rgiid_fn_pairs.append((rgiid, mb_fn))
mb_rgiid_fn_pairs.sort()
mb_rgiids = [rgiid for rgiid, mb_fn in mb_rgiid_fn_pairs]
mb_fns = [mb_fn for rgiid, mb_fn in mb_rgiid_fn_pairs]
mb_fn_df = pd.DataFrame({'RGIId': mb_rgiids, 'mb_fn': mb_fns})

# Find glaciers that are debris-covered (i.e., present in the debris-covered glacier table)
mb_fn_df_dc = mb_fn_df[mb_fn_df['RGIId'].isin(main_glac_rgi_subset['RGIId'])].sort_values('RGIId')
mb_dc_rgiid = list(mb_fn_df_dc['RGIId'].values)

print('Debris-covered glaciers:', mb_fn_df_dc.shape[0], '\n\n')

mb_fn_dict = dict(zip(mb_fn_df_dc['RGIId'].values, mb_fn_df_dc['mb_fn'].values))

# Glaciers without a mass balance file get NaN from map() and are dropped downstream with dropna
main_glac_rgi_subset['mb_fn'] = main_glac_rgi_subset.RGIId.map(mb_fn_dict)

Debris-covered glaciers: 150 




In [5]:
# ===== SELECT GLACIERS WITH DATA ====
# Keep only the glaciers that were matched to a binned mass balance file
main_glac_rgi_wobs = main_glac_rgi_subset.dropna(subset=['mb_fn']).reset_index(drop=True)

# Update the latlon unique pickle files
#  (re-number the unique nearest-pixel index pairs using only the glaciers that remain)
latlon_nearidx_unique = sorted(set(main_glac_rgi_wobs['latlon_nearidx'].values))
latlon_unique_nos = np.arange(0, len(latlon_nearidx_unique))
latlon_unique_dict = dict(zip(latlon_nearidx_unique, latlon_unique_nos))
latlon_unique_dict_reversed = dict(zip(latlon_unique_nos, latlon_nearidx_unique))
main_glac_rgi_wobs['latlon_unique_no'] = main_glac_rgi_wobs['latlon_nearidx'].map(latlon_unique_dict)

print('unique lat/lons:', len(np.unique(main_glac_rgi_wobs['latlon_unique_no'])), '\n\n')

# Grid coordinates of every remaining unique pixel, read from the open met data grid `ds`
lat_list = np.array([ds.latitude[lat_idx].values for lat_idx, lon_idx in latlon_nearidx_unique])
lon_list = np.array([ds.longitude[lon_idx].values for lat_idx, lon_idx in latlon_nearidx_unique])
latlon_list = list(zip(lat_list, lon_list))

# Pickle unique lat/lons that will be used for melt model
latlon_unique_fullfn = debris_prms.latlon_unique_fp + debris_prms.latlon_unique_dict[debris_prms.roi]
with open(latlon_unique_fullfn, 'wb') as f:
    pickle.dump(latlon_list, f)

main_glac_rgi_wobs

unique lat/lons: 47 




Unnamed: 0,O1Index,RGIId,CenLon,CenLat,O1Region,O2Region,Area,Zmin,Zmax,Zmed,...,TermType,Surging,RefDate,glacno,rgino_str,RGIId_float,CenLon_360,latlon_nearidx,latlon_unique_no,mb_fn
0,1,RGI60-11.00002,13.613500,47.484500,11,1,2.292,2203,2855,2526,...,0,9,20039999,2,11.00002,11.00002,13.613500,"(170, 54)",0,11.00002_mb_bins.csv
1,46,RGI60-11.00047,12.719400,47.138600,11,1,2.273,2307,3253,2967,...,0,9,20039999,47,11.00047,11.00047,12.719400,"(171, 51)",2,11.00047_mb_bins.csv
2,53,RGI60-11.00054,12.371700,47.148700,11,1,2.274,2359,3196,2779,...,0,9,20039999,54,11.00054,11.00054,12.371700,"(171, 49)",1,11.00054_mb_bins.csv
3,67,RGI60-11.00068,12.345300,47.132200,11,1,2.738,2162,3440,2759,...,0,9,20039999,68,11.00068,11.00068,12.345300,"(171, 49)",1,11.00068_mb_bins.csv
4,105,RGI60-11.00106,12.696700,47.099100,11,1,17.774,2086,3487,2984,...,0,9,20039999,106,11.00106,11.00106,12.696700,"(172, 51)",9,11.00106_mb_bins.csv
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,3686,RGI60-11.03687,6.334000,44.865000,11,1,3.040,2256,3513,2925,...,0,9,20030921,3687,11.03687,11.03687,6.334000,"(181, 25)",46,11.03687_mb_bins.csv
146,3697,RGI60-11.03698,6.988000,45.987000,11,1,8.124,2193,3707,2977,...,0,9,20030813,3698,11.03698,11.03698,6.988000,"(176, 28)",30,11.03698_mb_bins.csv
147,3700,RGI60-11.03701,7.144000,45.371000,11,1,2.373,2761,3340,3046,...,0,9,20030813,3701,11.03701,11.03701,7.144000,"(179, 29)",43,11.03701_mb_bins.csv
148,3739,RGI60-11.03740,7.122775,45.252968,11,1,2.067,2773,3310,3026,...,0,9,20030813,3740,11.03740,11.03740,7.122775,"(179, 28)",42,11.03740_mb_bins.csv


In [6]:
# print('DELETE ME - HACK FOR DEVELOPMENT')
# print(np.where(main_glac_rgi_wobs['latlon_unique_no'] == 172)[0])
# main_glac_rgi_wobs = main_glac_rgi_wobs.loc[372:373,:]
# main_glac_rgi_wobs['mb_fn'].values

In [7]:
# ===== DEBRIS ELEVATION STATS ====================================================================================
# CALCULATE DEBRIS ELEVATION STATS FOR GLACIERS WITH DATA FOR EACH UNIQUE LAT/LON
#  For every unique nearest met-data pixel, pool the elevation bins that pass the selection criteria from all
#  glaciers assigned to that pixel, weight each bin by its valid debris-covered pixel count, and store the
#  mean / std / median / MAD of the pooled elevations.
#  NOTE: `ds` must still be the met data grid that the nearest lat/lon indices were computed from; re-run the
#        cell that opens it if `ds` has been rebound since.
elev_stats_latlon_dict = {}
latlon_list_updated = []
rgiid_4cal = []
for latlon_unique in np.unique(main_glac_rgi_wobs.latlon_unique_no):

    # Glaciers belonging to this lat/lon. A separate name is used so that the notebook-level
    # main_glac_rgi_subset table is not clobbered by this loop.
    glac_latlon_df = main_glac_rgi_wobs[main_glac_rgi_wobs['latlon_unique_no'] == latlon_unique]
    glac_latlon_df = glac_latlon_df.reset_index(drop=True)

    # Debris elevation stats should be done by lat/lon
    elev_list_all = []
    df_idx_count = 0
    for nglac, glac_fn in enumerate(glac_latlon_df.mb_fn.values):

        glac_fullfn = debris_prms.mb_binned_fp + glac_fn

        glac_str_noleadzero = glac_fullfn.split('/')[-1].split('_')[0]
        rgiid = 'RGI60-' + glac_str_noleadzero.split('.')[0].zfill(2) + '.' + glac_str_noleadzero.split('.')[1]

        # Both values should agree (table RGIId vs. RGIId rebuilt from the filename)
        print(glac_latlon_df.loc[nglac,'RGIId'], rgiid)

        # Select bins that meet calibration criteria
        df_raw = pd.read_csv(glac_fullfn)
        # Reset the index after dropping bins without mass balance so that row labels equal row positions
        # (also yields a fresh frame, so adding columns below does not write into a slice of df_raw)
        df = df_raw.dropna(subset=['mb_bin_mean_mwea']).reset_index(drop=True)
        df['z1_bin_areas_perc_cum'] = np.cumsum(df['z1_bin_area_valid_km2']) /df['z1_bin_area_valid_km2'].sum() * 100
        # add width to bins
        widths_fp = debris_prms.oggm_fp + 'widths/' + 'RGI60-' + rgiid.split('-')[1].split('.')[0] + '/'
        widths_fn = rgiid + '_widths_m.csv'
        try:
            # Add width to each elevation bin (width of the nearest elevation in the widths file)
            widths_df = pd.read_csv(widths_fp + widths_fn)
            elev_nearidx = (np.abs(df['bin_center_elev_m'].values[:,np.newaxis] -
                                   widths_df['elev'].values).argmin(axis=1))
            df['width_m'] = widths_df.loc[elev_nearidx,'width_m'].values
        except (OSError, KeyError, ValueError) as err:
            # Best effort: a missing, empty or malformed widths file gives the glacier zero width, which is
            # then compared against the minimum width in the selection below
            print('  no usable widths for', rgiid, '(' + type(err).__name__ + ')')
            df['width_m'] = 0

        # Boolean mask instead of positional indices fed to label-based .loc: the previous approach selected
        # the wrong bins (or raised KeyError) whenever dropna had removed a row ahead of a selected bin
        debris_mask = ((df['vm_med'] <= debris_prms.vel_threshold)
                       & (df['width_m'] >= debris_prms.width_min_dict[debris_prms.roi])
                       & (df['dc_bin_area_perc'] >= debris_prms.debrisperc_threshold)
                       & (df['dc_bin_count_valid'] >= 10)
                       & (df['z1_bin_areas_perc_cum'] <= debris_prms.term_area_perc)
                       )
        df_debris = df[debris_mask].reset_index(drop=True)
        df_idx_count += len(df_debris)

        if len(df_debris) > 0:
            # Repeat each bin elevation once per valid debris-covered pixel so that the pooled statistics
            # are weighted by pixel count
            #  (an earlier, now removed, variant restricted this to the contiguous terminus bins)
            elev_list_all.extend(np.repeat(df_debris['bin_center_elev_m'].values,
                                           df_debris['dc_bin_count_valid'].values.astype(int)))

            rgiid_4cal.append(rgiid.split('-')[1])

    if df_idx_count > 0:
        dc_zmean = np.mean(elev_list_all)
        dc_zstd = np.std(elev_list_all)
        dc_zmed = malib.fast_median(elev_list_all)
        dc_zmad = malib.mad(elev_list_all)

        lat_idx, lon_idx = latlon_unique_dict_reversed[latlon_unique]
        lat_deg = float(ds.latitude[lat_idx].values)
        lon_deg = float(ds.longitude[lon_idx].values)
        elev_stats_latlon_dict[lat_deg,lon_deg] = [dc_zmean, dc_zstd, dc_zmed, dc_zmad]
        latlon_list_updated.append((lat_deg, lon_deg))

print('unique lat/lons updated:', len(latlon_list_updated))
# Update pickle of unique lat/lons that will be used for melt model
#  (only lat/lons with at least one selected bin remain)
with open(debris_prms.latlon_unique_fp + debris_prms.latlon_unique_dict[debris_prms.roi], 'wb') as f:
    pickle.dump(latlon_list_updated, f)

RGI60-11.00002 RGI60-11.00002
RGI60-11.00054 RGI60-11.00054
RGI60-11.00068 RGI60-11.00068
RGI60-11.00110 RGI60-11.00110
RGI60-11.00047 RGI60-11.00047
RGI60-11.00719 RGI60-11.00719
RGI60-11.00487 RGI60-11.00487
RGI60-11.00376 RGI60-11.00376
RGI60-11.00541 RGI60-11.00541
RGI60-11.00597 RGI60-11.00597
RGI60-11.00415 RGI60-11.00415
RGI60-11.00459 RGI60-11.00459
RGI60-11.00469 RGI60-11.00469
RGI60-11.00524 RGI60-11.00524
RGI60-11.00116 RGI60-11.00116
RGI60-11.00141 RGI60-11.00141
RGI60-11.00233 RGI60-11.00233
RGI60-11.00278 RGI60-11.00278
RGI60-11.00135 RGI60-11.00135
RGI60-11.00190 RGI60-11.00190
RGI60-11.00199 RGI60-11.00199
RGI60-11.00106 RGI60-11.00106
RGI60-11.00830 RGI60-11.00830
RGI60-11.01144 RGI60-11.01144
RGI60-11.01187 RGI60-11.01187
RGI60-11.01193 RGI60-11.01193
RGI60-11.01246 RGI60-11.01246
RGI60-11.00918 RGI60-11.00918
RGI60-11.00932 RGI60-11.00932
RGI60-11.00797 RGI60-11.00797
RGI60-11.00781 RGI60-11.00781
RGI60-11.00846 RGI60-11.00846
RGI60-11.00897 RGI60-11.00897
RGI60-11.0

In [8]:
# Statistics of data coverage
#  Glaciers that contributed at least one elevation bin to the debris elevation statistics
rgiid_4cal = sorted(rgiid_4cal)
main_glac_rgi_4cal = debris_prms.selectglaciersrgitable(rgiid_4cal)

# Look up each glacier's debris-covered area in the full debris cover shapefile table
dc_area_dict = {rgiid: dc_area for rgiid, dc_area in zip(dc_shp['RGIId'].values, dc_shp['DC_Area_v2'].values)}
main_glac_rgi_4cal['DC_Area_v2'] = main_glac_rgi_4cal['RGIId'].map(dc_area_dict)

n_glac_4cal = main_glac_rgi_4cal.shape[0]
dc_area_4cal_km2 = main_glac_rgi_4cal['DC_Area_v2'].sum() / 1e6
print('\nDC glaciers (used for cal):', n_glac_4cal,
      'DC Area (used for cal, km2):', dc_area_4cal_km2)

147 glaciers in region 11 are included in this model run: ['00002', '00047', '00054', '00068', '00106', '00110', '00116', '00135', '00141', '00190', '00199', '00233', '00278', '00376', '00459', '00469', '00487', '00524', '00541', '00597', '00719', '00781', '00797', '00830', '00846', '00871', '00886', '00887', '00897', '00918', '00929', '00932', '00943', '00945', '00950', '00957', '00958', '01144', '01187', '01193', '01246', '01275', '01296', '01328', '01346', '01450', '01478', '01509', '01550', '01604'] and more
This study is focusing on 147 glaciers in region [11]

DC glaciers (used for cal): 147 DC Area (used for cal, km2): 122.84179


In [9]:
# ===== ADD DEBRIS ELEVATION STATS TO MET DATA ======
# For every lat/lon with debris elevation statistics, add (or overwrite) the four statistics as scalar
# variables in that lat/lon's meteorological netCDF file.
overwrite_dc_stats = True
dc_stats_comment = 'converted from debris cover with data that will be used for subdebris melt inversion'
# Variable name and long name, in the same order as the values stored in elev_stats_latlon_dict
dc_stats_vns = [('dc_zmean', 'Mean debris cover elevation'),
                ('dc_zstd', 'Standard deviation of debris cover elevation'),
                ('dc_zmed', 'Median debris cover elevation'),
                ('dc_zmad', 'Median absolute deviation of debris cover elevation')]
for nlatlon, latlon in enumerate(latlon_list_updated):

    lat_deg, lon_deg = latlon

    print(nlatlon, lat_deg, lon_deg)

    lat_str = 'S-' if lat_deg < 0 else 'N-'

    # ===== Meteorological data =====
    #  NOTE(review): int() truncates; exact for grid coordinates such as multiples of 0.25 deg - confirm that
    #  the met data files were named with the same rule
    metdata_fn = debris_prms.metdata_fn_sample.replace(
        'XXXX', str(int(np.abs(lat_deg)*100)) + lat_str + str(int(lon_deg*100)) + 'E-')
    metdata_fullfn = debris_prms.metdata_fp + metdata_fn

    # Read everything into memory and release the file handle before appending to the same file.
    #  A separate name is used so the notebook-level `ds` (met data grid indexed by earlier cells) is kept.
    with xr.open_dataset(metdata_fullfn) as ds_met:
        ds_met.load()

    dc_stats = elev_stats_latlon_dict[latlon]
    if 'dc_zmean' in ds_met:
        print('  existed:', ds_met.dc_zmean.values, 'vs', dc_stats[0])

    if 'dc_zmean' not in ds_met or overwrite_dc_stats:
        # Add stats
        #  NOTE(review): 'm a.s.l.' is also written as the unit of the std and MAD, which are spreads rather
        #  than elevations - left unchanged to keep the file metadata as before
        for nstat, (vn, long_name) in enumerate(dc_stats_vns):
            ds_met[vn] = dc_stats[nstat]
            ds_met[vn].attrs = {'units':'m a.s.l.', 'long_name':long_name, 'comment':dc_stats_comment}

        # Export updated dataset
        ds_met.to_netcdf(metdata_fullfn, mode='a')
    else:
        print(lat_deg, lon_deg, 'exists')

0 -43.25 170.75
  existed: 1233.2008567348882 vs 1157.875226039783
1 -43.25 171.0
  existed: 1180.7060755336618 vs 1192.5833333333333
2 -43.5 170.0
  existed: 827.355871886121 vs 827.355871886121
3 -43.5 170.25
  existed: 1041.164316966456 vs 1041.164316966456
4 -43.5 170.5
  existed: 1122.3825991991264 vs 1122.3825991991264
5 -43.75 170.0
  existed: 1069.362657091562 vs 1069.362657091562
6 -44.0 169.5
  existed: 1670.0393700787401 vs 1670.0393700787401
7 -44.5 168.25
  existed: 1026.4147130153597 vs 1026.4147130153597
8 -44.5 168.5
  existed: 1297.0892575039495 vs 1297.0892575039495
9 -44.5 168.75
  existed: 1389.0 vs 1389.0


In [10]:
# Final marker: printed once execution reaches the end of the processing cells
print('DONE!')

DONE!


In [11]:
# ==== OLD FILE OF LOADING MULTIPLE DATASETS =====
# # ===== LOAD GLACIERS WITH LARSEN DATA =====
# dc_shp_subset['larsen_fullfn'] = np.nan
# larsen_fullfn_dict = {}
# if 'larsen' in input.mb_datasets:
#     mb_summary = pd.read_csv(input.larsen_fp + input.larsen_fn)
    
#     # Find glaciers that are debris-covered
#     larsen_dc_rgiid = [value for value in list(mb_summary.RGIId.values) 
#                        if value in list(dc_shp_subset.RGIId.values)]

#     mb_summary_dc = mb_summary[mb_summary['RGIId'].isin(larsen_dc_rgiid)]
#     mb_summary_dc = mb_summary_dc.sort_values('RGIId')
#     mb_summary_dc.reset_index(inplace=True, drop=True)
#     mb_summary_dc.loc[mb_summary_dc['name'] == 'Maclaren', 'name'] = 'MacLaren'
#     mb_summary_dc.loc[mb_summary_dc['name'] == 'Tlikakila Fork', 'name'] = 'TlikakilaGlacierFork'
#     mb_summary_dc.loc[mb_summary_dc['name'] == 'Tlikakila N. Fork', 'name'] = 'TlikakilaNorthFork'
#     mb_summary_dc['larsen_fullfn'] = np.nan
    
#     for n, glac_name in enumerate(mb_summary_dc.name.values):
# #     for n, glac_name in enumerate([mb_summary_dc.name.values[47]]):
# #         print(n, glac_name)
            
#         glac_name = glac_name.replace(' ', '')
#         glac_fns = []
#         start_yr = []
#         end_yr = []
#         for i in os.listdir(input.larsen_binned_fp):
#             if i.startswith(glac_name):
#                 glac_fns.append(i)
#                 start_yr.append(i.split('.')[1][0:4])
#                 end_yr.append(i.split('.')[2][0:4])
                
#         if len(glac_fns) > 0:
#             yr_dif = np.array(end_yr).astype(int) - np.array(start_yr).astype(int)
#             mb_fn = glac_fns[np.where(yr_dif == yr_dif.max())[0][0]]
            
#             # ===== Process Larsen dataset =====
#             larsen_data_raw = np.genfromtxt(input.larsen_binned_fp + mb_fn, skip_header=3)
#             larsen_data_header = ['E', 'DZ', 'DZ25', 'DZ75', 'AAD', 'MassChange', 'MassBal', 'NumData']
#             larsen_data = pd.DataFrame(larsen_data_raw, columns=larsen_data_header)
#             larsen_data['std from DZ25'] = np.absolute(larsen_data['DZ'] - larsen_data['DZ25']) / 0.67
#             larsen_data['std from DZ75'] = np.absolute(larsen_data['DZ'] - larsen_data['DZ75']) / 0.67
#             larsen_data[' dhdt_bin_std_ma'] = (larsen_data['std from DZ25'] + larsen_data['std from DZ75']) / 2
#             larsen_data[' mb_bin_std_mwea'] = larsen_data[' dhdt_bin_std_ma'] * 900 / 1000
#             larsen_data['AAD'] = larsen_data['AAD'] / 1e6
#             larsen_data['startyear'] = int(mb_fn.split('.')[1][0:4])
#             larsen_data['endyear'] = int(mb_fn.split('.')[2][0:4])
#             larsen_data = larsen_data.rename({'E': '# bin_center_elev_m',
#                                               'DZ': ' dhdt_bin_mean_ma',
#                                               'MassBal': ' mb_bin_mean_mwea',
#                                               'AAD': ' z1_bin_area_valid_km2',
#                                              }, axis='columns')
#             new_fn = mb_summary_dc.loc[n,'RGIId'].split('-')[1][1:] + '_larsen_mb_bins.csv'
#             larsen_data.to_csv(input.larsen_binned_fp + new_fn, index=False)
            
#             mb_summary_dc.loc[n, 'larsen_fullfn'] = input.larsen_binned_fp + new_fn
            
#         else:
#             print(n, glac_name, 'has no file\n')

#     mb_summary_dc.dropna(subset=['larsen_fullfn'], inplace=True)
#     mb_summary_dc.reset_index(inplace=True, drop=True)
    
#     print('Larsen debris-covered glaciers:', mb_summary_dc.shape[0], '\n\n')
    
#     larsen_fullfn_dict = dict(zip(mb_summary_dc['RGIId'].values, mb_summary_dc['larsen_fullfn'].values))
# #     print(larsen_fullfn_dict)
#     dc_shp_subset['larsen_fullfn'] = dc_shp_subset.RGIId.map(larsen_fullfn_dict)

# # ===== LOAD GLACIERS WITH BRAUN DATA =====
# dc_shp_subset['braun_fullfn'] = np.nan
# braun_fullfn_dict = {}
# if 'braun' in input.mb_datasets:
#     mb_binned_fp = input.main_directory + '/../mb_data/Braun/binned_data/'
# #     mb_binned_fp = input.mb_binned_fp
    
#     mb_fns = []
#     braun_rgiids = []
#     for i in os.listdir(mb_binned_fp):
#         if i.endswith('_mb_bins.csv'):
#             mb_fns.append(mb_binned_fp + i)
#             rgiid_raw = i.split('_')[0]
#             rgiid = 'RGI60-' + rgiid_raw.split('.')[0].zfill(2) + '.' + rgiid_raw.split('.')[1]
#             braun_rgiids.append(rgiid)
#     braun_fn_df = pd.DataFrame(np.zeros((len(mb_fns),2)), columns=['RGIId', 'braun_fn'])
#     braun_fn_df['RGIId'] = braun_rgiids
#     braun_fn_df['braun_fullfn'] = mb_fns
    
#     # Find glaciers that are debris-covered
#     braun_dc_rgiid = [value for value in list(braun_fn_df.RGIId.values) 
#                        if value in list(dc_shp_subset.RGIId.values)]
#     braun_fn_df_dc = braun_fn_df[braun_fn_df['RGIId'].isin(braun_dc_rgiid)]
#     braun_fn_df_dc = braun_fn_df_dc.sort_values('RGIId')
    
#     print('Braun debris-covered glaciers:', braun_fn_df_dc.shape[0], '\n\n')
    
#     braun_fullfn_dict = dict(zip(braun_fn_df_dc['RGIId'].values, braun_fn_df_dc['braun_fullfn'].values))
    
#     dc_shp_subset['braun_fullfn'] = dc_shp_subset.RGIId.map(braun_fullfn_dict)

# # ===== LOAD GLACIERS WITH SHEAN DATA =====
# dc_shp_subset['shean_fullfn'] = np.nan
# shean_fullfn_dict = {}
# if 'shean' in input.mb_datasets:
# #     mb_binned_fp = input.main_directory + '/../mb_data/Shean_2019_0213/mb_combined_20190213_nmad_bins/'
#     mb_binned_fp = input.mb_binned_fp
    
#     mb_fns = []
#     rgiids = []
#     for i in os.listdir(mb_binned_fp):
#         if i.endswith('_mb_bins.csv'):
#             mb_fns.append(mb_binned_fp + i)
#             rgiid_raw = i.split('_')[0]
#             rgiid = 'RGI60-' + rgiid_raw.split('.')[0].zfill(2) + '.' + rgiid_raw.split('.')[1]
#             rgiids.append(rgiid)
#     mb_fn_df = pd.DataFrame(np.zeros((len(mb_fns),2)), columns=['RGIId', 'mb_fn'])
#     mb_fn_df['RGIId'] = rgiids
#     mb_fn_df['mb_fullfn'] = mb_fns
    
#     # Find glaciers that are debris-covered
#     mb_dc_rgiid = [value for value in list(mb_fn_df.RGIId.values) 
#                    if value in list(dc_shp_subset.RGIId.values)]
#     mb_fn_df_dc = mb_fn_df[mb_fn_df['RGIId'].isin(mb_dc_rgiid)]
#     mb_fn_df_dc = mb_fn_df_dc.sort_values('RGIId')
    
#     print('shean debris-covered glaciers:', mb_fn_df_dc.shape[0], '\n\n')
    
#     shean_fullfn_dict = dict(zip(mb_fn_df_dc['RGIId'].values, mb_fn_df_dc['mb_fullfn'].values))
# #     print(shea_fullfn_dict)
#     dc_shp_subset['shean_fullfn'] = dc_shp_subset.RGIId.map(shean_fullfn_dict)

# # Merge dictionaries together
# mb_fn_dict = dict(list(larsen_fullfn_dict.items()) + list(braun_fullfn_dict.items()) + 
#                   list(shean_fullfn_dict.items()))