In [1]:
#! /usr/bin/env python
"""
There is a small subset of glaciers for which surface temperature data was not available during the ablation season.
For these glaciers, we utilize a composite from all months to ensure we have complete glacier coverage.
"""
import sys
import os
import re
import subprocess
from datetime import datetime, timedelta
import time
import pickle
from collections import OrderedDict

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from rasterio.merge import merge
from rasterio.warp import calculate_default_transform, reproject, Resampling
from scipy import ndimage
from scipy.optimize import curve_fit
from scipy.optimize import minimize
from scipy.stats import median_absolute_deviation
import xarray as xr
from osgeo import gdal, ogr, osr

from pygeotools.lib import malib, warplib, geolib, iolib, timelib


import debrisglobal.globaldebris_input as debris_prms
from debrisglobal.glacfeat import GlacFeat, create_glacfeat
from meltcurves import melt_fromdebris_func
from meltcurves import debris_frommelt_func
from spc_split_lists import split_list

In [2]:
# ===== LOAD ALL GLACIERS =====
# bin_fp: directory scanned for the per-glacier '*_mb_bins_hdts.csv' files.
# bin_fp_wemvel: its parent directory (same path with '../' appended).
bin_fp = debris_prms.output_fp + 'mb_bins_4nsidc/_wdebris_hdts/'
bin_fp_wemvel = bin_fp + '../'

# Keep only the bin files of interest, in sorted filename order
mb_bin_fns = sorted(fn for fn in os.listdir(bin_fp) if fn.endswith('_mb_bins_hdts.csv'))
print(mb_bin_fns)
        

['1.00013_mb_bins_hdts.csv', '1.00033_mb_bins_hdts.csv', '1.00038_mb_bins_hdts.csv', '1.00040_mb_bins_hdts.csv', '1.00041_mb_bins_hdts.csv', '1.00042_mb_bins_hdts.csv', '1.00044_mb_bins_hdts.csv', '1.00045_mb_bins_hdts.csv', '1.00046_mb_bins_hdts.csv', '1.00140_mb_bins_hdts.csv', '1.00148_mb_bins_hdts.csv', '1.00312_mb_bins_hdts.csv', '1.00351_mb_bins_hdts.csv', '1.00399_mb_bins_hdts.csv', '1.00409_mb_bins_hdts.csv', '1.00426_mb_bins_hdts.csv', '1.00434_mb_bins_hdts.csv', '1.00436_mb_bins_hdts.csv', '1.00537_mb_bins_hdts.csv', '1.00544_mb_bins_hdts.csv', '1.00556_mb_bins_hdts.csv', '1.00557_mb_bins_hdts.csv', '1.00561_mb_bins_hdts.csv', '1.00566_mb_bins_hdts.csv', '1.00569_mb_bins_hdts.csv', '1.00570_mb_bins_hdts.csv', '1.00571_mb_bins_hdts.csv', '1.00572_mb_bins_hdts.csv', '1.00574_mb_bins_hdts.csv', '1.00576_mb_bins_hdts.csv', '1.00578_mb_bins_hdts.csv', '1.00579_mb_bins_hdts.csv', '1.00581_mb_bins_hdts.csv', '1.00582_mb_bins_hdts.csv', '1.00584_mb_bins_hdts.csv', '1.00600_mb_bins_hd

In [16]:
# Column groups copied into each per-glacier output table. They do not change between
# glaciers, so they are defined once instead of on every iteration.
#   batch 1 and batch 3 are read from the '*_mb_bins_hdts.csv' file in bin_fp
#   batch 2 is read from the companion '*_mb_bins.csv' file one directory up
cns_batch1 = ['bin_center_elev_m', 'z1_bin_count_valid', 'z1_bin_area_valid_km2', 
              'dhdt_bin_count', 'dhdt_bin_mean_ma', 'dhdt_bin_std_ma',
              'dhdt_bin_med_ma', 'dhdt_bin_mad_ma', 'mb_bin_mean_mwea',
              'mb_bin_std_mwea', 'mb_bin_med_mwea', 'mb_bin_mad_mwea',
              'dc_dhdt_bin_count', 'dc_dhdt_bin_mean_ma', 'dc_dhdt_bin_std_ma',
              'dc_dhdt_bin_med_ma', 'dc_dhdt_bin_mad_ma', 'dc_mb_bin_mean_mwea',
              'dc_mb_bin_std_mwea', 'dc_mb_bin_med_mwea', 'dc_mb_bin_mad_mwea',
              'dc_bin_count_valid', 'dc_bin_area_valid_km2', 'ts_mean', 'ts_std', 'ts_med', 'ts_mad',
              'dc_ts_mean', 'dc_ts_std', 'dc_ts_med', 'dc_ts_mad', 'vm_med', 'vm_mad', 'H_mean', 'H_std']
cns_batch2 = ['emvel_mean', 'emvel_std', 'emvel_med', 'emvel_mad']
cns_batch3 = ['hd_ts_mean_m', 'hd_ts_std_m', 'hd_ts_med_m', 'hd_ts_mad_m', 
              'mf_ts_mean', 'mf_ts_std', 'mf_ts_med', 'mf_ts_mad']

for nfn, mb_bin_fn in enumerate(mb_bin_fns):
    # Two inputs per glacier: the '_hdts' file and its companion without that suffix,
    # which lives in the parent directory (bin_fp_wemvel == bin_fp + '../').
    df = pd.read_csv(bin_fp + mb_bin_fn)
    df_wem = pd.read_csv(bin_fp_wemvel + mb_bin_fn.replace('_hdts.csv', '.csv'))
    
    # Mark progress
    if nfn%500 == 0:
        print(nfn, 'of', len(mb_bin_fns))
    
    # Report glaciers whose two input files disagree on the number of rows (should be none).
    # The original printed the undefined name `mb_bin_fin`, which would have raised NameError here.
    if df.shape[0] != df_wem.shape[0]:
        print(mb_bin_fn)
        
    # Subset and concatenate relevant data.
    # The table is assembled in one step rather than by adding new columns to a subset of df.
    # Reindexing the companion columns to df's index keeps df's rows as the reference,
    # matching the index alignment of the original .loc assignment.
    df_output = pd.concat(
        [df.loc[:, cns_batch1],
         df_wem.loc[:, cns_batch2].reindex(df.index),
         df.loc[:, cns_batch3]],
        axis=1)
    
    # Output goes to a folder named after the text before the first '.', zero-padded to two
    # characters (e.g. '1.00013_mb_bins_hdts.csv' -> '01/1.00013_bins.csv').
    roi = mb_bin_fn.split('.')[0].zfill(2)
    output_fp = bin_fp_wemvel + roi + '/'
    output_fn = mb_bin_fn.split('_')[0] + '_bins.csv'
    os.makedirs(output_fp, exist_ok=True)
    df_output.to_csv(output_fp + output_fn, index=False)
        

0 of 4480
500 of 4480
1000 of 4480
1500 of 4480
2000 of 4480
2500 of 4480
3000 of 4480
3500 of 4480
4000 of 4480


In [17]:
# Inspect df_output as left by the export loop: after the loop finishes it holds only the
# table built for the last file in mb_bin_fns.
df_output

Unnamed: 0,bin_center_elev_m,z1_bin_count_valid,z1_bin_area_valid_km2,dhdt_bin_count,dhdt_bin_mean_ma,dhdt_bin_std_ma,dhdt_bin_med_ma,dhdt_bin_mad_ma,mb_bin_mean_mwea,mb_bin_std_mwea,...,emvel_med,emvel_mad,hd_ts_mean_m,hd_ts_std_m,hd_ts_med_m,hd_ts_mad_m,mf_ts_mean,mf_ts_std,mf_ts_med,mf_ts_mad
0,15.0,75.0,0.072075,40.0,-0.533627,0.2224,-0.603794,0.23777,-0.402057,0.20634,...,0.28985,0.206697,0.098399,0.070225,0.073958,0.057231,0.501776,0.232595,0.48725,0.243968
1,25.0,536.0,0.515096,290.0,-0.464958,0.302656,-0.393707,0.322154,-0.561103,0.351627,...,0.289331,0.368099,0.122611,0.108508,0.092388,0.096683,0.507882,0.310148,0.413044,0.316709
2,35.0,361.0,0.346921,28.0,-0.202354,0.120562,-0.124259,0.025282,-0.605353,0.323602,...,0.205555,1.13247,0.054257,0.02657,0.058034,0.029358,0.657892,0.206504,0.576536,0.173373
3,45.0,303.0,0.291183,14.0,-0.152221,0.034921,-0.155751,0.044415,-0.514363,0.280927,...,0.495223,1.759603,0.13878,0.050242,0.144649,0.071065,0.32865,0.101422,0.295926,0.120623
4,55.0,352.0,0.338272,42.0,-0.245229,0.10293,-0.215481,0.109502,-0.393649,0.255033,...,-0.501284,1.471369,0.175106,0.063345,0.182853,0.075589,0.277112,0.10758,0.236464,0.070478
5,65.0,355.0,0.341155,58.0,-0.169108,0.0426,-0.182881,0.034054,-0.308747,0.179259,...,-0.635741,2.224801,0.15624,0.081236,0.161039,0.090239,0.350019,0.207232,0.263638,0.117737
6,75.0,480.0,0.46128,77.0,-0.117538,0.065132,-0.131471,0.07161,-0.256772,0.181881,...,0.244407,1.31539,0.166053,0.102934,0.142042,0.068605,0.330871,0.168168,0.292961,0.14844
7,85.0,639.0,0.614079,132.0,-0.101921,0.090008,-0.112343,0.085801,-0.214822,0.249131,...,0.297872,0.994727,0.28605,0.17572,0.274773,0.230352,0.250477,0.195565,0.164857,0.104441
8,95.0,685.0,0.658285,98.0,-0.126173,0.150555,-0.102364,0.200445,-0.165575,0.2914,...,-0.025285,0.734248,0.275719,0.175339,0.255708,0.214717,0.267531,0.220072,0.175916,0.123961
9,105.0,561.0,0.539121,29.0,0.083552,0.066727,0.065677,0.059545,-0.098368,0.297789,...,0.036508,1.012194,0.295553,0.14329,0.295128,0.175289,0.195275,0.102621,0.154491,0.078618


In [18]:
# Column names of df_output (the table left over from the last iteration of the export loop)
df_output.columns

Index(['bin_center_elev_m', 'z1_bin_count_valid', 'z1_bin_area_valid_km2',
       'dhdt_bin_count', 'dhdt_bin_mean_ma', 'dhdt_bin_std_ma',
       'dhdt_bin_med_ma', 'dhdt_bin_mad_ma', 'mb_bin_mean_mwea',
       'mb_bin_std_mwea', 'mb_bin_med_mwea', 'mb_bin_mad_mwea',
       'dc_dhdt_bin_count', 'dc_dhdt_bin_mean_ma', 'dc_dhdt_bin_std_ma',
       'dc_dhdt_bin_med_ma', 'dc_dhdt_bin_mad_ma', 'dc_mb_bin_mean_mwea',
       'dc_mb_bin_std_mwea', 'dc_mb_bin_med_mwea', 'dc_mb_bin_mad_mwea',
       'dc_bin_count_valid', 'dc_bin_area_valid_km2', 'ts_mean', 'ts_std',
       'ts_med', 'ts_mad', 'dc_ts_mean', 'dc_ts_std', 'dc_ts_med', 'dc_ts_mad',
       'vm_med', 'vm_mad', 'H_mean', 'H_std', 'emvel_mean', 'emvel_std',
       'emvel_med', 'emvel_mad', 'hd_ts_mean_m', 'hd_ts_std_m', 'hd_ts_med_m',
       'hd_ts_mad_m', 'mf_ts_mean', 'mf_ts_std', 'mf_ts_med', 'mf_ts_mad'],
      dtype='object')