## Comparison of GloGEM, PyGEM, and OGGM RGI 17 Volume Outputs 

This notebook imports and processes GloGEM, PyGEM, and OGGM RGI 17 volume outputs. Summing glacier volume by basin, we produce a plot that compares the three models' projected volumes for each basin and SSP.

Last Updated: 28 June 2023 | FFW

### GloGEM

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from datetime import date
import collections
import datetime
import os

# Root of the shared Drive data folder, followed by the GloGEM Southern Andes volume output beneath it
fpath0 = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/'
fpath1 = 'Runoff-intercomparison/GloGEM-output/Volume_GloGEM_20230626/SouthernAndes/'

# Climate models (GCMs) run through GloGEM; each name doubles as a sub-directory of the output folder
modelnames_glo = [
    'BCC-CSM2-MR', 'CAMS-CSM1-0', 'CESM2', 'CESM2-WACCM',
    'EC-Earth3', 'EC-Earth3-Veg', 'FGOALS-f3-L', 'GFDL-ESM4',
    'INM-CM4-8', 'INM-CM5-0', 'MPI-ESM1-2-HR', 'MRI-ESM2-0',
    'NorESM2',
]

# Scenarios; each name doubles as a sub-directory beneath the GCM folder.
# NOTE(review): four scenarios are listed -- confirm whether a fifth is available for volume.
SSPs = ['ssp126', 'ssp245', 'ssp370', 'ssp585']

In [None]:
# Load the GloGEM Southern Andes volume table of every scenario/GCM combination.
# Result: volumes[SSP][model] is the DataFrame read from that run's Volume_SouthernAndes.dat
volumes = {}
for SSP in SSPs:
    volumes[SSP] = {}
    for model in modelnames_glo:
        volume_file = f'{fpath0}{fpath1}{model}/{SSP}/Volume_SouthernAndes.dat'
        # Whitespace-delimited text: the first row is skipped, no header row is read and the first
        # column becomes the index. The raw string keeps '\s' from being an invalid escape sequence.
        # The frame is stored directly (the previous code called .append on an undefined name).
        volumes[SSP][model] = pd.read_csv(volume_file, sep=r'\s+', header=None, skiprows=1, index_col=0)

In [None]:
# Read one per-glacier volume table per scenario (list entries follow the order of SSPs).
# NOTE(review): `fpath` is not assigned at notebook level in the cells visible above (it only exists
# inside select_glaciers_json), and the filename refers to RGI region 11 although this notebook covers
# region 17 -- this cell looks carried over from another notebook. Confirm the intended folder/file
# or remove the cell; `all_volumes` is not referenced by the cells visible below.
all_volumes = [
    pd.read_csv(
        os.path.join(fpath, SSP, 'RGIreg11_Volume_individual.dat'),
        sep=r'\s+',  # raw string: '\s' is not a valid escape in a plain string literal
        header=0,
        skiprows=1,
        index_col=0,
    )
    for SSP in SSPs
]

In [None]:
import json
def select_glaciers_json(basin='all', fpath=None):
    '''
    Select glaciers within a basin by MRBID from a json-file.

    Args:
    -----
    basin: str
        String of MRBID or 'all' ('all' is matched case-insensitively).
    fpath: str, optional
        Path of the json-file that maps each MRBID to a list of glacier IDs.
        When omitted, the copy of rgi_ids_per_basin.json in the project
        Drive folder is used.

    Returns:
    --------
    If basin is 'all' a list of all relevant glaciers is returned, for
    initiating glacier simulations. If basin is a MRBID the list of glaciers
    within that basin is returned.

    Raises:
    -------
    KeyError
        If basin is neither 'all' nor a key of the json-file.

    Copy of a function written by Erik Holmgren (2022) in holmgren_gha.utils
    '''
    # Imported here because itertools is not among the notebook's top-level imports;
    # without it the 'all' branch raised a NameError.
    import itertools

    if fpath is None:
        # fpath = './data/rgi_ids_per_basin.json'
        fpath = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/rgi_ids_per_basin.json'
    with open(fpath) as f:
        basin_dict = json.load(f)

    if basin.lower() != 'all':
        return basin_dict[basin]

    # Flatten the per-basin lists into one list, in the order the basins appear in the file
    return list(itertools.chain.from_iterable(basin_dict.values()))

### PyGEM

In [None]:
import xarray as xr

# Climate models (GCMs) listed for PyGEM
modelnames_py = [
    'BCC-CSM2-MR', 'CESM2', 'CESM2-WACCM', 'EC-Earth3',
    'EC-Earth3-Veg', 'FGOALS-f3-L', 'GFDL-ESM4', 'INM-CM4-8',
    'INM-CM5-0', 'MPI-ESM1-2-HR', 'MRI-ESM2-0', 'NorESM2-MM',
]

# All SSPs in PyGEM
SSPs = ['ssp126', 'ssp245', 'ssp370', 'ssp585']

# Basin name -> basin code (the string passed to select_glaciers_json)
alpine_basins = {
    'YELCHO': '3429',
    'VALDIVIA': '3428',
    'SERRANO': '3426',
    'RAPEL': '3423',
    'PUELO': '3422',
    'PASCUA': '3420',
    'PALENA': '3419',
    'HUASCO': '3412',
    'COPIAPO': '3409',
    'CISNES': '3408',
    'BIOBIO': '3405',
    'BAKER': '3404',
    'AZOPARDO': '3403',
    'AISEN': '3401',
    'SANTA CRUZ': '3244',
    'NEGRO': '3232',
    'COLORADO': '3212',
    'CHICO': '3209',
}

# Basin names, in the same order as the keys of alpine_basins
basins = list(alpine_basins)

# Generic filepath to the PyGEM folder on Drive
fpathPy = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/Runoff-intercomparison/PyGEM/17'

In [None]:
# Glacier IDs of every basin, keyed by basin name and looked up through the basin's code
basin_gls = {basin: select_glaciers_json(code) for basin, code in alpine_basins.items()}

In [None]:
#Importing the PyGEM annual glacier mass output (variable glac_mass_annual), one entry per SSP
import glob   #use glob to group files by filename similarities (in this case, SSP)

volume_ds = {}
for SSP in SSPs:
    # Kept in its own name so the GloGEM `fpath1` defined earlier is not overwritten
    mass_prefix = '/mass_annual/R17_mass_annual_c2_ba1_1set_2000_2100-{}'.format(SSP)
    file_pattern = f'{fpathPy + mass_prefix}*.nc'
    # Sorted so the files are concatenated in the same order on every run
    file_list = sorted(glob.glob(file_pattern))

    if not file_list:
        # Report the gap here; otherwise it only surfaces as a KeyError in a later cell
        print(f'No PyGEM mass files found for {SSP}: {file_pattern}')
        continue

    datasets = []
    for file in file_list:
        with xr.open_dataset(file) as ds:
            # Load the variable into memory before the file is closed again
            datasets.append(ds.glac_mass_annual.load())

    # Join the files of this SSP along the glacier dimension
    volume_ds[SSP] = xr.concat(datasets, dim='glacier')

In [None]:
# Split the PyGEM data into basins: basin_volumes[basin][SSP] holds only the glaciers whose RGIId
# is in that basin's glacier list
basin_volumes = {}
for basin, glacier_list in basin_gls.items():
    per_ssp = {}
    basin_volumes[basin] = per_ssp
    for SSP in SSPs:
        ssp_data = volume_ds[SSP]
        try:
            # Drop every glacier that is not part of this basin
            in_basin = ssp_data.RGIId.isin(glacier_list)
            kept = [ssp_data.where(in_basin, drop=True)]
        except ValueError:
            # Original note: happens if there are no glaciers from this batch in the selected region.
            # NOTE(review): the SSP is then left out of basin_volumes[basin].
            continue
        per_ssp[SSP] = xr.concat(kept, dim='glacier')

In [None]:
# Re-key from [basin][SSP] to [SSP][basin] (to match the other models) and sum over the glaciers
# of each basin, scaling the result by 1e-12.
# NOTE(review): the original comment describes this as kg -> km^3, but 1e-12 turns kg into Gt;
# that equals km^3 only for a density of 1000 kg m^-3 -- confirm whether an ice density is needed.
basin_sums_py = {}
for SSP in SSPs:
    per_basin = {}
    for basin in basin_gls:
        per_basin[basin] = basin_volumes[basin][SSP].sum(dim='glacier') * 1e-12
    basin_sums_py[SSP] = per_basin

In [None]:
# Quick look at one PyGEM basin total (ssp126, BIOBIO) without the last entry of the second axis
basin_sums_py['ssp126']['BIOBIO'][:, :-1]

In [None]:
# Multi-GCM mean and first/third quartiles of the PyGEM basin totals, each keyed [SSP][basin]
GCM_mean_py, GCM_q1_py, GCM_q3_py = {}, {}, {}
for SSP in SSPs:
    mean_by_basin, q1_by_basin, q3_by_basin = {}, {}, {}
    for basin in basins:
        across_models = basin_sums_py[SSP][basin]
        mean_by_basin[basin] = across_models.mean(dim='model')
        q1_by_basin[basin] = across_models.quantile(q=0.25, dim='model')
        q3_by_basin[basin] = across_models.quantile(q=0.75, dim='model')
    GCM_mean_py[SSP] = mean_by_basin
    GCM_q1_py[SSP] = q1_by_basin
    GCM_q3_py[SSP] = q3_by_basin

### OGGM

In [None]:
# Climate models (GCMs) present in the OGGM output
modelnames_OG = [
    'BCC-CSM2-MR', 'CAMS-CSM1-0', 'CESM2', 'CESM2-WACCM', 'CMCC-CM2-SR5',
    'EC-Earth3', 'EC-Earth3-Veg', 'FGOALS-f3-L', 'GFDL-ESM4', 'INM-CM4-8',
    'INM-CM5-0', 'MPI-ESM1-2-HR', 'MRI-ESM2-0', 'NorESM2-MM', 'TaiESM1',
]

# Basin name -> basin code
alpine_basins = {
    'YELCHO': '3429',
    'VALDIVIA': '3428',
    'SERRANO': '3426',
    'RAPEL': '3423',
    'PUELO': '3422',
    'PASCUA': '3420',
    'PALENA': '3419',
    'HUASCO': '3412',
    'COPIAPO': '3409',
    'CISNES': '3408',
    'BIOBIO': '3405',
    'BAKER': '3404',
    'AZOPARDO': '3403',
    'AISEN': '3401',
    'SANTA CRUZ': '3244',
    'NEGRO': '3232',
    'COLORADO': '3212',
    'CHICO': '3209',
}

# CMCC-CM2-SR5 & TaiESM1 only hold values for ssp585 -- this is the model list without those GCMs
modelnames_OG_trimmed = [gcm for gcm in modelnames_OG if gcm not in ('CMCC-CM2-SR5', 'TaiESM1')]

# Generic filepath to the OGGM basin output on Drive
fpathOG1 = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/'
fpathOG2 = 'Lizz Research Stuff/Runoff-intercomparison/OGGM/lschuster/runs_2023.3/output/basins/'

In [None]:
# Load the OGGM `volume` variable; the output is grouped in one folder of netCDF files per basin code
rf_ds = {}
for basin, ID in alpine_basins.items():
    basin_files = f'{fpathOG1}{fpathOG2}gcm_from_2000_bc_2000_2019/{ID}/*.nc'
    with xr.open_mfdataset(basin_files) as ds:
        # Pull the variable into memory while the files are still open
        rf_ds[basin] = ds.volume.load()

In [None]:
# Basin totals: sum the OGGM volume over the rgi_id dimension and scale by 1e-9
# (m^3 to km^3, per the original note)
basin_volume_OG = {basin: rf_ds[basin].sum(dim='rgi_id') * 1e-9 for basin in alpine_basins}

In [None]:
# Arrange the OGGM basin totals like the GloGEM/PyGEM dictionaries: basin_sums_OG[SSP][basin]
basins = [
    'YELCHO', 'VALDIVIA', 'SERRANO', 'RAPEL', 'PUELO', 'PASCUA',
    'PALENA', 'HUASCO', 'COPIAPO', 'CISNES', 'BIOBIO', 'BAKER',
    'AZOPARDO', 'AISEN', 'SANTA CRUZ', 'NEGRO', 'COLORADO', 'CHICO',
]
basin_sums_OG = {
    SSP: {basin: basin_volume_OG[basin].sel(scenario=SSP) for basin in basins}
    for SSP in SSPs
}

In [None]:
# Drop CMCC-CM2-SR5 and TaiESM1 for ALL SSPs. They only hold values for ssp585 and are not in the
# GloGEM or PyGEM model lists, so removing them everywhere (ssp585 included) both simplifies the
# OGGM handling and keeps the GCM set consistent across the analysis.
# The GCMs are picked by label rather than by positional slices, so the result does not depend on
# the order in which the gcm coordinate happens to be stored.
trimmed_basin_sums_OG = {}
for SSP in SSPs:
    trimmed_basin_sums_OG[SSP] = {}
    for basin in basins:
        trimmed_basin_sums_OG[SSP][basin] = basin_sums_OG[SSP][basin].sel(gcm=modelnames_OG_trimmed)

In [None]:
# Display one trimmed OGGM entry (ssp126, BIOBIO) as a quick check of the result
trimmed_basin_sums_OG['ssp126']['BIOBIO']

In [None]:
# Multi-GCM mean and first/third quartiles of the trimmed OGGM basin totals, each keyed [SSP][basin]
GCM_mean_OG, GCM_q1_OG, GCM_q3_OG = {}, {}, {}
for which_ssp in SSPs:
    mean_by_basin, q1_by_basin, q3_by_basin = {}, {}, {}
    for basin in basins:
        across_gcms = trimmed_basin_sums_OG[which_ssp][basin]
        mean_by_basin[basin] = across_gcms.mean(dim='gcm')
        q1_by_basin[basin] = across_gcms.quantile(q=0.25, dim='gcm')
        q3_by_basin[basin] = across_gcms.quantile(q=0.75, dim='gcm')
    GCM_mean_OG[which_ssp] = mean_by_basin
    GCM_q1_OG[which_ssp] = q1_by_basin
    GCM_q3_OG[which_ssp] = q3_by_basin

In [None]:
#Plot setup
from cycler import cycler
import matplotlib.patches as mpatches

# Scenarios (figure columns) and basins (figure rows)
scenarios = ['ssp126', 'ssp245', 'ssp370', 'ssp585']

basins = [
    'YELCHO', 'VALDIVIA', 'SERRANO', 'RAPEL', 'PUELO', 'PASCUA',
    'PALENA', 'HUASCO', 'COPIAPO', 'CISNES', 'BIOBIO', 'BAKER',
    'AZOPARDO', 'AISEN', 'SANTA CRUZ', 'NEGRO', 'COLORADO', 'CHICO',
]

# Title-cased basin names for the axis labels ('SANTA CRUZ' -> 'Santa Cruz')
basinstext = [name.title() for name in basins]

# Years 1980-2100 as integers and as timestamps
yrs_glo = np.arange(1980, 2101)
yrs_glo_dt = pd.to_datetime(list(map(str, yrs_glo)))

# Twelve shades per model, sampled from the 0.2-0.6 range of each colormap
shade_levels = np.linspace(0.2, 0.6, num=12)

colors_glo = plt.colormaps['Greens']
line_colors_glo = colors_glo(shade_levels)
glo_cycler = cycler(color=line_colors_glo)

colors_py = plt.colormaps['Purples']
line_colors_py = colors_py(shade_levels)
py_cycler = cycler(color=line_colors_py)

colors_OG = plt.colormaps['Blues']
line_colors_OG = colors_OG(shade_levels)
OG_cycler = cycler(color=line_colors_OG)

In [None]:
# Quick look at a single PyGEM model (model = 1) for ssp126 / BIOBIO, last entry dropped
basin_sums_py['ssp126']['BIOBIO'].sel(model=1)[:-1]

In [None]:
#Plotting all data: one row per basin, one column per scenario.
#sharey='row' gives every basin a single y-scale across the scenarios, so the columns without
#tick labels really do share the scale shown in the first column.
fig, axs = plt.subplots(len(basins), len(SSPs), figsize=(10, 28), sharex=True, sharey='row')

yrs_OG = yrs_glo_dt[20:-1]   #x-values for the OGGM series (each trimmed with [0:-1] below)
yrs_py = yrs_glo_dt[20::]    #x-values for the PyGEM series (each trimmed with [0:-1] below)

for s, which_ssp in enumerate(SSPs):
    #Column header, e.g. 'ssp126' -> 'SSP 126'
    axs[0, s].set_title('SSP ' + which_ssp[3:])

    for b, basin in enumerate(basins):
        ax = axs[b, s]

        #OG won't plot with built-in ds.plot()
        #Trim last value as it goes to zero
        for model in modelnames_OG_trimmed:
            ax.plot(yrs_OG, trimmed_basin_sums_OG[which_ssp][basin][:,0:-1].sel(gcm = model), color = 'dodgerblue', alpha = 0.15)
        ax.plot(yrs_OG, GCM_mean_OG[which_ssp][basin][0:-1], color = 'royalblue', linewidth = 0.9)
        ax.plot(yrs_OG, GCM_q1_OG[which_ssp][basin][0:-1], color = 'royalblue', linewidth = 0.4)
        ax.plot(yrs_OG, GCM_q3_OG[which_ssp][basin][0:-1], color = 'royalblue', linewidth = 0.4)
        ax.fill_between(yrs_OG, GCM_q1_OG[which_ssp][basin][0:-1], GCM_q3_OG[which_ssp][basin][0:-1], color = 'dodgerblue', alpha = 0.5)

        #TODO: GloGEM curves are disabled; basin_sums_glo / GCM_*_glo are not built in the cells visible above
        #Trim first value as it is incomplete hydrological year
        # for m in modelnames_glo:
        #     axs[b, s].plot(yrs_glo_dt, basin_sums_glo[which_ssp][basin][m], color=axs[b, s].set_prop_cycle(glo_cycler), alpha = 0.25)
        # axs[b,s].plot(yrs_glo_dt, GCM_mean_glo[which_ssp][basin], color = 'darkgreen', linewidth = 0.9)
        # axs[b,s].plot(yrs_glo_dt, GCM_q1_glo[which_ssp][basin], color = 'darkgreen', linewidth = 0.4)
        # axs[b,s].plot(yrs_glo_dt, GCM_q3_glo[which_ssp][basin], color = 'darkgreen', linewidth = 0.4)
        # axs[b,s].fill_between(yrs_glo_dt, GCM_q1_glo[which_ssp][basin], GCM_q3_glo[which_ssp][basin], color = 'green')
        # axs[b, s].set(xlim=(pd.to_datetime('2000-01-01'), pd.to_datetime('2100-01-01')))

        #PyGEM: the model coordinate is selected by number, m+1 for the m-th entry of modelnames_py
        for m, model in enumerate(modelnames_py):
            ax.plot(yrs_py, basin_sums_py[which_ssp][basin].sel(model = m+1)[0:-1], color = 'purple', alpha = 0.15)
        ax.plot(yrs_py, GCM_mean_py[which_ssp][basin][0:-1], color = 'purple', linewidth = 0.9)
        ax.plot(yrs_py, GCM_q1_py[which_ssp][basin][0:-1], color = 'purple', linewidth = 0.4)
        ax.plot(yrs_py, GCM_q3_py[which_ssp][basin][0:-1], color = 'purple', linewidth = 0.4)
        ax.fill_between(yrs_py, GCM_q1_py[which_ssp][basin][0:-1], GCM_q3_py[which_ssp][basin][0:-1], color = 'purple', alpha = 0.5)

#Axis labels are set once, after plotting, instead of repeatedly inside the loops
#x: label and ticks on the bottom row only
for ax in axs[-1, :]:
    ax.set_xlabel('Year')
    ax.set_xticks([pd.to_datetime('2020'),pd.to_datetime('2040'), pd.to_datetime('2060'), pd.to_datetime('2080')], [2020, 2040, 2060, 2080])

#y: basin name and unit on the first column only (the shared rows hide the other tick labels)
for b, label in enumerate(basinstext):
    axs[b, 0].set_ylabel(label + r' $[km^3]$')

#green_patch = mpatches.Patch(color='darkgreen', label='GloGEM')
purple_patch = mpatches.Patch(color='purple', label='PyGEM')
blue_patch = mpatches.Patch(color='royalblue', label='OGGM')
axs[0,0].legend(handles=[purple_patch, blue_patch], bbox_to_anchor=(3.15, 1.71), ncol=3)

#The figure shows volume, so the title says 'Volume' rather than 'Runoff'
plt.suptitle('GloGEM, PyGEM, and OGGM Volume Projections for Major Southern Andes River Basins', x=0.52, y=0.915);