### Aggregation of PyGEM runoff:

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from datetime import date
import collections
import datetime
import os
import xarray as xr

In [2]:
#All of the climate models used
modelnames_py = ['BCC-CSM2-MR','CESM2','CESM2-WACCM','EC-Earth3','EC-Earth3-Veg','FGOALS-f3-L','GFDL-ESM4',
              'INM-CM4-8','INM-CM5-0','MPI-ESM1-2-HR','MRI-ESM2-0', 'NorESM2-MM']

SSPs = ['ssp126','ssp245','ssp370','ssp585'] #List of all SSPs in PyGEM

Alpine_basins = {'YSYK-KOL':'2919', 'TARIM HE':'2914', 'TALAS':'2913', 'LAKE BALKHASH':'2910', 
    'CHUY':'2905','ARAL SEA':'2902', 'YELLOW RIVER':'2434', 'MEKONG':'2421', 'SALWEEN':'2319', 
    'INDUS':'2309', 'BRAHMAPUTRA':'2302', 'YANGTZE' : '2433'}

basins = ['YSYK-KOL', 'TARIM HE', 'TALAS', 'LAKE BALKHASH', 'CHUY', 'ARAL SEA', 'YELLOW RIVER', 'MEKONG', 
          'SALWEEN', 'INDUS', 'BRAHMAPUTRA', 'YANGTZE']

#Generic filepath to navigate to Drive folder 
fpathPy = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/Runoff-intercomparison/PyGEM/13'

In [3]:
import json
def select_glaciers_json(basin='all'):
    '''
    Select glaciers within a basin by MRBID from a json-file,
    which is stored in the data directory.

    Args:
    -----
    basin: str
        String of MRBID or 'all'.

    Returns:
    --------
    If basin is 'all' a list of all relevant glaciers is returned, for
    initiating glacier simulations. If basin is a MRBID the list of glaciers
    within that basin is returned.
    
    Copy of a function written by Erik Holmgren (2022) in holmgren_gha.utils
    '''

    # fpath = './data/rgi_ids_per_basin.json'
    fpath = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/rgi_ids_per_basin.json'  
    with open(fpath) as f:
        basin_dict = json.load(f)

    if basin.lower() != 'all':
        glacier_list = basin_dict[basin]
    else:
        glacier_list = list(itertools.chain.from_iterable(basin_dict.values()))

    return glacier_list

In [50]:
basin_gls = {}
basin_gls_13 = {}   #Creating list that does not include glaciers in RGI 14 or 15
for basin, code in Alpine_basins.items():
    basin_gls[basin] = select_glaciers_json(code)
    basin_gls_13[basin] = []
    for g, glacier in enumerate(basin_gls[basin]):
        if int(basin_gls[basin][g][6:8]) == 13:
            basin_gls_13[basin].append(glacier)

# basin_gls_13 = {}
# for basin in basins:
#     basin_gls_13[basin] = []
#     for g, glacier in enumerate(basin_gls[basin]):
#         if int(basin_gls[basin][g][6:8]) == 13:
#             basin_gls_13[basin].append(glacier)

In [5]:
#Importing all runoff data, taking annual sum, and converting m^3 to km^3
import glob   #use glob to group files by filename similarities (in this case, SSP)

rf_ds = {}
#annual_rf_ds = {}
for s, SSP in enumerate(SSPs):
    fpath1 = '/R13_runoff_monthly_c2_ba1_1set_2000_2100-{}'.format(SSP)
    file_pattern = f'{fpathPy + fpath1}*.nc'
    file_list = glob.glob(file_pattern)
    #print(file_list)
    
    datasets = []  # Create an empty list for each SSP
    if file_list:
        for file in file_list:
            with xr.open_dataset(file) as ds:
                ds = ds.glac_runoff_monthly.load()
                datasets.append(ds)
    
        combined_ds = xr.concat(datasets, dim='glacier')  # Concatenate the datasets
        rf_ds[SSP] = combined_ds
        #annual_rf_ds[SSP] = rf_ds[SSP].resample(time='A').sum() * 1e-9

In [None]:
#We have to make 'Climate_Model' and 'RGIId' dimensions so that we can call by name not int
for s, SSP in enumerate(SSPs):
    rf_ds[SSP] = rf_ds[SSP].set_index(model='Climate_Model', glacier='RGIId')

In [52]:
basin_gls_13['TALAS']

['RGI60-13.11762',
 'RGI60-13.11763',
 'RGI60-13.11765',
 'RGI60-13.11766',
 'RGI60-13.11770',
 'RGI60-13.11811',
 'RGI60-13.11814',
 'RGI60-13.17003',
 'RGI60-13.17005',
 'RGI60-13.17006',
 'RGI60-13.17007',
 'RGI60-13.17008',
 'RGI60-13.17009',
 'RGI60-13.17010',
 'RGI60-13.17011',
 'RGI60-13.17012',
 'RGI60-13.17017',
 'RGI60-13.17018',
 'RGI60-13.17025',
 'RGI60-13.17028',
 'RGI60-13.17030',
 'RGI60-13.17031',
 'RGI60-13.17032',
 'RGI60-13.17033',
 'RGI60-13.17036',
 'RGI60-13.17037',
 'RGI60-13.17038',
 'RGI60-13.17041',
 'RGI60-13.17043',
 'RGI60-13.17044',
 'RGI60-13.17045',
 'RGI60-13.17046',
 'RGI60-13.17047',
 'RGI60-13.17048',
 'RGI60-13.17049',
 'RGI60-13.17050',
 'RGI60-13.17051',
 'RGI60-13.17088',
 'RGI60-13.17089',
 'RGI60-13.17090',
 'RGI60-13.17091',
 'RGI60-13.17117',
 'RGI60-13.17118',
 'RGI60-13.17119',
 'RGI60-13.17120',
 'RGI60-13.17122',
 'RGI60-13.17129',
 'RGI60-13.17130',
 'RGI60-13.17131',
 'RGI60-13.17132',
 'RGI60-13.17133',
 'RGI60-13.17134',
 'RGI60-13.1

In [51]:
glacier_data = []

# Step 2: Iterate over each glacier and extract its runoff data for each SSP and each model
for basin, RGIIDs in basin_gls_13.items():
    for RGIID in RGIIDs:
        for ssp in SSPs:
            for m, modelname in enumerate(modelnames_py):
                # Extract the runoff data for the current glacier, SSP, and model
                glacier_runoff = rf_ds[ssp].sel(model = modelname, glacier = RGIID)
                # Convert the DataArray to a NumPy array and then to a list
                glacier_runoff_list = glacier_runoff.values.flatten().tolist()
                # Append the data for the current glacier, SSP, and model to the list
                glacier_data.append([glacier, ssp, model] + glacier_runoff_list)

# Step 3: Create a DataFrame from the list of data
out_df = pd.DataFrame(glacier_data, columns=['Glacier', 'SSP', 'Model'] + rf_ds['ssp126'].time.values.tolist())

# Step 4: Write the DataFrame to a CSV file
output_dir = '/Users/finnwimberly/Desktop/Lizz Research/CSV Outputs/Load Separate/RGI 13/PyGEM/'

fname = 'RGI13_Py_runoff_unsorted.csv'

# Define the full path of the output file
output_path = os.path.join(output_dir, fname)

# Save the DataFrame as CSV
out_df.to_csv(output_path, header=True, index=True)
# csv_filename = 'glacier_runoff_data.csv'
# df.to_csv(csv_filename, index=False)

KeyError: "not all values found in index 'glacier'. Try setting the `method` keyword argument (example: method='nearest')."

In [None]:
# Sorting into basins
#basin_datasets = {}
basin_ds_monthly = {}
for basin, glacier_list in basin_gls.items():
    ## loop over them all, drop the irrelevant IDs, and concatenate the result
    #basin_datasets[basin] = {}
    basin_ds_monthly[basin] = {}
    for s, SSP in enumerate(SSPs):
        #ds_list = []
        ds_list_monthly = []
        try:
            #ds_filtered = annual_rf_ds[SSP].where(annual_rf_ds[SSP].RGIId.isin(glacier_list), drop=True)
            ds_filtered_monthly = rf_ds[SSP].where(rf_ds[SSP].RGIId.isin(glacier_list), drop=True)
            #print(ds_filtered)
            #ds_list.append(ds_filtered)
            ds_list_monthly.append(ds_filtered_monthly)
        except ValueError: ## happens if there are no glaciers from this batch in the selected region
            continue
        #basin_datasets[basin][SSP] = xr.concat(ds_list, dim='glacier')
        basin_ds_monthly[basin][SSP] = xr.concat(ds_list_monthly, dim='glacier') 

In [12]:
#Flipping indexing (to match other models) and summing basins
basin_sums_py = {}
basin_sums_monthly_py = {}
for s, SSP in enumerate(SSPs):        
    basin_sums_py[SSP] = {}
    basin_sums_monthly_py[SSP] = {}
    for basin, glacier_list in basin_gls.items():
        basin_sums_py[SSP][basin] = basin_datasets[basin][SSP].sum(dim='glacier')
        basin_sums_monthly_py[SSP][basin] = basin_ds_monthly[basin][SSP].sum(dim='glacier')*1e-9

### CSV Readout

In [13]:
## set up filename to reflect what you're writing out, possibly in a nested loop
modelnames_all = ['BCC-CSM2-MR', 'CESM2', 'CESM2-WACCM', 'EC-Earth3', 'EC-Earth3-Veg', 'FGOALS-f3-L', 'GFDL-ESM4', 
                  'INM-CM4-8', 'INM-CM5-0', 'MPI-ESM1-2-HR', 'MRI-ESM2-0', 'NorESM2-MM']

scenarios = ['ssp126','ssp245','ssp370','ssp585']

basins = ['TARIM HE', 'ARAL SEA', 'INDUS']

In [14]:
# Create new index using pandas date_range function
start_date = datetime.date(2000, 1, 1)
end_date = datetime.date(2100, 12, 1)
indices = pd.date_range(start_date, end_date, freq='MS').strftime('%Y-%m').tolist()

#Creating dataframes of SSP, basin, and GCM containing all 3 global glacier models
out_df_RGI14 = {}
for s, SSP in enumerate(scenarios):
    out_df_RGI14[SSP] = {}
    for b, basin in enumerate(basins):
        out_df_RGI14[SSP][basin] = {}
        for m, model in enumerate(modelnames_all):
            pygem_values = pd.DataFrame(basin_sums_monthly_py[SSP][basin].sel(model=m + 1)).values.flatten()

            out_df_RGI14[SSP][basin][model] = pd.DataFrame(
                {
                    'PyGEM': pygem_values,
                    
                },
                index=indices
            )

In [16]:
# Exporting Yukon data
output_dir_RGI13 = '/Users/finnwimberly/Desktop/Lizz Research/CSV Outputs/Load Separate/RGI 13/PyGEM/'

for SSP in out_df_RGI14:
    for basin in out_df_RGI14[SSP]:
        for GCM in out_df_RGI14[SSP][basin]:
            fnameRGI14 = f"runoff_fromRGI14_{GCM}_{SSP}_{basin}.csv"

            # Define the full path of the output file
            output_pathRGI14 = os.path.join(output_dir_RGI13, fnameRGI14)

            # Save the DataFrame as CSV
            out_df_RGI14[SSP][basin][GCM].to_csv(output_pathRGI14, header=True, index=True)