### Aggregation of PyGEM runoff:

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from datetime import date
import collections
import datetime
import os
import xarray as xr

In [2]:
# Climate models (GCMs) whose PyGEM output is aggregated in this notebook.
modelnames_py = [
    'BCC-CSM2-MR',
    'CESM2',
    'CESM2-WACCM',
    'EC-Earth3',
    'EC-Earth3-Veg',
    'FGOALS-f3-L',
    'GFDL-ESM4',
    'INM-CM4-8',
    'INM-CM5-0',
    'MPI-ESM1-2-HR',
    'MRI-ESM2-0',
    'NorESM2-MM',
]

# Every SSP scenario available in PyGEM.
SSPs = ['ssp126', 'ssp245', 'ssp370', 'ssp585']

# Basin name -> basin ID string that is handed to select_glaciers_json.
Alpine_basins = {'LAKE BALKHASH': '2910'}

# Basin names, taken from the mapping above so the two cannot drift apart.
basins = list(Alpine_basins)

# Generic filepath to the Drive folder holding the PyGEM runoff files.
fpathPy = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/Runoff-intercomparison/PyGEM/13'

In [3]:
import itertools
import json


def select_glaciers_json(basin='all', fpath=None):
    '''
    Select glaciers within a basin by MRBID from a json-file.

    The json-file maps each MRBID to the list of glaciers in that basin.

    Args:
    -----
    basin: str
        String of MRBID or 'all' (case-insensitive).
    fpath: str, optional
        Path of the json-file to read. When omitted, the
        rgi_ids_per_basin.json stored in the Drive folder is used.

    Returns:
    --------
    If basin is 'all' a list of all relevant glaciers is returned, for
    initiating glacier simulations. If basin is a MRBID the list of glaciers
    within that basin is returned.

    Raises:
    -------
    KeyError
        If basin is neither 'all' nor a MRBID present in the json-file.

    Copy of a function written by Erik Holmgren (2022) in holmgren_gha.utils
    '''

    if fpath is None:
        # fpath = './data/rgi_ids_per_basin.json'
        fpath = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/rgi_ids_per_basin.json'

    with open(fpath) as f:
        basin_dict = json.load(f)

    if basin.lower() == 'all':
        # Flatten the per-basin lists into one list covering every basin.
        return list(itertools.chain.from_iterable(basin_dict.values()))

    return basin_dict[basin]

In [4]:
# Basin name -> list of glaciers returned by select_glaciers_json for its ID.
basin_gls = {
    basin_name: select_glaciers_json(basin_code)
    for basin_name, basin_code in Alpine_basins.items()
}

In [5]:
# Import the monthly runoff data for every SSP and merge the batch files of
# each SSP along the glacier dimension. No annual sum or unit conversion is
# applied in this cell.
import glob      # use glob to group files by filename similarities (in this case, SSP)
import warnings

rf_ds = {}
for SSP in SSPs:
    fpath1 = '/R13_runoff_monthly_c2_ba1_1set_2000_2100-{}'.format(SSP)
    file_pattern = f'{fpathPy + fpath1}*.nc'
    # glob returns matches in arbitrary order; sort them so the glacier order
    # of the concatenated result is the same on every run.
    file_list = sorted(glob.glob(file_pattern))

    datasets = []  # Create an empty list for each SSP
    if not file_list:
        # Without files there is nothing to concatenate; say so instead of
        # silently leaving this SSP out of rf_ds.
        warnings.warn(f'No runoff files match {file_pattern!r}; {SSP} is skipped.')
        continue

    for file in file_list:
        with xr.open_dataset(file) as ds:
            # Load into memory so the data outlives the closed file handle.
            datasets.append(ds.glac_runoff_monthly.load())

    combined_ds = xr.concat(datasets, dim='glacier')  # Concatenate the datasets
    rf_ds[SSP] = combined_ds

In [6]:
# Sorting into basins: for each basin keep only the glaciers whose RGIId is in
# that basin's glacier list, per SSP.
basin_ds_monthly = {}
for basin, glacier_list in basin_gls.items():
    monthly_by_ssp = {}
    basin_ds_monthly[basin] = monthly_by_ssp
    for SSP in SSPs:
        ssp_runoff = rf_ds[SSP]
        try:
            # Drop the irrelevant IDs.
            ds_filtered_monthly = ssp_runoff.where(
                ssp_runoff.RGIId.isin(glacier_list), drop=True
            )
        except ValueError:
            # Happens if there are no glaciers from this batch in the selected region.
            continue
        monthly_by_ssp[SSP] = xr.concat([ds_filtered_monthly], dim='glacier')

In [7]:
# Flip the indexing to [SSP][basin] (to match other models) and sum over all
# glaciers of each basin. The 1e-9 factor rescales the totals, presumably from
# m^3 to km^3 (TODO confirm the unit of the PyGEM runoff variable).
basin_sums_monthly_py = {
    SSP: {
        basin: basin_ds_monthly[basin][SSP].sum(dim='glacier') * 1e-9
        for basin in basin_gls
    }
    for SSP in SSPs
}

### CSV Readout

In [8]:
## Names used to build the output filenames (GCM, scenario, basin) in the
## nested loops that follow.
modelnames_all = [
    'BCC-CSM2-MR',
    'CESM2',
    'CESM2-WACCM',
    'EC-Earth3',
    'EC-Earth3-Veg',
    'FGOALS-f3-L',
    'GFDL-ESM4',
    'INM-CM4-8',
    'INM-CM5-0',
    'MPI-ESM1-2-HR',
    'MRI-ESM2-0',
    'NorESM2-MM',
]

scenarios = ['ssp126', 'ssp245', 'ssp370', 'ssp585']

basins = ['LAKE BALKHASH']

In [9]:
# Monthly 'YYYY-MM' labels from January 2000 through December 2100, used as
# the row index of every output table.
start_date = datetime.date(2000, 1, 1)
end_date = datetime.date(2100, 12, 1)
month_starts = pd.date_range(start_date, end_date, freq='MS')
indices = month_starts.strftime('%Y-%m').tolist()

# One dataframe per SSP, basin, and GCM, holding the PyGEM runoff series.
out_df_RGI13 = {}
for SSP in scenarios:
    frames_by_basin = out_df_RGI13[SSP] = {}
    for basin in basins:
        frames_by_model = frames_by_basin[basin] = {}
        basin_runoff = basin_sums_monthly_py[SSP][basin]
        # The model coordinate is selected by number, starting at 1 for the
        # first name in modelnames_all.
        # NOTE(review): assumes the dataset's model numbering follows the
        # order of modelnames_all - confirm against the PyGEM files.
        for model_number, model in enumerate(modelnames_all, start=1):
            model_runoff = basin_runoff.sel(model=model_number)
            pygem_values = pd.DataFrame(model_runoff).values.flatten()
            frames_by_model[model] = pd.DataFrame(
                {'PyGEM': pygem_values},
                index=indices,
            )

In [10]:
# Export the RGI 13 PyGEM tables: one CSV per GCM, SSP, and basin.
output_dir_RGI13 = '/Users/finnwimberly/Desktop/Lizz Research/CSV Outputs/Load Separate/RGI 13/PyGEM/'

# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(output_dir_RGI13, exist_ok=True)

for SSP, frames_by_basin in out_df_RGI13.items():
    for basin, frames_by_gcm in frames_by_basin.items():
        for GCM, frame in frames_by_gcm.items():
            fnameRGI13 = f"runoff_{GCM}_{SSP}_{basin}.csv"

            # Define the full path of the output file
            output_pathRGI13 = os.path.join(output_dir_RGI13, fnameRGI13)

            # Save the DataFrame as CSV, keeping the column header and the
            # monthly index
            frame.to_csv(output_pathRGI13, header=True, index=True)