## Aggregation of GloGEM runoff:

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from datetime import date
import collections
import datetime
import os
import xarray as xr

# Root folder holding the GloGEM output files for this region; the per-model
# and per-scenario sub-folders are appended to it when files are read.
fpath0 = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/Runoff-intercomparison/GloGEM-output/RGI06-Iceland/files/'

# Climate models (GCMs) whose output folders are read.
modelnames = [
    'BCC-CSM2-MR',
    'CAMS-CSM1-0',
    'CESM2',
    'CESM2-WACCM',
    'EC-Earth3',
    'EC-Earth3-Veg',
    'FGOALS-f3-L',
    'GFDL-ESM4',
    'INM-CM4-8',
    'INM-CM5-0',
    'MPI-ESM1-2-HR',
    'MRI-ESM2-0',
    'NorESM2-MM',
]

# Scenario (SSP) folder names used for the runoff files.
SSPpaths = ['ssp126', 'ssp245', 'ssp370', 'ssp585']
# Alternative five-scenario list, kept for reference (all 5 SSPs exist for volume):
#SSPs = ['ssp119','ssp126','ssp245','ssp370','ssp585']

### Loading/Processing

In [15]:
# Read the discharge table for every scenario/GCM combination.
# Layout of the result: all_discharges[i][j] is the DataFrame read for
# SSPpaths[i] and modelnames[j].
all_discharges = []

for SSPpath in SSPpaths:
    model_discharges = []
    for modelname in modelnames:
        temp_df = pd.read_csv(
            fpath0 + modelname + '/' + SSPpath + '/' + 'Iceland_Discharge_r1.dat',
            sep=r'\s+',    # whitespace-delimited; raw string because '\s' is not a valid str escape
            header=None,   # column labels are assigned in a later step
            skiprows=1,    # drop the first line of the file
            index_col=0,   # first column becomes the row index (presumably glacier IDs -- confirm)
        )
        model_discharges.append(temp_df)
    all_discharges.append(model_discharges)

In [16]:
# Build the monthly time axis: one 'YYYY-MM' label per month-start from
# January 1980 through December 2100.
start_date = datetime.date(1980, 1, 1)
end_date = datetime.date(2100, 12, 1)
new_indices = pd.date_range(start_date, end_date, freq='MS').strftime('%Y-%m').tolist()

# Relabel the columns of every discharge table with datetime values.
# The conversion is done once and the resulting index is assigned directly;
# an intermediate assignment of the string labels would just be overwritten.
datetime_columns = pd.to_datetime(new_indices)
for ssp_tables in all_discharges:
    for discharge_table in ssp_tables:
        discharge_table.columns = datetime_columns

In [17]:
# Convert the discharge tables to runoff volumes by multiplying every glacier's
# row by its area. Only the first data column of the area file is used
# (presumably the initial-year area -- confirm against the GloGEM file layout),
# and it is converted from km^2 to m^2.

# Nested dictionaries keyed by scenario, then by model name.
runoff = {s: {m: None for m in modelnames} for s in SSPpaths}
all_areas = {s: {m: None for m in modelnames} for s in SSPpaths}

for s, SSP in enumerate(SSPpaths):
    for m, modelname in enumerate(modelnames):
        # Raw string for the separator: '\s' is not a valid str escape.
        area_df = pd.read_csv(fpath0 + modelname + '/' + SSP + '/' + 'Iceland_Area_r1.dat', sep=r'\s+', index_col="ID")
        all_areas[SSP][modelname] = area_df

        discharge_df = all_discharges[s][m]

        # One area value per glacier row, km^2 -> m^2. A plain array is used so
        # the multiplication is positional (row i of the area file scales row i
        # of the discharge table), with no label alignment between the files.
        initial_area_m2 = area_df.iloc[:, 0].to_numpy() * 1e6

        # axis=0 broadcasts the per-row areas across all time columns.
        runoff[SSP][modelname] = discharge_df.mul(initial_area_m2, axis=0)

In [18]:
import itertools
# Annual runoff totals per scenario and model, scaled by 1e-9 (m^3 to km^3).
# NOTE: recent pandas releases (2.2+) warn that the 'A' alias is deprecated in
# favour of 'YE'; it is left unchanged here to keep older versions working.
annualrunoff = {}
for s in SSPpaths:
    annualrunoff[s] = {}
    for m in modelnames:
        # Put time on the row axis so the table can be resampled by year.
        runoff_by_date = runoff[s][m].T
        yearly_totals = runoff_by_date.resample('A').sum()
        annualrunoff[s][m] = yearly_totals * 1e-9  #m^3 to km^3

In [19]:
import json
def select_glaciers_json(basin='all', fpath=None):
    '''
    Select glaciers within a basin by MRBID from a json-file.

    The json-file is expected to hold a mapping from MRBID strings to lists
    of glacier identifiers.

    Args:
    -----
    basin: str
        String of MRBID or 'all' (case-insensitive).
    fpath: str or None
        Path to the json-file. When None (the default) the original
        hard-coded location is used, so existing calls behave as before.

    Returns:
    --------
    If basin is 'all' a list of all relevant glaciers is returned, for
    initiating glacier simulations. If basin is a MRBID the list of glaciers
    within that basin is returned.

    Raises:
    -------
    KeyError
        If basin is not 'all' and is not a key of the json-file.

    Copy of a function written by Erik Holmgren (2022) in holmgren_gha.utils
    '''

    if fpath is None:
        # fpath = './data/rgi_ids_per_basin.json'
        fpath = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/rgi_ids_per_basin.json'

    # Explicit encoding so the result does not depend on the platform default.
    with open(fpath, encoding='utf-8') as f:
        basin_dict = json.load(f)

    if basin.lower() != 'all':
        glacier_list = basin_dict[basin]
    else:
        # Concatenate the glacier lists of every basin, in file order.
        glacier_list = list(itertools.chain.from_iterable(basin_dict.values()))

    return glacier_list

In [20]:
def sum_basin(basin_RGI_list, runoff_data):
    """Sum the runoff of all glaciers of one basin.

    Args:
        basin_RGI_list: Iterable of glacier identifiers. Only the last five
            characters of each identifier are used, converted to int.
        runoff_data: DataFrame whose column labels are those integer glacier
            numbers; each row is one time step.

    Returns:
        A Series with one value per row label of ``runoff_data``: the sum
        over the basin's glaciers that are present in the data. Identifiers
        with no matching column are ignored.
    """
    # Put the glaciers on the row axis so they can be selected by label.
    per_glacier = runoff_data.T

    # Reduce each identifier to the integer form used as labels in the data.
    glacier_numbers = [int(str(rgi_id)[-5:]) for rgi_id in basin_RGI_list]

    #TODO: create list of glaciers within a basin that are not included in GloGEM output
    # Drop the glaciers that the data does not contain.
    available = [number for number in glacier_numbers if number in per_glacier.index]

    # Select the basin's rows and add them up for every time step.
    return per_glacier.loc[available].sum()

In [21]:
#Generating the aggregated basin data

# Basin name -> key passed to select_glaciers_json (the basin's MRBID string).
Alpine_basins = {'THJORSA':'6254', 'OLFUSA':'6237', 'SVARTA':'6110', 'LAGARFLJOT':'6104', 'JOKULSA A FJOLLUM':'6101'}

basins = ['THJORSA', 'OLFUSA', 'SVARTA', 'LAGARFLJOT', 'JOKULSA A FJOLLUM']

modelnames_glo = ['BCC-CSM2-MR','CAMS-CSM1-0','CESM2','CESM2-WACCM','EC-Earth3','EC-Earth3-Veg','FGOALS-f3-L','GFDL-ESM4',
              'INM-CM4-8','INM-CM5-0','MPI-ESM1-2-HR','MRI-ESM2-0', 'NorESM2-MM']

scenarios = ['ssp126','ssp245','ssp370','ssp585']

# The glacier list of a basin does not depend on scenario or model, so it is
# looked up once per basin instead of re-reading the JSON file inside the
# innermost loop.
basin_glacier_ids = {name: select_glaciers_json(Alpine_basins[name]) for name in basins}

# Both result dictionaries are nested as [scenario][basin][model].
basin_sums_glo = {}          # basin totals of the annual runoff
basin_sums_monthly_glo = {}  # basin totals of the monthly runoff, scaled by 1e-9

# The loop variables keep the names SSP / basin / model because they remain
# bound at notebook scope after the loops finish.
for SSP in scenarios:
    basin_sums_glo[SSP] = {}
    basin_sums_monthly_glo[SSP] = {}
    for basin in basins:
        glacier_ids = basin_glacier_ids[basin]
        basin_sums_glo[SSP][basin] = {}
        basin_sums_monthly_glo[SSP][basin] = {}
        for model in modelnames_glo:
            basin_sums_glo[SSP][basin][model] = sum_basin(glacier_ids, annualrunoff[SSP][model])
            # sum_basin expects time on the row axis, hence the transpose.
            basin_sums_monthly_glo[SSP][basin][model] = sum_basin(glacier_ids, runoff[SSP][model].transpose()*1e-9)

### CSV Readout

In [22]:
# Settings for the CSV read-out: the GCMs, scenarios and basins to write.
# NOTE(review): this GCM list has 12 entries and leaves out 'CAMS-CSM1-0',
# which is present in modelnames_glo -- confirm the omission is intended.
modelnames_all = [
    'BCC-CSM2-MR',
    'CESM2',
    'CESM2-WACCM',
    'EC-Earth3',
    'EC-Earth3-Veg',
    'FGOALS-f3-L',
    'GFDL-ESM4',
    'INM-CM4-8',
    'INM-CM5-0',
    'MPI-ESM1-2-HR',
    'MRI-ESM2-0',
    'NorESM2-MM',
]

scenarios = ['ssp126', 'ssp245', 'ssp370', 'ssp585']

basins = [
    'THJORSA',
    'OLFUSA',
    'SVARTA',
    'LAGARFLJOT',
    'JOKULSA A FJOLLUM',
]

In [23]:
# Number of leading monthly values dropped from every series. If the series
# start in January 1980 (the time axis assigned to the discharge tables),
# skipping 240 months makes the output begin in January 2000.
first_output_month = 240

# Reference GloGEM datetime index. The keys are taken explicitly from the
# configuration lists rather than from loop variables left over from an
# earlier cell, so this cell does not depend on what those variables last held.
indices = basin_sums_monthly_glo[scenarios[-1]][basins[-1]][modelnames_all[-1]].iloc[first_output_month:].index

# One DataFrame per scenario, basin and GCM, nested as out_df[SSP][basin][model].
# Each frame has a single 'GloGEM' column holding the monthly basin runoff.
out_df = {}
for SSP in scenarios:
    out_df[SSP] = {}
    for basin in basins:
        out_df[SSP][basin] = {}
        for model in modelnames_all:
            glo_series = basin_sums_monthly_glo[SSP][basin][model].iloc[first_output_month:]

            # Each frame is indexed by the dates of its own series, so values
            # and dates cannot get out of step.
            out_df[SSP][basin][model] = pd.DataFrame(
                {
                    'GloGEM': glo_series.values.flatten(),
                },
                index=glo_series.index
            )

In [24]:
# Define the directory to save the CSV files
output_dir = '/Users/finnwimberly/Desktop/Lizz Research/CSV Outputs/Load Separate/RGI 06/GloGEM/'

# Create the directory (and any missing parents) up front; to_csv does not
# create folders and would otherwise fail on the first file.
os.makedirs(output_dir, exist_ok=True)

# Write one CSV per scenario / basin / GCM combination.
for SSP in out_df:
    for basin in out_df[SSP]:
        for GCM in out_df[SSP][basin]:
            # The file name encodes GCM, scenario and basin (basin names may contain spaces).
            fname = f"runoff_{GCM}_{SSP}_{basin}.csv"

            # Define the full path of the output file
            output_path = os.path.join(output_dir, fname)

            # Save the DataFrame as CSV, including the column header and the index
            out_df[SSP][basin][GCM].to_csv(output_path, header=True, index=True)