## Aggregating Net Basin Runoff - GloGEM

#### Loading in all Modules and Data

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from datetime import date
import collections
import datetime
import os

## Generic filepaths to the main data folders:
## fpath0 is the root of the per-model / per-SSP discharge and area files, fpath1 the root of the per-SSP volume files
fpath0 = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/GloGEM Outputs/CentralEurope/files/'  
fpath1 = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/GloGEM Outputs/Runoff-intercomparison/GloGEM/Volume' 

#All of the climate models used (each name is also a sub-folder of fpath0)
modelnames = ['BCC-CSM2-MR','CAMS-CSM1-0','CESM2','CESM2-WACCM','EC-Earth3','EC-Earth3-Veg','FGOALS-f3-L','GFDL-ESM4',
              'INM-CM4-8','INM-CM5-0','MPI-ESM1-2-HR','MRI-ESM2-0']

SSPpaths = ['ssp126','ssp245','ssp370','ssp585']   #Specifying the SSPs used for the discharge and area data
SSPs = ['ssp119','ssp126','ssp245','ssp370','ssp585'] #Use a different list as we have all 5 SSPs for volume

### Processing Runoff Data

In [None]:
# Read the discharge table of every climate model under every SSP.
# Layout: all_discharges[s][m] is the DataFrame for SSPpaths[s] and modelnames[m].
all_discharges = []

for SSPpath in SSPpaths:
    model_discharges = []
    for modelname in modelnames:
        discharge_file = fpath0 + modelname + '/' + SSPpath + '/' + 'centraleurope_Discharge_r1.dat'
        # Raw string for the regex separator: '\s' in a plain literal is an invalid escape sequence.
        # The first line of the file is skipped and the first column becomes the row index.
        temp_df = pd.read_csv(discharge_file, sep=r'\s+', header=None, skiprows=1, index_col=0)
        model_discharges.append(temp_df)
    all_discharges.append(model_discharges)

In [None]:
# Read the area table of every climate model under every SSP.
# Layout: all_areas[s][m] is the DataFrame for SSPpaths[s] and modelnames[m].
all_areas = []

for SSPpath in SSPpaths:
    model_areas = []
    for modelname in modelnames:
        area_file = fpath0 + modelname + '/' + SSPpath + '/' + 'centraleurope_Area_r1.dat'
        # Raw string for the regex separator: '\s' in a plain literal is an invalid escape sequence.
        # The file's own header row is kept and its "ID" column becomes the row index.
        temp_df = pd.read_csv(area_file, sep=r'\s+', index_col="ID")
        model_areas.append(temp_df)
    all_areas.append(model_areas)

In [None]:
# Month labels ('YYYY-MM', one per month start) from October 1979 through September 2100
start_date = datetime.date(1979, 10, 1)
end_date = datetime.date(2100, 9, 1)
new_indices = pd.date_range(start_date, end_date, freq='MS').strftime('%Y-%m').tolist()

# Parse the labels once (the result is the same for every frame) and use them
# directly as the column index of each discharge table.
monthly_columns = pd.to_datetime(new_indices)
for SSPpath_discharges in all_discharges:
    for discharge_df in SSPpath_discharges:
        discharge_df.columns = monthly_columns

In [None]:
#expanding area dataset to match year-month dimension:
# every column is repeated 12 times (presumably one source column per year -- confirm against
# the area files) and the result is relabelled with the monthly datetimes built from new_indices
area_month_columns = pd.to_datetime(new_indices)

for areas in all_areas:
    for i, area in enumerate(areas):
        expanded = area[area.columns.repeat(12)]
        expanded.columns = area_month_columns
        areas[i] = expanded   # replace the table in place inside all_areas

In [None]:
# Runoff is computed against the initial area, so every column of each area table
# is replaced by that table's first column.
# Only the first SSP (all_areas[0]) is read: the initial areas are the same for all SSPs,
# so looping over the others would only cost time.
# The factor 1e6 converts km^2 to m^2.

initial_areas = [
    pd.DataFrame(
        np.tile(df.iloc[:, [0]].to_numpy(), (1, df.shape[1])),  # first column copied across all columns
        index=df.index,
        columns=df.columns,
    ) * 1e6
    for df in all_areas[0]
]


#### Having properly indexed our dataframes, we define the runoff of each glacier:

In [None]:
# Nested dictionary runoff[ssp][model]: initial area times discharge, element-wise.
# initial_areas is indexed by model position only (it is shared by all SSPs);
# all_discharges is indexed by SSP position, then model position.
# The product is stored directly: concatenating a single frame adds nothing.
runoff = {
    ssp: {
        model: initial_areas[m] * all_discharges[s][m]
        for m, model in enumerate(modelnames)
    }
    for s, ssp in enumerate(SSPpaths)
}

In [None]:
import itertools  # not needed by this cell's own code, but select_glaciers_json relies on this import

# annualrunoff[ssp][model]: runoff transposed so that time is the row index, then summed per 'A' bin.
# NOTE(review): 'A' bins end in December while the monthly labels run Oct 1979 - Sep 2100, so the
# first and last bins hold partial years -- confirm this is intended. Recent pandas releases also
# deprecate the 'A' alias in favour of 'YE'.
annualrunoff = {
    s: {m: runoff[s][m].transpose().resample('A').sum() for m in modelnames}
    for s in SSPpaths
}

### Processing Volume Data:

In [None]:
#Importing the volume data for each SSP: all_volumes[k] is the table read for SSPs[k]
# (raw string for the regex separator: '\s' in a plain literal is an invalid escape sequence)
all_volumes = [
    pd.read_csv(os.path.join(fpath1, SSP, 'RGIreg11_Volume_individual.dat'),
                sep=r'\s+', header=0, skiprows=1, index_col=0)
    for SSP in SSPs
]

In [None]:
#Creating our new index
# Here we just treat the RGI ID as a float (11.NNNNN) to extract the last digits.
# round() is required before int(): the float arithmetic can land just below the whole number
# (e.g. 896.9999999), and plain int() would truncate that to the wrong glacier number.
new_indices = [int(round((rgi_id - 11) * 10**5)) for rgi_id in all_volumes[0].index]

# Every SSP table gets the index derived from the first table
# (assumes all tables list the same glaciers in the same order -- TODO confirm)
for volume_df in all_volumes:
    volume_df.index = new_indices

In [None]:
#Convert ice volume to water volume (factor 0.9), then rescale by 10**6
# NOTE(review): this step was described as km^3 -> m^3, but 1 km^3 = 1e9 m^3; confirm the units
# of the volume files before relying on the absolute values.
water_volumes = [df.mul(0.9).mul(10**6) for df in all_volumes]

In [None]:
#Making a new data set showing the change in volume from one column to the next
glacial_change = []
for water_df in water_volumes:
    step_change = water_df.diff(axis=1)
    # the first column of a diff has nothing to be differenced against, so it is dropped
    glacial_change.append(step_change.drop(columns=step_change.columns[0]))

In [None]:
#Dictionary indexed by ssp holding the negated volume change (a volume loss becomes a positive value)
glacial_runoff = dict(zip(SSPs, (-change for change in glacial_change)))

### Aggregating by basin:

In [None]:
import itertools
import json


def select_glaciers_json(basin='all', fpath=None):
    '''
    Select glaciers within a basin by MRBID from a json-file,
    which is stored in the data directory.

    Args:
    -----
    basin: str
        String of MRBID or 'all' (case-insensitive). Other values are
        converted with str() before the lookup, so an integer MRBID works too.
    fpath: str, optional
        Path of the json-file mapping MRBID -> list of glacier IDs.
        Defaults to the author's local copy of rgi_ids_per_basin.json.

    Returns:
    --------
    If basin is 'all' a list of all relevant glaciers is returned, for
    initiating glacier simulations. If basin is a MRBID the list of glaciers
    within that basin is returned.

    Raises:
    -------
    KeyError if the MRBID is not a key of the json-file.

    Copy of a function written by Erik Holmgren (2022) in holmgren_gha.utils,
    extended with the optional fpath argument.
    '''

    if fpath is None:
        # the function this was copied from used './data/rgi_ids_per_basin.json'
        fpath = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/rgi_ids_per_basin.json'
    with open(fpath, encoding='utf-8') as f:
        basin_dict = json.load(f)

    basin = str(basin)
    if basin.lower() == 'all':
        # flatten the per-basin lists into one list
        return list(itertools.chain.from_iterable(basin_dict.values()))

    return basin_dict[basin]

In [None]:
def sum_basin(basin_RGI_list, runoff_data):
    '''
    Sum runoff over the glaciers of one basin.

    Args:
    -----
    basin_RGI_list: list
        Glacier IDs of the basin. The digits after the last '.' of each ID
        (or the whole ID when it contains no '.') are read as the integer
        glacier number.
    runoff_data: pandas.DataFrame
        Runoff with one column per glacier, labelled by integer glacier number.

    Returns:
    --------
    pandas.Series with one entry per row of runoff_data: the sum over all
    listed glaciers that are present in runoff_data. Listed glaciers that
    are missing from runoff_data are silently skipped.
    '''
    # Create new list to match our RGI formatting.
    # The full numeric suffix is used (not a fixed number of characters) so that
    # glacier numbers with five significant digits keep their leading digit.
    glacier_numbers = [int(str(rgi_id).rpartition('.')[2]) for rgi_id in basin_RGI_list]

    # One row per glacier
    per_glacier = runoff_data.transpose()

    #TODO: create list of glaciers not included in GloGEM output
    # Keep only the glacier numbers present in the DataFrame
    present = [number for number in glacier_numbers if number in per_glacier.index]

    # Sum the values of the glaciers within the basin
    return per_glacier.loc[present].sum()

In [None]:
#Generating the aggregated basin data
alpine_basins = {'RHINE':'6242', 'RHONE':'6243','PO':'6241', 'DANUBE':'6202'} ## GRDC Major River Basin identifiers for the 4 alpine basins we can study

# Read each basin's glacier list once: it does not depend on the SSP or the climate model
basin_glaciers = {name: select_glaciers_json(mrbid) for name, mrbid in alpine_basins.items()}

# Nested dictionaries indexed by ssp, then model name; the factor 1e-9 converts m^3 to km^3
Rhine_runoff = {s: {m: sum_basin(basin_glaciers['RHINE'], annualrunoff[s][m])*1e-9 for m in modelnames} for s in SSPpaths}
Rhone_runoff = {s: {m: sum_basin(basin_glaciers['RHONE'], annualrunoff[s][m])*1e-9 for m in modelnames} for s in SSPpaths}
Po_runoff = {s: {m: sum_basin(basin_glaciers['PO'], annualrunoff[s][m])*1e-9 for m in modelnames} for s in SSPpaths}
Danube_runoff = {s: {m: sum_basin(basin_glaciers['DANUBE'], annualrunoff[s][m])*1e-9 for m in modelnames} for s in SSPpaths}

# 30-entry rolling mean of every annual basin series (the first 29 entries of each are NaN)
rollingmean_Rhone = {s: {m: Rhone_runoff[s][m].rolling(30).mean() for m in modelnames} for s in SSPpaths}
rollingmean_Rhine = {s: {m: Rhine_runoff[s][m].rolling(30).mean() for m in modelnames} for s in SSPpaths}
rollingmean_Po = {s: {m: Po_runoff[s][m].rolling(30).mean() for m in modelnames} for s in SSPpaths}
rollingmean_Danube = {s: {m: Danube_runoff[s][m].rolling(30).mean() for m in modelnames} for s in SSPpaths}

In [None]:
# Setting up our color scheme: one magma shade per climate model
color_map = plt.colormaps['magma']
colors = {model: color_map(i / len(modelnames)) for i, model in enumerate(modelnames)}

scenarios = ['ssp126', 'ssp245', 'ssp370', 'ssp585']

# x values are the calendar years of the annual series themselves, so every point is drawn
# at the year it is labelled with (an evenly spaced guess would give non-integer "years").
yrs_runoff = rollingmean_Rhine[scenarios[0]][modelnames[0]].index.year

# One subplot column per basin:
# (column title, rolling-mean data, y-axis limits, y position of the scenario label)
basin_panels = [
    ('Rhine River Basin', rollingmean_Rhine, (.95, 1.55), 1.49),
    ('Rhone River Basin', rollingmean_Rhone, (2.3, 3.9), 3.75),
    ('Po River Basin', rollingmean_Po, (0.65, 1.2), 1.15),
    ('Danube River Basin', rollingmean_Danube, (.78, 1.45), 1.39),
]

fig, axs = plt.subplots(len(scenarios), len(basin_panels), figsize=(12, 10), sharex=True)

for col, (basin_title, rolling_data, y_limits, label_y) in enumerate(basin_panels):
    axs[0, col].set_title(basin_title)        # column title above the top panel
    for row, s in enumerate(scenarios):
        ax = axs[row, col]
        ax.set(xlim=(2000, 2100), ylim=y_limits)
        ax.text(2070, label_y, s)             # scenario name inside the panel
        for m in modelnames:
            series = rolling_data[s][m]
            ax.plot(series.index.year, series, label=m, c=colors[m])

#Setting x labels (bottom row only)
for ax in axs[-1, :]:
    ax.set_xlabel('Year')
#Setting y labels (left column only)
for ax in axs[:, 0]:
    ax.set_ylabel(r'Rolling Mean Runoff $[km^3]$')

plt.subplots_adjust(top=0.85, wspace=0.3, hspace=0.2)
plt.suptitle('Runoff of Major River Basins in Central Europe, Projected by GloGEM')
# Single shared legend, anchored relative to the bottom-left panel so that it sits above the grid
axs[-1, 0].legend(bbox_to_anchor=(4.96, 5.27), ncol=6)

plt.show()