# Plot precip to contextualize drought metrics
We have been studying SPEI in context with percent glaciation and with historical aridity index.  We know from Finn's work that glacier models modify the incoming precip over glaciers, sometimes by a lot.  Is this at play in the basins where we see differences among the models' drought indices?

18 Jun 2024 | EHU
- 19 Jun 24: Added full set of 11 GCMs that Sloan pulled.  Updating model names accordingly.
- 25 Jul 24: Correct the units of the aggregated precip.  Current version is m3/mo.  Get to mm/day by (1) dividing by basin area in m2 and by the number of days in the associated month and (2) multiplying by 1000 (m-->mm)
- 7 Aug 24: Correct read-in -- `P_by_basin` was reading in order of basins_all instead of basin_name_list. Updating this to reflect correct order makes these plots align with what Sloan has previously shown.
- 14 Aug 2024: Clean up process. Make plot of all 75 basins included in multi-glacier-model intercomparison study

In [None]:
import datetime
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.patches import Rectangle
import gSPEI as gSPEI

In [None]:
## Colormap used to give each GCM its own line color in the plots below.
## `cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
## `plt.get_cmap` looks up the same named colormap on both old and new versions.
default_colors = plt.get_cmap('plasma')

## Preliminary step: plot GCM precip directly
We have been showing a plot from Sloan that compares CMIP6 model dispersion in total basin precip over the Rhone.  What is the spread in our selected set of models?

In [None]:
## Directory holding the per-GCM precipitation text files, streamed from Google Drive.
## Kept as a plain string with a trailing slash: the read-in cell appends filenames with `+`.
fpath = '/Volumes/GoogleDrive/My Drive/Runoff-intercomparison/GCM-PET_Precip/'


## Settings in filenames -- slightly different model names and scenarios
modelnames = ['BCC-CSM2-MR',
              'MPI-ESM1-2-HR',
              'MRI-ESM2-0',
              'CESM2-WACCM',
              'NorESM2-MM',
              'FGOALS-f3-L',
              'GFDL-ESM4',
              'INM-CM4-8',
              'INM-CM5-0',
              'EC-Earth3',
              'EC-Earth3-Veg']
## Full scenario list, for reference: ['Ssp2p6', 'Ssp4p5', 'Ssp7p0', 'Ssp8p5']
scenarios = ['Ssp4p5', 'Ssp8p5'] # climate scenarios included for 11 GCM set


## Fractional-year time axis for the data: 2412 monthly steps = 201 years starting at 1900.
## endpoint=False keeps the spacing at exactly 1/12 yr so the last sample is December 2100;
## including the endpoint would stretch the step to 201/2411 yr and end the axis on 2101.0.
yrs = np.linspace(1900, 2101, num=2412, endpoint=False)

In [None]:
## Basins in the order they are written
## Basin name --> 4-character code string, listed in the order the basins are written.
## NOTE: the per-basin precip arrays are indexed by `basin_name_list` (read from the
## area file), not by the order of this dict.
basins_all = {
    'RHINE': '6242',
    'RHONE': '6243',
    'PO': '6241',
    'DANUBE': '6202',
    'TITICACA': '3912',
    'SANTA': '3425',
    'OCONA': '3418',
    'MAJES': '3416',
    'MAGDALENA': '3227',
    'AMAZON': '3203',
    'YELCHO': '3429',
    'VALDIVIA': '3428',
    'SERRANO': '3426',
    'RAPEL': '3423',
    'PUELO': '3422',
    'PASCUA': '3420',
    'PALENA': '3419',
    'HUASCO': '3412',
    'COPIAPO': '3409',
    'CISNES': '3408',
    'BIOBIO': '3405',
    'BAKER': '3404',
    'AZOPARDO': '3403',
    'AISEN': '3401',
    'SANTA CRUZ': '3244',
    'NEGRO': '3232',
    'COLORADO': '3212',
    'CHICO': '3209',
    'TORNEALVEN': '6255',
    'THJORSA': '6254',
    'OLFUSA': '6237',
    'LULEALVEN': '6227',
    'KUBAN': '6223',
    'KALIXALVEN': '6219',
    'GLOMAA': '6213',
    'DRAMSELVA': '6209',
    'SVARTA': '6110',
    'LAGARFLJOT': '6104',
    'JOKULSA A FJOLLUM': '6101',
    'CLUTHA': '5406',
    'YUKON': '4435',
    'TAKU': '4431',
    'SUSITNA': '4430',
    'STIKINE': '4428',
    'SKEENA': '4427',
    'SKAGIT': '4426',
    'NUSHAGAK': '4418',
    'NASS': '4416',
    'KUSKOKWIM': '4414',
    'FRASER': '4410',
    'COPPER': '4408',
    'COLUMBIA': '4406',
    'ALSEK': '4401',
    'NELSON': '4125',
    'MACKENZIE': '4123',
    'COLVILLE': '4110',
    'YSYK-KOL': '2919',
    'UVS NUUR': '2918',
    'TARIM HE': '2914',
    'TALAS': '2913',
    'LAKE BALKHASH': '2910',
    'HAR US NUUR': '2909',
    'CHUY': '2905',
    'ARAL SEA': '2902',
    'YELLOW RIVER': '2434',
    'MEKONG': '2421',
    'KAMCHATKA': '2413',
    'SALWEEN': '2319',
    'IRRAWADDY': '2310',
    'INDUS': '2309',
    'GANGES': '2306',
    'BRAHMAPUTRA': '2302',
    'OB': '2108',
    'INDIGIRKA': '2103',
    'YANGTZE': '2433',
}

In [None]:
## Read total basin areas from the summary .mat file -- follows Finn's percent-glaciation code
from scipy.io import loadmat
import pandas as pd

path_to_area = '/Volumes/GoogleDrive/My Drive/Runoff-intercomparison/Summary-statistics/BasinArea.mat' ## need access!
BasinAreas = loadmat(path_to_area)

## Pull the area and name arrays out of the dict returned by loadmat
basin_areas = BasinAreas['BasinArea']
basin_names = BasinAreas['BasinNam']

## Lookup that revises Sloan's names to Finn's spelling; called as revised_names(n, n)
## so that any name without a revision passes through unchanged
_spelling_fixes = {'ISSYK-KUL': 'YSYK-KOL', 'LAGARFLIOT': 'LAGARFLJOT'}
revised_names = _spelling_fixes.get

## Basin names in file order (each record keeps its name string at [1][0]), with spellings revised
basin_name_list = []
for record in basin_names:
    raw_name = record[1][0]
    basin_name_list.append(revised_names(raw_name, raw_name))

## One-column table of areas labelled by basin name, sorted alphabetically.
## Areas are presumably in km2 (the plotting cells convert km2 --> m2) -- confirm against the file.
area_table = pd.DataFrame({'Basin Area': basin_areas.squeeze()}, index=basin_name_list)
TotalBasinAreas = area_table.sort_index()

In [None]:
## Show the name-sorted basin-area table as the cell output, for a visual check of the read-in
TotalBasinAreas

In [None]:
## Load every GCM/scenario precip file into a nested dict:
## P_by_model[model][scenario] --> array read from '<model>_PREC_<scenario>.txt' under fpath.
## The array's first axis is sliced by basin index when P_by_basin is built.
P_by_model = {
    m: {s: np.loadtxt(fpath + '{}_PREC_{}.txt'.format(m, s)) for s in scenarios}
    for m in modelnames
}

In [None]:
## Regroup the model-major arrays into basin-major tables:
## P_by_basin[basin][scenario] is a DataFrame with one column per GCM.
## Entry i of each model array is paired with basin_name_list[i], so the loop
## must run over that list (Sloan's order) -- not over basins_all.
P_by_basin = {}
for i, b in enumerate(basin_name_list):
    P_by_basin[b] = {
        s: pd.DataFrame.from_dict({m: P_by_model[m][s][i] for m in modelnames})
        for s in scenarios
    }

Spot-check total precip for an example basin.

In [None]:
## Basin to spot-check; used as a key into P_by_basin and a row label of TotalBasinAreas
this_basin = 'RHONE'
## Month-end timestamps for the 2412 monthly records, starting with January 1900.
## An explicit MonthEnd offset replaces the 'M' string alias, which newer pandas
## deprecates in favour of 'ME'; the offset object is accepted by old and new versions alike.
rng = pd.date_range('1900-01-01', periods=2412, freq=pd.offsets.MonthEnd())

In [None]:
## Annual-mean basin precipitation [mm/day] for each GCM under SSP4.5.
## These are absolute values: no baseline is removed and no smoothing is applied.
fig, ax = plt.subplots()
basin_area_m2 = TotalBasinAreas.loc[this_basin].squeeze()*1e6 ## km2 --> m2; same for every GCM, so computed once
annual = pd.offsets.YearEnd() ## explicit offset instead of the deprecated 'A' resample alias
for i,this_GCM in enumerate(modelnames):
    this_precip = pd.Series(P_by_basin[this_basin]['Ssp4p5'][this_GCM])
    this_precip.index=rng
    ## monthly totals --> mean per day of that month (days_in_month supplies each month's length)
    this_precip_dailyavg = this_precip / this_precip.index.days_in_month
    ## basin-aggregated volume --> depth: m --> mm on top, area in m2 on the bottom
    this_precip_mmday = this_precip_dailyavg*1000 / basin_area_m2
    ax.plot(this_precip_mmday.resample(annual).mean(), label=this_GCM, color=default_colors(i/len(modelnames)))
## datetime.datetime rather than pd.datetime: the pandas alias was removed in pandas 2.0
## Title describes what is drawn (annual means), not the rolling-mean anomaly of later cells
ax.set(xlim=(datetime.datetime(1999,12,31),datetime.datetime(2100,12,31)),
      xlabel='Year',
      ylabel='Basin aggregated precip [mm/day]',
      title='{} basin precip (annual mean) \n from GCMs used to force glacier models'.format(this_basin.capitalize()));
ax.legend(loc='best')

Sloan previously plotted mm/day relative to 2000-2020.  Remove 2000-2020 mean and check anomaly series.



In [None]:
## Annual-mean precip anomaly [mm/day] for each GCM under SSP4.5,
## relative to that GCM's own 2000-2020 mean.
fig, ax = plt.subplots()
ax.axhline(0, color='k', ls=':', lw= 0.5) ## zero-anomaly reference line
basin_area_m2 = TotalBasinAreas.loc[this_basin].squeeze()*1e6 ## km2 --> m2; same for every GCM, so computed once
annual = pd.offsets.YearEnd() ## explicit offset instead of the deprecated 'A' resample alias
for i,this_GCM in enumerate(modelnames):
    this_precip = pd.Series(P_by_basin[this_basin]['Ssp4p5'][this_GCM])
    this_precip.index=rng

    ## monthly totals --> mean per day of that month (days_in_month supplies each month's length)
    this_precip_dailyavg = this_precip / this_precip.index.days_in_month
    ## basin-aggregated volume --> depth: m --> mm on top, area in m2 on the bottom
    this_precip_mmday = this_precip_dailyavg*1000 / basin_area_m2

    ## baseline: mean of the annual means over 2000-2020
    hist_mean = this_precip_mmday['2000-01-31':'2020-12-31'].resample(annual).mean().mean()
    ax.plot((this_precip_mmday.resample(annual).mean()-hist_mean), label=this_GCM, color=default_colors(i/len(modelnames)))
ax.legend(bbox_to_anchor=(1.05, 1.0), ncol=1)

## datetime.datetime rather than pd.datetime: the pandas alias was removed in pandas 2.0
ax.set(xlim=(datetime.datetime(1999,12,31),datetime.datetime(2100,12,31)),
      xlabel='Year',
      ylabel='Basin aggregated precip [mm/day]',
      title='{} basin precip anomaly (relative to 2000-2020) \n from GCMs used to force glacier models'.format(this_basin.capitalize()));

In [None]:
## 30-year rolling mean of the annual precip anomaly [mm/day] for each GCM under SSP4.5,
## relative to that GCM's own 2000-2020 mean.
fig, ax = plt.subplots()
ax.axhline(0, color='k', ls=':', lw= 0.5) ## zero-anomaly reference line
basin_area_m2 = TotalBasinAreas.loc[this_basin].squeeze()*1e6 ## km2 --> m2; same for every GCM, so computed once
annual = pd.offsets.YearEnd() ## explicit offset instead of the deprecated 'A' resample alias
for i,this_GCM in enumerate(modelnames):
    this_precip = pd.Series(P_by_basin[this_basin]['Ssp4p5'][this_GCM])
    this_precip.index=rng

    ## monthly totals --> mean per day of that month (days_in_month supplies each month's length)
    this_precip_dailyavg = this_precip / this_precip.index.days_in_month
    ## basin-aggregated volume --> depth: m --> mm on top, area in m2 on the bottom
    this_precip_mmday = this_precip_dailyavg*1000 / basin_area_m2

    ## baseline: mean of the annual means over 2000-2020
    hist_mean = this_precip_mmday['2000-01-31':'2020-12-31'].resample(annual).mean().mean()
    annual_anomaly = this_precip_mmday.resample(annual).mean()-hist_mean
    ## trailing 30-value window over the annual series; the first 29 years are NaN and are not drawn
    ax.plot(annual_anomaly.rolling(window=30).mean(), label=this_GCM, color=default_colors(i/len(modelnames)))
ax.legend(bbox_to_anchor=(1.05, 1.0), ncol=1)

## datetime.datetime rather than pd.datetime: the pandas alias was removed in pandas 2.0
ax.set(xlim=(datetime.datetime(1999,12,31),datetime.datetime(2100,12,31)),
      xlabel='Year',
      ylabel='Basin precip anomaly [mm/day]',
      title='{} basin precip anomaly (30-year rolling mean, rel. 2000-2020) \n from GCMs used to force glacier models'.format(this_basin.capitalize()));

Compare with the CMIP archive that Sloan visualized.  This has roughly the same order of magnitude, though the range is narrower than the full CMIP6 ensemble (here roughly -0.3 to +0.3, full ensemble roughly -0.8 to +0.4).  The 11 models do not seem to have a consistent increasing/decreasing trend; the full CMIP6 archive did not either.  Let's look at some other basins.

## Spot check a few other basins

In [None]:
## Annual-mean precip anomalies (rel. 2000-2020, SSP4.5) for four example basins, one panel each
test_basins = ['RHINE', 'RHONE', 'DANUBE', 'PO']
annual = pd.offsets.YearEnd() ## explicit offset instead of the deprecated 'A' resample alias

fig, axs = plt.subplots(2,2, sharex=True)
## Pair each panel with its basin directly, so the GCM index `i` below no longer
## shadows a panel index of the same name
for ax, this_basin in zip(axs.ravel(), test_basins):
    basin_label = ' '.join([s.capitalize() for s in this_basin.split(' ')]) ## manage multi-word basin names
    basin_area_m2 = TotalBasinAreas.loc[this_basin].squeeze()*1e6 ## km2 --> m2; once per basin

    ax.axhline(0, color='k', ls=':', lw= 0.5) ## zero-anomaly reference line
    for i,this_GCM in enumerate(modelnames):
        this_precip = pd.Series(P_by_basin[this_basin]['Ssp4p5'][this_GCM])
        this_precip.index=rng

        ## monthly totals --> mean per day of that month (days_in_month supplies each month's length)
        this_precip_dailyavg = this_precip / this_precip.index.days_in_month
        ## basin-aggregated volume --> depth: m --> mm on top, area in m2 on the bottom
        this_precip_mmday = this_precip_dailyavg*1000 / basin_area_m2

        ## baseline: mean of the annual means over 2000-2020
        hist_mean = this_precip_mmday['2000-01-31':'2020-12-31'].resample(annual).mean().mean()
        ax.plot((this_precip_mmday.resample(annual).mean()-hist_mean), label=this_GCM, color=default_colors(i/len(modelnames)))

    ax.set(xlim=(datetime.datetime(1999,12,31),datetime.datetime(2100,12,31)))
    ax.text(0.7, 0.1, basin_label, transform=ax.transAxes)

## One legend for the whole figure, anchored outside the top-right panel
axs[0,1].legend(bbox_to_anchor=(1.05, 1.0), ncol=1)
fig.suptitle('Basin precip anomaly (relative to 2000-2020) \n from GCMs used to force glacier models')
fig.supxlabel('Year')
fig.supylabel('Basin monthly precip anom. [mm/day]');

In [None]:
## 30-year rolling mean of the annual precip anomaly (rel. 2000-2020, SSP4.5)
## for the four example basins in `test_basins`, one panel each
fig, axs = plt.subplots(2,2, sharex=True)
annual = pd.offsets.YearEnd() ## explicit offset instead of the deprecated 'A' resample alias
## Pair each panel with its basin directly, so the GCM index `i` below no longer
## shadows a panel index of the same name
for ax, this_basin in zip(axs.ravel(), test_basins):
    basin_label = ' '.join([s.capitalize() for s in this_basin.split(' ')]) ## manage multi-word basin names
    basin_area_m2 = TotalBasinAreas.loc[this_basin].squeeze()*1e6 ## km2 --> m2; once per basin

    ax.axhline(0, color='k', ls=':', lw= 0.5) ## zero-anomaly reference line
    for i,this_GCM in enumerate(modelnames):
        this_precip = pd.Series(P_by_basin[this_basin]['Ssp4p5'][this_GCM])
        this_precip.index=rng

        ## monthly totals --> mean per day of that month (days_in_month supplies each month's length)
        this_precip_dailyavg = this_precip / this_precip.index.days_in_month
        ## basin-aggregated volume --> depth: m --> mm on top, area in m2 on the bottom
        this_precip_mmday = this_precip_dailyavg*1000 / basin_area_m2

        ## baseline: mean of the annual means over 2000-2020
        hist_mean = this_precip_mmday['2000-01-31':'2020-12-31'].resample(annual).mean().mean()
        annual_anomaly = this_precip_mmday.resample(annual).mean()-hist_mean
        ## trailing 30-value window over the annual series; the first 29 years are NaN and are not drawn
        ax.plot(annual_anomaly.rolling(window=30).mean(),
                label=this_GCM, color=default_colors(i/len(modelnames)))

    ax.set(xlim=(datetime.datetime(1999,12,31),datetime.datetime(2100,12,31)))
    ax.text(0.1, 0.9, basin_label, transform=ax.transAxes)

## One legend for the whole figure, anchored outside the top-right panel
axs[0,1].legend(bbox_to_anchor=(1.05, 1.0), ncol=1)
fig.suptitle('Basin precip anomaly (30-year rolling mean, rel. to 2000-2020) \n from GCMs used to force glacier models')
fig.supxlabel('Year')
fig.supylabel('Basin precip anom [mm/day]'); ## trailing ';' suppresses the echoed Text object, as in the sibling cells

These (Rhine, Rhone, Danube, Po) look relatively similar.  We don't have the full CMIP6 archive plot to compare; consider whether it makes sense for Sloan/Lizz to run that comparison for more example basins.

## Precip extracted for all 75 basins in runoff analysis

In [None]:
## 15 x 5 grid: 30-year rolling mean of the annual precip anomaly (rel. 2000-2020, SSP4.5)
## for every basin in basin_name_list, one panel per basin
fig, axs = plt.subplots(15,5, figsize=(13,16), sharex=True)
panels = axs.ravel()
annual = pd.offsets.YearEnd() ## explicit offset instead of the deprecated 'A' resample alias
for j,b in enumerate(basin_name_list):
    ax = panels[j] ## indexing (not zip) so a basin list longer than the grid fails loudly
    ax.axhline(0, color='k', ls=':', lw=0.5) ## zero-anomaly reference line

    basin_label = ' '.join([s.capitalize() for s in b.split(' ')]) ## manage multi-word basin names
    basin_area_m2 = TotalBasinAreas.loc[b].squeeze()*1e6 ## km2 --> m2; once per basin

    for i,this_GCM in enumerate(modelnames):
        this_precip = pd.Series(P_by_basin[b]['Ssp4p5'][this_GCM])
        this_precip.index=rng

        ## monthly totals --> mean per day of that month (days_in_month supplies each month's length)
        this_precip_dailyavg = this_precip / this_precip.index.days_in_month
        ## basin-aggregated volume --> depth: m --> mm on top, area in m2 on the bottom
        this_precip_mmday = this_precip_dailyavg*1000 / basin_area_m2

        ## baseline: mean of the annual means over 2000-2020
        hist_mean = this_precip_mmday['2000-01-31':'2020-12-31'].resample(annual).mean().mean()
        annual_anomaly = this_precip_mmday.resample(annual).mean()-hist_mean
        ## trailing 30-value window over the annual series; the first 29 years are NaN and are not drawn
        ax.plot(annual_anomaly.rolling(window=30).mean(),
                label=this_GCM, color=default_colors(i/len(modelnames)))

    ax.set(xlim=(datetime.datetime(1999,12,31),datetime.datetime(2100,12,31)))
    ax.text(0.1, 0.9, basin_label, transform=ax.transAxes)

## One legend for the whole figure, anchored outside the top-right panel
axs[0,4].legend(bbox_to_anchor=(1.05, 1.0), ncol=1)
axs[14,2].set_xlabel('Year', fontsize=14) ## use a direct axis label so that suptitle is not so far away from the axes

## Ticks every 20 years from 2000 to 2100, with only the two end ticks labelled.
## Built once and applied to each axis of the bottom row.
tick_dates = pd.date_range(datetime.datetime(2000,1,1), periods=6, freq='20YS').tolist()
for bottom_ax in axs[14]:
    bottom_ax.set(xticks=tick_dates,
                  xticklabels=['2000','','','','', '2100'])
    bottom_ax.tick_params(axis='x', rotation=45)

fig.suptitle('Basin precip anomaly (30-year rolling mean, rel. to 2000-2020) \n from GCMs used to force glacier models',
            fontsize=14);
fig.supylabel('Basin precip anom [mm/day]', fontsize=14);
plt.tight_layout()

## Percent change in precip
We'd like to be able to compare these plots with each other on the same axes.  Express as percent change rather than precip anomaly.

In [None]:
## 15 x 5 grid: 30-year rolling mean of the annual precip anomaly under SSP4.5, expressed as a
## percentage of each GCM's 2000-2020 mean so all basins can share one y-axis
fig, axs = plt.subplots(15,5, figsize=(13,16), sharex=True, sharey=True)
panels = axs.ravel()
annual = pd.offsets.YearEnd() ## explicit offset instead of the deprecated 'A' resample alias
for j,b in enumerate(basin_name_list):
    ax = panels[j] ## indexing (not zip) so a basin list longer than the grid fails loudly
    basin_label = ' '.join([s.capitalize() for s in b.split(' ')]) ## manage multi-word basin names
    basin_area_m2 = TotalBasinAreas.loc[b].squeeze()*1e6 ## km2 --> m2; once per basin

    for i,this_GCM in enumerate(modelnames):
        this_precip = pd.Series(P_by_basin[b]['Ssp4p5'][this_GCM])
        this_precip.index=rng

        ## monthly totals --> mean per day of that month (days_in_month supplies each month's length)
        this_precip_dailyavg = this_precip / this_precip.index.days_in_month
        ## basin-aggregated volume --> depth: m --> mm on top, area in m2 on the bottom
        this_precip_mmday = this_precip_dailyavg*1000 / basin_area_m2

        ## baseline: mean of the annual means over 2000-2020
        hist_mean = this_precip_mmday['2000-01-31':'2020-12-31'].resample(annual).mean().mean()
        annual_anomaly = this_precip_mmday.resample(annual).mean()-hist_mean
        ## trailing 30-value window over the annual series; the first 29 years are NaN and are not drawn
        smoothed_anomaly = annual_anomaly.rolling(window=30).mean()
        ax.plot(smoothed_anomaly/(hist_mean*0.01), ## express as percent change
                label=this_GCM, color=default_colors(i/len(modelnames)))

    ax.axhline(0, color='k', ls=':') ## zero-change reference line
    ax.set(xlim=(datetime.datetime(1999,12,31),datetime.datetime(2100,12,31)),
          ylim=[-20,20])
    ax.text(0.1, 0.8, basin_label, transform=ax.transAxes)

## One legend for the whole figure, anchored outside the top-right panel
axs[0,4].legend(bbox_to_anchor=(1.05, 1.0), ncol=1)
axs[14,2].set_xlabel('Year', fontsize=14) ## use a direct axis label so that suptitle is not so far away from the axes

## Ticks every 20 years from 2000 to 2100, with only the two end ticks labelled.
## Built once and applied to each axis of the bottom row.
tick_dates = pd.date_range(datetime.datetime(2000,1,1), periods=6, freq='20YS').tolist()
for bottom_ax in axs[14]:
    bottom_ax.set(xticks=tick_dates,
                  xticklabels=['2000','','','','', '2100'])
    bottom_ax.tick_params(axis='x', rotation=45)

fig.suptitle('Basin precip % change (30-year rolling mean, rel. to 2000-2020) \n from GCMs used to force glacier models',
            fontsize=14);
fig.supylabel('Basin precip change [% 2000-2020]', fontsize=14);
plt.tight_layout()