This notebook creates figures that relate the corrected historical precipitation used by each glacier model (GloGEM, PyGEM, OGGM) to that model's runoff projections. The input data were processed elsewhere and are loaded here from CSV files.

Last edited: April 1, 2024 | FFW

Loading in precip data:

In [None]:
import collections
import csv
import datetime
import glob
import itertools
import os
import warnings
from datetime import date

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from scipy.stats import pearsonr, spearmanr
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

# Folder holding the precipitation CSV files that are read in the next cell.
fpath1 = '/Users/finnwimberly/Desktop/Lizz Research/CSV Outputs/Precipitation Analysis/'

# Glacier models compared throughout the notebook; the order also fixes the
# colour assigned to each model in the scatter plots further down.
gmodels = [
    'GloGEM',
    'PyGEM',
    'OGGM',
]

In [None]:
# Gather every CSV in the precipitation folder.
file_pattern = os.path.join(fpath1, "*.csv")
file_list = glob.glob(file_pattern)
if not file_list:
    raise FileNotFoundError(f"No CSV files found matching {file_pattern!r}")

# Labels used when files are paired with models purely by position.
positional_labels = ['OGGM', 'PyGEM', 'GloGEM']


def _model_in_filename(path):
    """Return the single glacier-model label contained in the file name, else None."""
    basename = os.path.basename(path).lower()
    hits = [label for label in positional_labels if label.lower() in basename]
    return hits[0] if len(hits) == 1 else None


# glob.glob() returns paths in arbitrary, filesystem-dependent order, so pairing
# files with labels by position alone can attach the wrong model name to a
# column. Prefer the label embedded in each file name when it is unambiguous.
labels_from_names = [_model_in_filename(path) for path in file_list]
names_are_usable = (
    None not in labels_from_names
    and len(set(labels_from_names)) == len(labels_from_names)
)
if names_are_usable:
    # Keep the column order the positional pairing produces (OGGM, PyGEM, GloGEM).
    labelled_files = sorted(
        zip(file_list, labels_from_names),
        key=lambda pair: positional_labels.index(pair[1]),
    )
else:
    # The file names do not identify the models: keep the positional pairing,
    # but make the reliance on glob order visible.
    warnings.warn(
        "Could not identify the glacier model from the precipitation file names; "
        "pairing files with labels by glob order, which is not guaranteed."
    )
    labelled_files = list(zip(file_list, positional_labels))

dfs = []  # One single-column DataFrame per glacier model
for file, gmodel in labelled_files:
    temp_df = pd.read_csv(file, index_col=0)
    temp_df.columns = [gmodel]  # Each file must hold exactly one data column
    dfs.append(temp_df)

# Join the per-model columns side by side and convert m^3 to km^3.
all_precip_data = pd.concat(dfs, axis=1)*1e-9

In [None]:
# Row-wise mean over the glacier-model columns (one value per row of the
# precipitation table; the original notes describe the rows as basins).
prcp_means = all_precip_data.mean(axis='columns')

# Express every model's value relative to that cross-model mean, row by row.
normalized_prcp_values = all_precip_data.div(prcp_means, axis='index')

Loading in RMQ data:

In [None]:
# Root folder of the processed CSV outputs.
fpath0 = '/Users/finnwimberly/Desktop/Lizz Research/CSV Outputs/'

scenarios = ['ssp126','ssp245','ssp370','ssp585']

modelnames = ['BCC-CSM2-MR', 'CESM2', 'CESM2-WACCM', 'EC-Earth3', 'EC-Earth3-Veg', 'FGOALS-f3-L', 'GFDL-ESM4', 
                  'INM-CM4-8', 'INM-CM5-0', 'MPI-ESM1-2-HR', 'MRI-ESM2-0', 'NorESM2-MM']

# RMQ[SSP][GCM] -> DataFrame read from the 'RMQ/' sub-folder, one file per
# scenario/GCM combination, using the first CSV column as the index.
RMQ = {}
for SSP in scenarios:
    tables_by_gcm = {}
    for GCM in modelnames:
        file = f"RMQ{GCM}_{SSP}.csv"
        tables_by_gcm[GCM] = pd.read_csv(fpath0 + 'RMQ/'+ file, index_col=0)
    RMQ[SSP] = tables_by_gcm

In [None]:
# RMQ_GCM_means[gmodel][SSP] -> Series holding the multi-GCM mean of RMQ.
RMQ_GCM_means = {}
for gmodel in gmodels:
    means_by_ssp = {}
    for SSP in scenarios:
        # One Series per GCM, taken from that GCM's column for this glacier model.
        per_gcm_series = [RMQ[SSP][GCM][gmodel] for GCM in modelnames]

        # Stacking the Series gives one row per GCM; averaging down the rows
        # yields the cross-GCM mean for every index label.
        stacked = pd.DataFrame(per_gcm_series)
        means_by_ssp[SSP] = stacked.mean(axis = 0)
    RMQ_GCM_means[gmodel] = means_by_ssp

In [None]:
# Re-key the multi-GCM means so they mirror the precipitation table:
# RMQs[SSP] is a DataFrame with one column per glacier model.
RMQs = {
    SSP: pd.DataFrame({gmodel: RMQ_GCM_means[gmodel][SSP] for gmodel in gmodels})
    for SSP in scenarios
}

Plotting RMQ vs normalized precipitation:

In [None]:
chosen_SSP = 'ssp245'

# Inputs for the combined table; no basins are excluded here (the optional
# drop of selected basins is disabled).
rmq_data = RMQs[chosen_SSP]
precip_data = normalized_prcp_values

# One two-column frame per glacier model. Precipitation is supplied as a bare
# array, so it is matched to the RMQ Series by position, not by index label.
combined_data = {}
for gmodel in precip_data.columns:
    combined_data[gmodel] = pd.DataFrame({
        'Precipitation': precip_data[gmodel].values.flatten(),
        'RMQ': rmq_data[gmodel],
    })

# Stack the per-model frames, then turn the outer index level (the model name)
# into an ordinary column called 'GModel'.
combined_df = (
    pd.concat(combined_data.values(), keys=combined_data.keys())
    .reset_index(level=0)
    .rename(columns={'level_0': 'GModel'})
)

In [None]:
chosen_SSP = 'ssp245'

# Inputs for this figure; no basins are excluded (the optional drop is disabled).
rmq_data = RMQs[chosen_SSP]
precip_data = normalized_prcp_values

fig, ax = plt.subplots(figsize=(10, 6))

# One record per glacier model holding the linear fit's R^2 and slope.
regression_results = []

palette = ('green', 'purple', 'blue')
for gmodel, color in zip(gmodels, palette):
    x = precip_data[gmodel].values.reshape(-1, 1)  # column vector, as scikit-learn expects
    y = rmq_data[gmodel]
    ax.scatter(x, y, color=color, label=gmodel, alpha = 0.45)

    # Ordinary least-squares fit of RMQ on normalized precipitation.
    model = LinearRegression().fit(x, y)
    y_pred = model.predict(x)  # fitted values; computed but not drawn

    regression_results.append({
        'Glacier Model': gmodel,
        'R^2 Value': model.score(x, y),
        'Slope': model.coef_[0],
    })

regression_df = pd.DataFrame(regression_results)

# Summarise R^2 and slope for every model in the upper-left corner of the axes.
summary_lines = []
for _, row in regression_df.iterrows():
    summary_lines.append(f"{row['Glacier Model']}: $R^2$ Value ={row['R^2 Value']:.3f}, Slope ={row['Slope']:.3f}")
text = '\n'.join(summary_lines)
ax.text(0.01, 0.98, text, transform=ax.transAxes, fontsize=9, verticalalignment='top')

# Grey dashed reference lines through x=1 and y=1.
ax.axvline(x=1, color='grey', linestyle='--')
ax.axhline(y=1, color='grey', linestyle='--')

ax.set_xlabel('Relative Precipitation')
ax.set_ylabel('Relative Runoff')
ax.set_title('Relative Precipitation vs. Relative Runoff', y = 1.08)

# Three-column legend placed just above the axes, below the raised title.
ax.legend(ncol=3, loc='lower center', bbox_to_anchor=(0.5, 1.01))

name = 'relativeRF_vs_relativePRCP'
plt.savefig(f"/Users/finnwimberly/Desktop/Lizz Research/Paper Figs/{name}.pdf", dpi=300, bbox_inches='tight')

plt.show()

Loading in historical mean runoff (RF) data:

In [None]:
# Root folder of the processed CSV outputs.
fpath0 = '/Users/finnwimberly/Desktop/Lizz Research/CSV Outputs/'

scenarios = ['ssp126','ssp245','ssp370','ssp585']

modelnames = ['BCC-CSM2-MR', 'CESM2', 'CESM2-WACCM', 'EC-Earth3', 'EC-Earth3-Veg', 'FGOALS-f3-L', 'GFDL-ESM4', 
                  'INM-CM4-8', 'INM-CM5-0', 'MPI-ESM1-2-HR', 'MRI-ESM2-0', 'NorESM2-MM']

# RF_hist_mean[SSP][GCM] -> DataFrame read from the 'RF Historical Means/'
# sub-folder, one file per scenario/GCM combination, first column as index.
RF_hist_mean = {}
for SSP in scenarios:
    tables_by_gcm = {}
    for GCM in modelnames:
        file = f"RF_hist_mean{GCM}_{SSP}.csv"
        tables_by_gcm[GCM] = pd.read_csv(fpath0 + 'RF Historical Means/'+ file, index_col=0)
    RF_hist_mean[SSP] = tables_by_gcm

In [None]:
# hist_GCM_means[gmodel][SSP] -> Series holding the multi-GCM mean of the
# historical-mean runoff tables.
hist_GCM_means = {}
for gmodel in gmodels:
    means_by_ssp = {}
    for SSP in scenarios:
        # One Series per GCM, taken from that GCM's column for this glacier model.
        per_gcm_series = [RF_hist_mean[SSP][GCM][gmodel] for GCM in modelnames]

        # Stacking the Series gives one row per GCM; averaging down the rows
        # yields the cross-GCM mean for every index label.
        stacked = pd.DataFrame(per_gcm_series)
        means_by_ssp[SSP] = stacked.mean(axis = 0)
    hist_GCM_means[gmodel] = means_by_ssp

In [None]:
# Re-key the historical multi-GCM means so they mirror the precipitation table:
# RF_values[SSP] is a DataFrame with one column per glacier model.
RF_values = {
    SSP: pd.DataFrame({gmodel: hist_GCM_means[gmodel][SSP] for gmodel in gmodels})
    for SSP in scenarios
}

In [None]:
chosen_SSP = 'ssp245'

fig, ax = plt.subplots(figsize=(10, 6))

# One record per glacier model holding the linear fit's R^2 and slope.
regression_results = []

palette = ('green', 'purple', 'blue')
for gmodel, color in zip(gmodels, palette):
    # Both inputs are bare arrays, so points are paired by row position.
    x = all_precip_data[gmodel].values.reshape(-1, 1)  # column vector, as scikit-learn expects
    y = RF_values[chosen_SSP][gmodel].values

    # Ordinary least-squares fit of runoff on precipitation.
    model = LinearRegression().fit(x, y)
    y_pred = model.predict(x)  # fitted values; computed but not drawn

    ax.scatter(x, y, color=color, label=gmodel, alpha=0.45)

    regression_results.append({
        'Glacier Model': gmodel,
        'R^2 Value': model.score(x, y),
        'Slope': model.coef_[0],
    })

regression_df = pd.DataFrame(regression_results)

# Summarise R^2 and slope for every model near the top of the axes.
summary_lines = []
for _, row in regression_df.iterrows():
    summary_lines.append(f"{row['Glacier Model']}: $R^2$={row['R^2 Value']:.3f}, Slope={row['Slope']:.4f}")
text = '\n'.join(summary_lines)
ax.text(0.16, 0.979, text, transform=ax.transAxes, fontsize=9, verticalalignment='top')

name = 'Historical Precipitation vs. Historical Runoff'

ax.set_xlabel(r'Average Annual Precipitation for the Historical Period $[Gt]$')
ax.set_ylabel(r'Average Annual Runoff for 2000-2020 $[Gt]$')
ax.set_title(f"{name}")
ax.legend()

plt.show()

Loading in mean runoff (RF) data:

In [None]:
# Root folder of the processed CSV outputs.
fpath0 = '/Users/finnwimberly/Desktop/Lizz Research/CSV Outputs/'

scenarios = ['ssp126','ssp245','ssp370','ssp585']

modelnames = ['BCC-CSM2-MR', 'CESM2', 'CESM2-WACCM', 'EC-Earth3', 'EC-Earth3-Veg', 'FGOALS-f3-L', 'GFDL-ESM4', 
                  'INM-CM4-8', 'INM-CM5-0', 'MPI-ESM1-2-HR', 'MRI-ESM2-0', 'NorESM2-MM']

# RF_mean[SSP][GCM] -> DataFrame read from the 'RF Means/' sub-folder, one
# file per scenario/GCM combination, using the first CSV column as the index.
RF_mean = {}
for SSP in scenarios:
    tables_by_gcm = {}
    for GCM in modelnames:
        file = f"RF_mean{GCM}_{SSP}.csv"
        tables_by_gcm[GCM] = pd.read_csv(fpath0 + 'RF Means/'+ file, index_col=0)
    RF_mean[SSP] = tables_by_gcm

In [None]:
# RF_GCM_means[gmodel][SSP] -> Series holding the multi-GCM mean of the
# mean-runoff tables.
RF_GCM_means = {}
for gmodel in gmodels:
    means_by_ssp = {}
    for SSP in scenarios:
        # One Series per GCM, taken from that GCM's column for this glacier model.
        per_gcm_series = [RF_mean[SSP][GCM][gmodel] for GCM in modelnames]

        # Stacking the Series gives one row per GCM; averaging down the rows
        # yields the cross-GCM mean for every index label.
        stacked = pd.DataFrame(per_gcm_series)
        means_by_ssp[SSP] = stacked.mean(axis = 0)
    RF_GCM_means[gmodel] = means_by_ssp

In [None]:
# Re-key the multi-GCM mean runoff so it mirrors the precipitation table:
# RF_values[SSP] is a DataFrame with one column per glacier model.
# NOTE: this rebinds RF_values, which an earlier cell filled with the
# historical means; cells below this point see the values built here.
RF_values = {
    SSP: pd.DataFrame({gmodel: RF_GCM_means[gmodel][SSP] for gmodel in gmodels})
    for SSP in scenarios
}

In [None]:
chosen_SSP = 'ssp245'

fig, ax = plt.subplots(figsize=(10, 6))

# One record per glacier model holding the linear fit's R^2 and slope.
regression_results = []

palette = ('green', 'purple', 'blue')
for gmodel, color in zip(gmodels, palette):
    # Both inputs are bare arrays, so points are paired by row position.
    x = all_precip_data[gmodel].values.reshape(-1, 1)  # column vector, as scikit-learn expects
    y = RF_values[chosen_SSP][gmodel].values

    # Ordinary least-squares fit of runoff on precipitation.
    model = LinearRegression().fit(x, y)
    y_pred = model.predict(x)  # fitted values; computed but not drawn

    ax.scatter(x, y, color=color, label=gmodel, alpha=0.45)

    regression_results.append({
        'Glacier Model': gmodel,
        'R^2 Value': model.score(x, y),
        'Slope': model.coef_[0],
    })

regression_df = pd.DataFrame(regression_results)

# Summarise R^2 and slope for every model near the top of the axes.
summary_lines = []
for _, row in regression_df.iterrows():
    summary_lines.append(f"{row['Glacier Model']}: $R^2$={row['R^2 Value']:.3f}, Slope={row['Slope']:.4f}")
text = '\n'.join(summary_lines)
ax.text(0.16, 0.979, text, transform=ax.transAxes, fontsize=9, verticalalignment='top')

# The title doubles as the output file name.
name = 'Historical Precipitation vs. Average Runoff for 2000-2100'

ax.set_xlabel(r'Average Annual Precipitation for the Historical Period $[Gt]$')
ax.set_ylabel(r'Average Annual Runoff for 2000-2100 $[Gt]$')
ax.set_title(f"{name}")
ax.legend()

plt.savefig(f"/Users/finnwimberly/Desktop/Lizz Research/Paper Figs/{name}.png", dpi=300, bbox_inches='tight')

plt.show()

In [None]:
chosen_SSP = 'ssp245'

fig, ax = plt.subplots(figsize=(10, 6))

# One record per glacier model holding the linear fit's R^2 and slope.
regression_results = []

palette = ('green', 'purple', 'blue')
for gmodel, color in zip(gmodels, palette):
    # Both inputs are bare arrays, so points are paired by row position.
    x = all_precip_data[gmodel].values.reshape(-1, 1)  # column vector, as scikit-learn expects
    y = RF_values[chosen_SSP][gmodel].values

    # The fit uses the untransformed values; only the axes are logarithmic,
    # so the annotated slope and R^2 describe the linear-space relationship.
    model = LinearRegression().fit(x, y)
    y_pred = model.predict(x)  # fitted values; computed but not drawn

    ax.scatter(x, y, color=color, label=gmodel, alpha=0.45)

    regression_results.append({
        'Glacier Model': gmodel,
        'R^2 Value': model.score(x, y),
        'Slope': model.coef_[0],
    })

regression_df = pd.DataFrame(regression_results)

# Summarise R^2 and slope for every model in the upper-left corner of the axes.
summary_lines = []
for _, row in regression_df.iterrows():
    summary_lines.append(f"{row['Glacier Model']}: $R^2$={row['R^2 Value']:.3f}, Slope={row['Slope']:.4f}")
text = '\n'.join(summary_lines)
ax.text(0.01, 0.979, text, transform=ax.transAxes, fontsize=9, verticalalignment='top')

ax.set_xlabel(r'Average Annual Precipitation for the Historical Period $[Gt]$')
ax.set_ylabel(r'Average Annual Runoff for 2000-2100 $[Gt]$')

# Logarithmic scaling on both axes.
ax.set_xscale('log')
ax.set_yscale('log')

# Three-column legend placed just above the axes, below the raised title.
ax.legend(ncol=3, loc='lower center', bbox_to_anchor=(0.5, 1.01))
ax.set_title('Historical Precipitation vs. Average Runoff for 2000-2100', y = 1.08)

name = 'RF_vs_PRCP'
plt.savefig(f"/Users/finnwimberly/Desktop/Lizz Research/Paper Figs/{name}.pdf", dpi=300, bbox_inches='tight')

plt.show()