In [1]:
import glob
import itertools
import os
import warnings

import dask
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

# Root folder holding the per-variable, per-region NetCDF sub-folders read below.
fpath1 = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/Precipitation Analysis/global_ERA5_2000_2022/'
# Two-digit region codes; each one names a sub-folder and the "R<code>" file prefix.
regions = '01 02 06 08 10 11 12 13 14 15 16 17 18'.split()


In [18]:
# Lookup table: basin name -> code string. Each code is passed to
# select_glaciers_json() as its `basin` argument (documented there as a MRBID)
# to obtain the glaciers belonging to that basin.
basins = {'RHINE':'6242', 'RHONE':'6243','PO':'6241', 'DANUBE':'6202', 'TITICACA':'3912', 'SANTA':'3425', 
            'OCONA':'3418', 'MAJES':'3416', 'MAGDALENA':'3227', 'AMAZON':'3203', 'YELCHO':'3429', 
            'VALDIVIA':'3428', 'SERRANO':'3426', 'RAPEL':'3423', 'PUELO':'3422', 'PASCUA':'3420', 
            'PALENA':'3419', 'HUASCO':'3412', 'COPIAPO':'3409', 'CISNES':'3408', 'BIOBIO':'3405', 'BAKER':'3404',
            'AZOPARDO':'3403', 'AISEN':'3401', 'SANTA CRUZ':'3244', 'NEGRO':'3232', 'COLORADO':'3212', 
            'CHICO':'3209', 'TORNEALVEN':'6255', 'THJORSA':'6254', 'OLFUSA':'6237', 'LULEALVEN':'6227', 
            'KUBAN':'6223', 'KALIXALVEN':'6219', 'GLOMAA':'6213', 'DRAMSELVA':'6209', 'SVARTA':'6110', 
            'LAGARFLJOT':'6104', 'JOKULSA A FJOLLUM':'6101', 'CLUTHA':'5406', 'YUKON':'4435', 'TAKU':'4431', 
             'SUSITNA':'4430','STIKINE':'4428', 'SKEENA':'4427','SKAGIT':'4426','NUSHAGAK':'4418','NASS':'4416',
            'KUSKOKWIM':'4414','FRASER':'4410', 'COPPER':'4408', 'COLUMBIA':'4406', 'ALSEK':'4401', 'NELSON':'4125', 
              'MACKENZIE':'4123','COLVILLE':'4110', 'YSYK-KOL':'2919', 'UVS NUUR':'2918', 'TARIM HE':'2914', 
              'TALAS':'2913', 'LAKE BALKHASH':'2910','HAR US NUUR':'2909', 'CHUY':'2905', 'ARAL SEA':'2902', 
              'YELLOW RIVER':'2434', 'MEKONG':'2421', 'KAMCHATKA':'2413', 'SALWEEN':'2319', 'IRRAWADDY':'2310', 
              'INDUS':'2309', 'GANGES':'2306','BRAHMAPUTRA':'2302', 'OB':'2108', 'INDIGIRKA':'2103','YANGTZE' : '2433'}

In [19]:
import json
def select_glaciers_json(basin='all', fpath=None):
    '''
    Select glaciers within a basin by MRBID from a json-file.

    Args:
    -----
    basin: str
        String of MRBID or 'all' ('all' is matched case-insensitively).
    fpath: str or path-like, optional
        Location of the json-file that maps each MRBID to its list of
        glaciers. When omitted, the rgi_ids_per_basin.json file that this
        notebook has always read is used.

    Returns:
    --------
    If basin is 'all' a list of all relevant glaciers is returned, for
    initiating glacier simulations. If basin is a MRBID the list of glaciers
    within that basin is returned.

    Raises:
    -------
    KeyError
        If basin is neither 'all' nor a key of the json-file.

    Copy of a function written by Erik Holmgren (2022) in holmgren_gha.utils
    '''
    if fpath is None:
        fpath = '/Users/finnwimberly/Library/CloudStorage/GoogleDrive-fwimberly@middlebury.edu/My Drive/Lizz Research Stuff/rgi_ids_per_basin.json'

    with open(fpath) as f:
        basin_dict = json.load(f)

    if basin.lower() != 'all':
        return basin_dict[basin]

    # Flatten the per-basin lists into a single list covering every basin.
    return list(itertools.chain.from_iterable(basin_dict.values()))

In [20]:
# Glacier list of every basin, keyed by basin name.
basin_gls = {
    name: select_glaciers_json(mrbid)
    for name, mrbid in basins.items()
}

#### Loading in liquid precipitation data

In [2]:
# Per-region glac_prec_monthly arrays, concatenated over all files of the region.
prcp_comps = {}

for region in regions:
    fpath2 = 'glac_prec_monthly/'
    file_pattern = os.path.join(fpath1, fpath2, f"{region}/", f"R{region}*.nc")
    # glob returns matches in arbitrary order; sorting keeps the glacier order
    # of the concatenated array the same on every run.
    file_list = sorted(glob.glob(file_pattern))

    if not file_list:
        # No files for this region: it gets no entry in prcp_comps.
        continue

    datasets = []  # one in-memory DataArray per file of this region
    for file in file_list:
        # The context manager closes the file once the variable has been
        # loaded into memory, so handles do not pile up across files.
        with xr.open_dataset(file) as ds:
            datasets.append(ds.glac_prec_monthly.load())

    prcp_comps[region] = xr.concat(datasets, dim='glacier')  # Concatenate the datasets

In [77]:
# Sorting into basins: prcp_datasets[basin][region] holds the glaciers of that
# region whose RGIId appears in the basin's glacier list.
prcp_datasets = {}
for basin, glacier_list in basin_gls.items():
    prcp_datasets[basin] = {}
    for region in regions:
        # Regions for which no files were found never got an entry in
        # prcp_comps; skip them instead of failing with KeyError.
        if region not in prcp_comps:
            continue
        region_da = prcp_comps[region]
        try:
            # drop=True discards the glaciers that are not part of this basin
            in_basin = region_da.where(region_da.RGIId.isin(glacier_list), drop=True)
        except ValueError:  ## happens if there are no glaciers from this batch in the selected region
            continue
        prcp_datasets[basin][region] = in_basin

In [78]:
# Calculate basin sums separately for each region
basin_prcp_sums_individual = {}
for basin in basin_gls:
    # Sum only the regions that actually have an entry for this basin. The
    # basin-sorting step leaves a region out when it hits its ValueError
    # branch, so indexing every region unconditionally raises KeyError.
    basin_prcp_sums_individual[basin] = {
        region: region_da.sum(dim='glacier')
        for region, region_da in prcp_datasets[basin].items()
    }

In [80]:
# Merge the per-region sums of each basin into one total, so basins that
# straddle RGI region boundaries end up with a single value.
basin_prcp_sums = {}
for basin in basins:
    running_total = 0
    for region in regions:
        # a region without an entry for this basin contributes 0
        running_total += basin_prcp_sums_individual[basin].get(region, 0)
    basin_prcp_sums[basin] = running_total

#### Loading in solid precipitation data

In [81]:
# Per-region glac_acc_monthly arrays, concatenated over all files of the region.
acc_comps = {}

for region in regions:
    fpath2 = 'glac_acc_monthly/'
    file_pattern = os.path.join(fpath1, fpath2, f"{region}/", f"R{region}*.nc")
    # glob returns matches in arbitrary order; sorting keeps the glacier order
    # of the concatenated array the same on every run.
    file_list = sorted(glob.glob(file_pattern))

    if not file_list:
        # No files for this region: it gets no entry in acc_comps.
        continue

    datasets = []  # one in-memory DataArray per file of this region
    for file in file_list:
        # The context manager closes the file once the variable has been
        # loaded into memory, so handles do not pile up across files.
        with xr.open_dataset(file) as ds:
            datasets.append(ds.glac_acc_monthly.load())

    acc_comps[region] = xr.concat(datasets, dim='glacier')  # Concatenate the datasets

In [82]:
# Sorting into basins: acc_datasets[basin][region] holds the glaciers of that
# region whose RGIId appears in the basin's glacier list.
acc_datasets = {}
for basin, glacier_list in basin_gls.items():
    acc_datasets[basin] = {}
    for region in regions:
        # Regions for which no files were found never got an entry in
        # acc_comps; skip them instead of failing with KeyError.
        if region not in acc_comps:
            continue
        region_da = acc_comps[region]
        try:
            # drop=True discards the glaciers that are not part of this basin
            in_basin = region_da.where(region_da.RGIId.isin(glacier_list), drop=True)
        except ValueError:  ## happens if there are no glaciers from this batch in the selected region
            continue
        acc_datasets[basin][region] = in_basin

In [83]:
# Calculate basin sums separately for each region
basin_acc_sums_individual = {}
for basin in basin_gls:
    # Sum only the regions that actually have an entry for this basin. The
    # basin-sorting step leaves a region out when it hits its ValueError
    # branch, so indexing every region unconditionally raises KeyError.
    basin_acc_sums_individual[basin] = {
        region: region_da.sum(dim='glacier')
        for region, region_da in acc_datasets[basin].items()
    }

In [84]:
# Merge the per-region sums of each basin into one total, so basins that
# straddle RGI region boundaries end up with a single value.
basin_acc_sums = {}
for basin in basins:
    running_total = 0
    for region in regions:
        # a region without an entry for this basin contributes 0
        running_total += basin_acc_sums_individual[basin].get(region, 0)
    basin_acc_sums[basin] = running_total

In [85]:
# Per basin: glac_acc_monthly total plus glac_prec_monthly total.
total_prcp = {
    basin: basin_acc_sums[basin] + basin_prcp_sums[basin]
    for basin in basins
}

In [87]:
# Per basin: resample the totals to yearly sums, then average the yearly
# values whose time label falls in 2000-2019.
avg_prcp = {}
for basin in basins:
    annual_totals = total_prcp[basin].resample(time='Y').sum()
    baseline_years = annual_totals.sel(time=slice('2000', '2019'))
    avg_prcp[basin] = baseline_years.mean(dim='time')

In [89]:
# One scalar per basin, pulled out of avg_prcp in the iteration order of `basins`.
precipitation_list = [avg_prcp[name].values.item() for name in basins]

# pandas Series indexed by basin name, holding the values collected above.
precipitation_series = pd.Series(precipitation_list, index=basins)

In [90]:
# Name and destination folder of the CSV file
fname = "precip_avgs_Py.csv"
output_dir = '/Users/finnwimberly/Desktop/Lizz Research/CSV Outputs/Precipitation Analysis/'

# Full path of the output file
output_path = os.path.join(output_dir, fname)

# Write the Series to CSV, keeping both the header row and the index column
precipitation_series.to_csv(output_path, index=True, header=True)