# Setup

In [1]:
#load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os
import glob
import csv

#get area
from glambie.const import regions, constants
from glambie.util import mass_height_conversions as mhc
from glambie.data.timeseries import Timeseries
from glambie.processing import processing_helpers as proc

#import urllib.request as url

#TO DO:
#do regional hydrological years

In [2]:
#set local path to 
local_path = 'C:/Users/jonathan/Documents/capstone_glambie/input/'
os.chdir(local_path)

#set output folder name
folder_name = ''

# get all folders
input_path = 'C:/Users/jonathan/Documents/capstone_glambie/input/glambie_runs'

folders = [
    name for name in os.listdir(input_path)
    if os.path.isdir(os.path.join(input_path, name))
]

#set RGI variable
rgi = 6

#start_date that you want the lines to meet + 0.5
#2023.5 for meets at end & 2012 for meets halfway
end_year = 2012.5 #2023.5

# Input and output directory

In [74]:
#set input and output directories
data_directory = os.path.join(local_path, 'processing/', folder_name)

#get a list of all the regions/folders excluding global
list_of_folders = sorted(os.listdir(data_directory)[1:20])

#directory global
global_dir = os.path.join(data_directory + '/', '0_global/consensus/csvs')

list_of_folders
#global_dir

['10_north_asia',
 '11_central_europe',
 '12_caucasus_middle_east',
 '13_central_asia',
 '14_south_asia_west',
 '15_south_asia_east',
 '16_low_latitudes',
 '17_southern_andes',
 '18_new_zealand',
 '19_antarctic_and_subantarctic',
 '1_alaska',
 '2_western_canada_us',
 '3_arctic_canada_north',
 '4_arctic_canada_south',
 '5_greenland_periphery',
 '6_iceland',
 '7_svalbard',
 '8_scandinavia',
 '9_russian_arctic']

In [6]:
#create output directory
#get path
for folder_name in folders:
    output_directory = os.path.join(local_path, 'glambie_runs_clean/', 'reanalysis-default_changed_uncertainties')

    #check if output directory already exists and otherwise create it
    if os.path.isdir(output_directory):
        print("Directory exist: " + output_directory)
    else:
        print("Directory does not exists. Creating new one: " + output_directory)
        os.makedirs(output_directory)
        os.makedirs(os.path.join(output_directory + '/', 'calendar_years'))
        os.makedirs(os.path.join(output_directory + '/', 'hydrological_years'))

Directory does not exists. Creating new one: C:/Users/jonathan/Documents/capstone_glambie/input/glambie_runs_clean/reanalysis-default_changed_uncertainties
Directory exist: C:/Users/jonathan/Documents/capstone_glambie/input/glambie_runs_clean/reanalysis-default_changed_uncertainties
Directory exist: C:/Users/jonathan/Documents/capstone_glambie/input/glambie_runs_clean/reanalysis-default_changed_uncertainties
Directory exist: C:/Users/jonathan/Documents/capstone_glambie/input/glambie_runs_clean/reanalysis-default_changed_uncertainties
Directory exist: C:/Users/jonathan/Documents/capstone_glambie/input/glambie_runs_clean/reanalysis-default_changed_uncertainties
Directory exist: C:/Users/jonathan/Documents/capstone_glambie/input/glambie_runs_clean/reanalysis-default_changed_uncertainties
Directory exist: C:/Users/jonathan/Documents/capstone_glambie/input/glambie_runs_clean/reanalysis-default_changed_uncertainties
Directory exist: C:/Users/jonathan/Documents/capstone_glambie/input/glambie_

# Regional calendar years

In [58]:
folders

['datasets_default',
 'datasets_excluding_DUS-combined',
 'datasets_excluding_ETH',
 'datasets_excluding_WGMS-beta',
 'datasets_including_Harig_Group',
 'datasets_including_Jacob_2012_dmdt',
 'datasets_including_most',
 'datasets_including_most_except_WGMS_and_UHZ',
 'datasets_including_Sasgen_AWIarc_RL01_2',
 'datasets_including_Treichler_ICESat',
 'datasets_including_WGMS-mean_ba',
 'datasets_including_Wouters',
 'reanalysis_default_RGI_6_code_adjusted',
 'reanalysis_default_RGI_7',
 'reanalysis_RGI_6_default',
 'reanalysis_RGI_6_including_most',
 'reanalysis_RGI_6_including_most_except_WGMS_and_UHZ']

In [59]:
for folder_name in folders:    #set input and output directories
    data_directory = os.path.join(local_path, 'glambie_runs/', folder_name)

    #get a list of all the regions/folders excluding global
    list_of_folders = sorted(os.listdir(data_directory)[1:20])
    

    rename_dict = {
        'changes_gt': 'combined_gt',
        'errors_gt': 'combined_gt_errors',
        'changes_mwe': 'combined_mwe',
        'errors_mwe': 'combined_mwe_errors'
    }

    global_area = np.repeat(0,24)

    for i in range(0,len(list_of_folders)):
        #define region name and path to results
        region = list_of_folders[i]
        region_path = os.path.join(data_directory + '/', region + '/', 'consensus/csvs/')

        #get the region name without  the number
        region_name = region[region.find("_")+1:]

        #load in data calendar year
        calendar_gt = pd.read_csv(os.path.join(region_path, 'consensus_calendar_year_gt_' + region_name + '.csv'))
        calendar_mwe = pd.read_csv(os.path.join(region_path, 'consensus_calendar_year_mwe_' + region_name + '.csv'))

        #merge the data
        calendar_merged = calendar_gt.merge(calendar_mwe,
                                        left_on = ["start_dates", "end_dates"],
                                        right_on = ["start_dates", "end_dates"],
                                        suffixes=('_gt', '_mwe'))
        
        #add column for region
        calendar_merged["region"] = region_name

        #add column for glacier area
        calendar_merged["glacier_area"] = regions.REGIONS[region_name].get_adjusted_area(
            calendar_merged.start_dates, 
            calendar_merged.end_dates,
            rgi)
        
        #calculate global area
        global_area = global_area + calendar_merged["glacier_area"]
        
        #rename columns
        calendar_merged_final = calendar_merged.rename(columns=rename_dict)

        #reorder columns
        calendar_merged_final = calendar_merged_final.iloc[:, [0, 1,7, 6, 2, 3, 4, 5]]
        
        #save to csv
        calendar_merged_final.to_csv(os.path.join(output_directory + '/', 'calendar_years/' + region + '.csv') )
        print('data successfully saved for region: ', region_name)

data successfully saved for region:  north_asia
data successfully saved for region:  central_europe
data successfully saved for region:  caucasus_middle_east
data successfully saved for region:  central_asia
data successfully saved for region:  south_asia_west
data successfully saved for region:  south_asia_east
data successfully saved for region:  low_latitudes
data successfully saved for region:  southern_andes
data successfully saved for region:  new_zealand
data successfully saved for region:  antarctic_and_subantarctic
data successfully saved for region:  alaska
data successfully saved for region:  western_canada_us
data successfully saved for region:  arctic_canada_north
data successfully saved for region:  arctic_canada_south
data successfully saved for region:  greenland_periphery
data successfully saved for region:  iceland
data successfully saved for region:  svalbard
data successfully saved for region:  scandinavia
data successfully saved for region:  russian_arctic


ValueError: operands could not be broadcast together with shapes (24,) (22,) 

In [53]:
folder_name

'datasets_including_Jacob_2012_dmdt'

## check correctness

In [77]:
#check correctness
calendar_real = pd.read_csv('C:/Users/Milva/Documents/Uni Freiburg/Semester 2.1/capstone/glambie_results_20240716/calendar_years/0_global.csv')
calendar_real.head()

calendar_made = pd.read_csv('C:/Users/Milva/glambie/glambie/output_plots/clean_output/reanalysis_2025-11-18_16-33/calendar_years/0_global.csv')
calendar_made.head()

#check combined stufff -> all differences are lower than e^-13 so is probably good
com_gt = calendar_real.combined_gt - calendar_made.combined_gt #-13
com_gte = calendar_real.combined_gt_errors - calendar_made.combined_gt_errors #-14
com_mwe = calendar_real.combined_mwe - calendar_made.combined_mwe #-14
com_mwee = calendar_real.combined_mwe_errors - calendar_made.combined_mwe_errors #-16

com_gt
com_gte
com_mwe
com_mwee



#calendar_made.head()

0     1.110223e-16
1     0.000000e+00
2     1.110223e-16
3     9.714451e-17
4     0.000000e+00
5     0.000000e+00
6     0.000000e+00
7     0.000000e+00
8     0.000000e+00
9     0.000000e+00
10    0.000000e+00
11    0.000000e+00
12    0.000000e+00
13    0.000000e+00
14    0.000000e+00
15    0.000000e+00
16    0.000000e+00
17    0.000000e+00
18    0.000000e+00
19    0.000000e+00
20    0.000000e+00
21    0.000000e+00
22    0.000000e+00
23    0.000000e+00
Name: combined_mwe_errors, dtype: float64

# Regional hydrological years

In [89]:
rename_dict = {
    'changes_gt': 'combined_gt',
    'errors_gt': 'combined_gt_errors',
    'changes_mwe': 'combined_mwe',
    'errors_mwe': 'combined_mwe_errors'
}

method_list = ['altimetry', 'gravimetry', 'demdiff_and_glaciological']

for i in range(0,len(list_of_folders)):
    #define region name and path to results
    region = list_of_folders[i]
    region_path = os.path.join(data_directory + '/', region + '/', 'consensus/csvs/')

    #get the region name without  the number
    region_name = region[region.find("_") + 1:]

    #load in data hydrological year
    hydrological_gt = pd.read_csv(os.path.join(region_path, 'consensus_hydrological_year_gt_' + region_name + '.csv'))
    hydrological_mwe = pd.read_csv(os.path.join(region_path, 'consensus_hydrological_year_mwe_' + region_name + '.csv'))

    #merge the data
    hydrological_merged = hydrological_gt.merge(hydrological_mwe,
                                      left_on = ["start_dates", "end_dates"],
                                      right_on = ["start_dates", "end_dates"],
                                      suffixes=('_gt', '_mwe') )
    
    #add column for region
    hydrological_merged["region"] = region_name

    #add column for glacier area
    hydrological_merged["glacier_area"] = regions.REGIONS[region_name].get_adjusted_area(
        hydrological_merged.start_dates, 
        hydrological_merged.end_dates,
        rgi)
    
    #rename columns
    hydrological_merged = hydrological_merged.rename(columns=rename_dict)

    #reorder columns
    #hydrological_merged = hydrological_merged.iloc[:, [0, 1, 6, 2, 3, 4, 5]]
    hydrological_merged = hydrological_merged.iloc[:, [0, 1, 7, 6, 2, 3, 4, 5]]

    #retrieve glacier area  at reference year
    if rgi == 6:
       glacier_area_con = regions.REGIONS[region_name].rgi6_area
    else:
       glacier_area_con = regions.REGIONS[region_name].rgi7_area

     #retrieve glacier area uncertainty 
    area_unc = (glacier_area_con * regions.REGIONS[region_name].area_uncertainty_percentage)
    
    #add method columns
    for method in method_list:
        if method in os.listdir(os.path.join(data_directory + '/', region + '/')):
            #load in MWE data:
            #define path to data
            method_path = os.path.join(data_directory + '/', region + '/', method + '/', 'csvs/')

            #read in data
            method_mwe = pd.read_csv(os.path.join(method_path, method + '_final_with_area_change.csv'))
            
            #merge dataset
            hydrological_merged = hydrological_merged.merge(method_mwe,
                                                             how = 'left',
                                                             on = ["start_dates", "end_dates"],
                                                             suffixes=(None, "_mwe") )
            


            #rename columns
            rename_dict_method = {
                'changes': method + '_mwe',
                'errors': method + '_mwe_errors'
            }
            hydrological_merged = hydrological_merged.rename(columns=rename_dict_method)

            #add gt columns
            #######3
            
            #for GT changes
            #remove area change effect from mwe
            rem_area_change = hydrological_merged[method + '_mwe'] / (glacier_area_con/ hydrological_merged["glacier_area"])

            #convert mwe to gt and add column
            hydrological_merged[method + '_gt'] = np.array(mhc.meters_water_equivalent_to_gigatonnes(
                rem_area_change,
                glacier_area_con,
                constants.DENSITY_OF_WATER_KG_PER_M3))

            #for GT errors
            #remove area change effect from mwe
            #rem_area_change_er = hydrological_merged[method + '_mwe_errors'] / (glacier_area_con/ hydrological_merged["glacier_area"])

            #convert mwe to gt and add as temp column
            hydrological_merged[method + '_gt_errors'] = np.array(mhc.meters_water_equivalent_to_gigatonnes(
                hydrological_merged[method + '_mwe_errors'],
                glacier_area_con,
                constants.DENSITY_OF_WATER_KG_PER_M3))         

            #calculate total uncertainties
            uncertainties_gt = (hydrological_merged[method + '_gt'].abs()
                                * ((hydrological_merged[method + '_gt_errors'] / hydrological_merged[method + '_gt']) ** 2 
                                   + (area_unc / glacier_area_con) ** 2) ** 0.5)

            #include in dataframe
            hydrological_merged[method + '_gt_errors'] = np.array(uncertainties_gt)
            
        else:
            hydrological_merged[method + '_gt'] = np.nan
            hydrological_merged[method + '_gt_errors'] = np.nan
            hydrological_merged[method + '_mwe'] = np.nan
            hydrological_merged[method + '_mwe_errors'] = np.nan
            hydrological_merged[method + '_annual_variability'] = np.nan

    print(region)
    hydrological_merged.info()
   #print(hydrological_merged.head())

    #save to csv
    hydrological_merged.to_csv(os.path.join(output_directory + '/', 'hydrological_years/' + region + '.csv') )
    
    print('data successfully saved for region: ', region_name)


hydrological_merged.head()

10_north_asia
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 22 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   start_dates                           25 non-null     float64
 1   end_dates                             25 non-null     float64
 2   glacier_area                          25 non-null     float64
 3   region                                25 non-null     object 
 4   combined_gt                           25 non-null     float64
 5   combined_gt_errors                    25 non-null     float64
 6   combined_mwe                          25 non-null     float64
 7   combined_mwe_errors                   25 non-null     float64
 8   altimetry_gt                          0 non-null      float64
 9   altimetry_gt_errors                   0 non-null      float64
 10  altimetry_mwe                         0 non-null      float64
 11  altimet

Unnamed: 0,start_dates,end_dates,glacier_area,region,combined_gt,combined_gt_errors,combined_mwe,combined_mwe_errors,altimetry_mwe,altimetry_mwe_errors,altimetry_gt,altimetry_gt_errors,gravimetry_mwe,gravimetry_mwe_errors,gravimetry_gt,gravimetry_gt_errors,demdiff_and_glaciological_mwe,demdiff_and_glaciological_mwe_errors,demdiff_and_glaciological_gt,demdiff_and_glaciological_gt_errors
0,1999.75,2000.75,51622.9552,russian_arctic,8.178914,16.651969,0.158912,0.323636,,,,,,,,,0.189661,0.194114,9.761501,9.996585
1,2000.75,2001.75,51581.6816,russian_arctic,-17.382187,14.521436,-0.337998,0.281808,,,,,,,,,-0.307249,0.111154,-15.800866,5.77179
2,2001.75,2002.75,51540.408,russian_arctic,-5.335166,14.39953,-0.103826,0.279896,,,,,,,,,-0.073077,0.106214,-3.755109,5.466555
3,2002.75,2003.75,51499.1344,russian_arctic,-9.592279,10.147191,-0.186821,0.197053,,,,,-0.121138,0.102138,-6.219761,5.262907,-0.273771,0.103761,-14.056648,5.383276
4,2003.75,2004.75,51457.8608,russian_arctic,-23.455771,8.308643,-0.457196,0.159913,-0.468253,0.074944,-24.02301,4.037697,-0.486816,0.102355,-24.975352,5.410906,-0.41652,0.113464,-21.368951,5.933287


## check correctness

In [91]:
#check correctness
hydro_real = pd.read_csv('C:/Users/Milva/Documents/Uni Freiburg/Semester 2.1/capstone/glambie_results_20240716/hydrological_years/9_russian_arctic.csv')
hydro_real.head()

hydro_made = pd.read_csv('C:/Users/Milva/glambie/glambie/output_plots/clean_output/reanalysis_2025-11-18_16-33/hydrological_years/9_russian_arctic.csv')
hydro_made.head()

#check combined stufff -> all differences are lower than e^-15 so is probably good
com_gt = hydro_real.combined_gt - hydro_made.combined_gt
com_gte = hydro_real.combined_gt_errors - hydro_made.combined_gt_errors
com_mwe = hydro_real.combined_mwe - hydro_made.combined_mwe
com_mwee = hydro_real.combined_mwe_errors - hydro_made.combined_mwe_errors

com_gt
com_gte
com_mwe
com_mwee



#hydro_made.head()

0    -1.110223e-16
1     0.000000e+00
2    -5.551115e-17
3     0.000000e+00
4     0.000000e+00
5     0.000000e+00
6    -8.326673e-17
7    -1.110223e-16
8     0.000000e+00
9    -1.110223e-16
10    0.000000e+00
11    0.000000e+00
12    0.000000e+00
13    0.000000e+00
14    0.000000e+00
15   -1.110223e-16
16   -1.110223e-16
17    0.000000e+00
18   -1.110223e-16
19    0.000000e+00
20    0.000000e+00
21   -1.110223e-16
22    0.000000e+00
23    0.000000e+00
24             NaN
Name: combined_mwe_errors, dtype: float64

In [92]:
#check mwe stufff -> all differences are lower than e^-16 so is probably good
alt_mwe = hydro_real.altimetry_mwe - hydro_made.altimetry_mwe #-17
alt_mwe_err = hydro_real.altimetry_mwe_errors - hydro_made.altimetry_mwe_errors
grav_mwe = hydro_real.gravimetry_mwe - hydro_made.gravimetry_mwe #-16
grav_mwe_err = hydro_real.gravimetry_mwe_errors - hydro_made.gravimetry_mwe_errors
dem_mwe = hydro_real.demdiff_and_glaciological_mwe - hydro_made.demdiff_and_glaciological_mwe #-16
dem_mwe_err = hydro_real.demdiff_and_glaciological_mwe_errors - hydro_made.demdiff_and_glaciological_mwe_errors #-17


alt_mwe
alt_mwe_err
grav_mwe
grav_mwe_err
dem_mwe
dem_mwe_err

0     0.000000e+00
1     0.000000e+00
2     0.000000e+00
3     0.000000e+00
4     0.000000e+00
5     0.000000e+00
6     0.000000e+00
7     0.000000e+00
8     9.714451e-17
9     0.000000e+00
10    0.000000e+00
11    0.000000e+00
12    0.000000e+00
13    0.000000e+00
14    0.000000e+00
15    0.000000e+00
16    0.000000e+00
17    0.000000e+00
18    0.000000e+00
19    0.000000e+00
20    0.000000e+00
21    0.000000e+00
22    0.000000e+00
23    0.000000e+00
24             NaN
Name: demdiff_and_glaciological_mwe_errors, dtype: float64

In [95]:
#check gt stufff -> all differences are lower than e^-16 so is probably good
alt_gt = hydro_real.altimetry_gt - hydro_made.altimetry_gt #-15
alt_gt_err = hydro_real.altimetry_gt_errors - hydro_made.altimetry_gt_errors
grav_gt = hydro_real.gravimetry_gt - hydro_made.gravimetry_gt #-15
grav_gt_err = hydro_real.gravimetry_gt_errors - hydro_made.gravimetry_gt_errors
dem_gt = hydro_real.demdiff_and_glaciological_gt - hydro_made.demdiff_and_glaciological_gt #-15
dem_gt_err = hydro_real.demdiff_and_glaciological_gt_errors - hydro_made.demdiff_and_glaciological_gt_errors #-17


#alt_gt
#alt_gt_err
#grav_gt
#grav_gt_err
#dem_gt
dem_gt_err

0     0.000000e+00
1     0.000000e+00
2     0.000000e+00
3    -8.881784e-16
4     0.000000e+00
5     0.000000e+00
6     0.000000e+00
7     8.881784e-16
8     4.440892e-15
9     0.000000e+00
10    0.000000e+00
11    0.000000e+00
12    0.000000e+00
13    0.000000e+00
14    0.000000e+00
15    0.000000e+00
16    0.000000e+00
17    0.000000e+00
18    0.000000e+00
19    0.000000e+00
20    0.000000e+00
21    0.000000e+00
22    0.000000e+00
23    0.000000e+00
24             NaN
Name: demdiff_and_glaciological_gt_errors, dtype: float64

In [96]:
#to do: 
#- I can only successfully add two columns so far

#columns in notebook
#start_dates,end_dates,glacier_area,region,combined_gt,combined_gt_errors,combined_mwe,combined_mwe_errors,

#altimetry_gt,
#altimetry_gt_errors,
#altimetry_mwe -> in altimetry_final_with_area_change
#altimetry_mwe_errors -> in altimetry_final_with_area_change
#altimetry_annual_variability, -> NaN or 1. If method is nto present its NAN but if it is, it can either be NAN or 1, I'm uncertain of what it is.

#gravimetry_gt,gravimetry_gt_errors,gravimetry_mwe,gravimetry_mwe_errors,gravimetry_annual_variability,
#demdiff_and_glaciological_gt,demdiff_and_glaciological_gt_errors,demdiff_and_glaciological_mwe,demdiff_and_glaciological_mwe_errors,demdiff_and_glaciological_annual_variability


#columns in results
#start_dates,end_dates,changes,errors


# Global

In [97]:
#load in global datasets
global_gt = pd.read_csv(os.path.join(global_dir, 'global_gt.csv'))
global_mwe = pd.read_csv(os.path.join(global_dir, 'global_mwe.csv'))

global_gt.head()
global_gt.info()

global_mwe.head()
global_mwe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   start_dates  24 non-null     float64
 1   end_dates    24 non-null     float64
 2   changes      24 non-null     float64
 3   errors       24 non-null     float64
dtypes: float64(4)
memory usage: 900.0 bytes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   start_dates  24 non-null     float64
 1   end_dates    24 non-null     float64
 2   changes      24 non-null     float64
 3   errors       24 non-null     float64
dtypes: float64(4)
memory usage: 900.0 bytes


In [98]:
#merge data together
merged_global = global_gt.merge(global_mwe,
                                left_on = ["start_dates", "end_dates"],
                                right_on = ["start_dates", "end_dates"],
                                suffixes=('_gt', '_mwe') )

merged_global.info()
merged_global.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   start_dates  24 non-null     float64
 1   end_dates    24 non-null     float64
 2   changes_gt   24 non-null     float64
 3   errors_gt    24 non-null     float64
 4   changes_mwe  24 non-null     float64
 5   errors_mwe   24 non-null     float64
dtypes: float64(6)
memory usage: 1.3 KB


Unnamed: 0,start_dates,end_dates,changes_gt,errors_gt,changes_mwe,errors_mwe
0,2000.0,2001.0,-78.044147,111.560951,-0.111179,0.158362
1,2001.0,2002.0,-215.169311,95.300101,-0.307516,0.13505
2,2002.0,2003.0,-128.466367,91.146345,-0.1842,0.12893
3,2003.0,2004.0,-192.67247,72.798877,-0.277163,0.102895
4,2004.0,2005.0,-229.236911,73.022158,-0.330842,0.102844


In [99]:
#add region and glacier area
merged_global["region"] = "global"
merged_global["glacier_area"] = global_area

merged_global.info()
merged_global.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   start_dates   24 non-null     float64
 1   end_dates     24 non-null     float64
 2   changes_gt    24 non-null     float64
 3   errors_gt     24 non-null     float64
 4   changes_mwe   24 non-null     float64
 5   errors_mwe    24 non-null     float64
 6   region        24 non-null     object 
 7   glacier_area  24 non-null     float64
dtypes: float64(7), object(1)
memory usage: 1.6+ KB


Unnamed: 0,start_dates,end_dates,changes_gt,errors_gt,changes_mwe,errors_mwe,region,glacier_area
0,2000.0,2001.0,-78.044147,111.560951,-0.111179,0.158362,global,704082.69565
1,2001.0,2002.0,-215.169311,95.300101,-0.307516,0.13505,global,701805.49875
2,2002.0,2003.0,-128.466367,91.146345,-0.1842,0.12893,global,699528.30185
3,2003.0,2004.0,-192.67247,72.798877,-0.277163,0.102895,global,697251.10495
4,2004.0,2005.0,-229.236911,73.022158,-0.330842,0.102844,global,694973.90805


In [100]:
#rename columns
rename_dict = {
    'changes_gt': 'combined_gt',
    'errors_gt': 'combined_gt_errors',
    'changes_mwe': 'combined_mwe',
    'errors_mwe': 'combined_mwe_errors'
}

merged_global_final = merged_global.rename(columns=rename_dict)

#reorder columns
merged_global_final = merged_global_final.iloc[:, [0, 1, 7, 6, 2, 3, 4, 5]]
merged_global_final.head()

Unnamed: 0,start_dates,end_dates,glacier_area,region,combined_gt,combined_gt_errors,combined_mwe,combined_mwe_errors
0,2000.0,2001.0,704082.69565,global,-78.044147,111.560951,-0.111179,0.158362
1,2001.0,2002.0,701805.49875,global,-215.169311,95.300101,-0.307516,0.13505
2,2002.0,2003.0,699528.30185,global,-128.466367,91.146345,-0.1842,0.12893
3,2003.0,2004.0,697251.10495,global,-192.67247,72.798877,-0.277163,0.102895
4,2004.0,2005.0,694973.90805,global,-229.236911,73.022158,-0.330842,0.102844


In [101]:
#save to csv
merged_global_final.to_csv(os.path.join(output_directory + '/', 'calendar_years/0_global.csv') )


# Combined function

In [4]:
def process_glambie_folder(folder_name, local_path, rgi=6):
    data_directory = os.path.join(local_path, 'glambie_runs/', folder_name)
    output_directory = os.path.join(local_path, 'glambie_runs_clean/', folder_name)

    list_of_folders = sorted(os.listdir(data_directory)[1:20])

    rename_dict = {
        'changes_gt': 'combined_gt',
        'errors_gt': 'combined_gt_errors',
        'changes_mwe': 'combined_mwe',
        'errors_mwe': 'combined_mwe_errors'
    }

    # Load global calendar as reference so all regions align to same (start_dates, end_dates)
    global_dir = os.path.join(data_directory, '0_global', 'consensus', 'csvs')
    ref_calendar = pd.read_csv(os.path.join(global_dir, 'global_gt.csv'), usecols=['start_dates', 'end_dates'])
    n_global = len(ref_calendar)
    global_area = np.zeros(n_global)

    for i in range(0, len(list_of_folders)):
        #define region name and path to results
        region = list_of_folders[i]
        region_path = os.path.join(data_directory + '/', region + '/', 'consensus/csvs/')
        #get the region name without  the number
        region_name = region[region.find("_")+1:]

        #load in data calendar year
        calendar_gt = pd.read_csv(os.path.join(region_path, 'consensus_calendar_year_gt_' + region_name + '.csv'))
        calendar_mwe = pd.read_csv(os.path.join(region_path, 'consensus_calendar_year_mwe_' + region_name + '.csv'))
        #merge the data
        calendar_merged = calendar_gt.merge(calendar_mwe,
                                        left_on = ["start_dates", "end_dates"],
                                        right_on = ["start_dates", "end_dates"],
                                        suffixes=('_gt', '_mwe'))
        #add column for region
        calendar_merged["region"] = region_name
        #add column for glacier area
        calendar_merged["glacier_area"] = regions.REGIONS[region_name].get_adjusted_area(
            calendar_merged.start_dates,
            calendar_merged.end_dates,
            rgi)

        #calculate global area: align this region to global calendar (same start_dates, end_dates) then add
        region_area = calendar_merged[['start_dates', 'end_dates', 'glacier_area']]
        aligned = ref_calendar.merge(region_area, on=['start_dates', 'end_dates'], how='left')
        aligned['glacier_area'] = aligned['glacier_area'].fillna(0)
        global_area = global_area + aligned['glacier_area'].values

        #rename columns
        calendar_merged_final = calendar_merged.rename(columns=rename_dict)
        #reorder columns (by name so it works even if column count/order varies by region)
        final_cols = ['start_dates', 'end_dates', 'glacier_area', 'region', 'combined_gt', 'combined_gt_errors', 'combined_mwe', 'combined_mwe_errors']
        calendar_merged_final = calendar_merged_final[[c for c in final_cols if c in calendar_merged_final.columns]]
        #save to csv
        calendar_merged_final.to_csv(os.path.join(output_directory + '/', 'calendar_years/' + region + '.csv'))
        print('data successfully saved for region: ', region_name, 'in folder: ', folder_name)

    #load in global datasets
    global_gt = pd.read_csv(os.path.join(global_dir, 'global_gt.csv'))
    global_mwe = pd.read_csv(os.path.join(global_dir, 'global_mwe.csv'))
    #merge data together
    merged_global = global_gt.merge(global_mwe,
                                left_on = ["start_dates", "end_dates"],
                                right_on = ["start_dates", "end_dates"],
                                suffixes=('_gt', '_mwe'))
    #add region and glacier area
    merged_global["region"] = "global"
    merged_global["glacier_area"] = global_area
    #rename columns
    merged_global_final = merged_global.rename(columns=rename_dict)
    #reorder columns
    merged_global_final = merged_global_final.iloc[:, [0, 1, 7, 6, 2, 3, 4, 5]]
    merged_global_final.to_csv(os.path.join(output_directory + '/', 'calendar_years/0_global.csv'))

    method_list = ['altimetry', 'gravimetry', 'demdiff_and_glaciological']
    for i in range(0, len(list_of_folders)):
        #define region name and path to results
        region = list_of_folders[i]
        region_path = os.path.join(data_directory + '/', region + '/', 'consensus/csvs/')
        #get the region name without  the number
        region_name = region[region.find("_") + 1:]

        #load in data hydrological year
        hydrological_gt = pd.read_csv(os.path.join(region_path, 'consensus_hydrological_year_gt_' + region_name + '.csv'))
        hydrological_mwe = pd.read_csv(os.path.join(region_path, 'consensus_hydrological_year_mwe_' + region_name + '.csv'))
        #merge the data
        hydrological_merged = hydrological_gt.merge(hydrological_mwe,
                                      left_on = ["start_dates", "end_dates"],
                                      right_on = ["start_dates", "end_dates"],
                                      suffixes=('_gt', '_mwe'))
        #add column for region
        hydrological_merged["region"] = region_name
        #add column for glacier area
        hydrological_merged["glacier_area"] = regions.REGIONS[region_name].get_adjusted_area(
            hydrological_merged.start_dates,
            hydrological_merged.end_dates,
            rgi)
        #rename columns
        hydrological_merged = hydrological_merged.rename(columns=rename_dict)
        #reorder columns (by name so it works even if column count/order varies by region)
        final_cols_hydro = ['start_dates', 'end_dates', 'glacier_area', 'region', 'combined_gt', 'combined_gt_errors', 'combined_mwe', 'combined_mwe_errors']
        hydrological_merged = hydrological_merged[[c for c in final_cols_hydro if c in hydrological_merged.columns]]

        #retrieve glacier area  at reference year
        if rgi == 6:
           glacier_area_con = regions.REGIONS[region_name].rgi6_area
        else:
           glacier_area_con = regions.REGIONS[region_name].rgi7_area
        #retrieve glacier area uncertainty
        area_unc = (glacier_area_con * regions.REGIONS[region_name].area_uncertainty_percentage)

        #add method columns
        for method in method_list:
            if method in os.listdir(os.path.join(data_directory + '/', region + '/')):
                #load in MWE data:
                #define path to data
                method_path = os.path.join(data_directory + '/', region + '/', method + '/', 'csvs/')
                #read in data
                method_mwe = pd.read_csv(os.path.join(method_path, method + '_final_with_area_change.csv'))
                #merge dataset
                hydrological_merged = hydrological_merged.merge(method_mwe,
                                                             how = 'left',
                                                             on = ["start_dates", "end_dates"],
                                                             suffixes=(None, "_mwe"))
                #rename columns
                rename_dict_method = {
                    'changes': method + '_mwe',
                    'errors': method + '_mwe_errors'
                }
                hydrological_merged = hydrological_merged.rename(columns=rename_dict_method)
                #add gt columns
                #for GT changes
                #remove area change effect from mwe
                rem_area_change = hydrological_merged[method + '_mwe'] / (glacier_area_con/ hydrological_merged["glacier_area"])
                #convert mwe to gt and add column
                hydrological_merged[method + '_gt'] = np.array(mhc.meters_water_equivalent_to_gigatonnes(
                    rem_area_change,
                    glacier_area_con,
                    constants.DENSITY_OF_WATER_KG_PER_M3))
                #for GT errors
                #convert mwe to gt and add as temp column
                hydrological_merged[method + '_gt_errors'] = np.array(mhc.meters_water_equivalent_to_gigatonnes(
                    hydrological_merged[method + '_mwe_errors'],
                    glacier_area_con,
                    constants.DENSITY_OF_WATER_KG_PER_M3))
                #calculate total uncertainties
                uncertainties_gt = (hydrological_merged[method + '_gt'].abs()
                                * ((hydrological_merged[method + '_gt_errors'] / hydrological_merged[method + '_gt']) ** 2
                                   + (area_unc / glacier_area_con) ** 2) ** 0.5)
                #include in dataframe
                hydrological_merged[method + '_gt_errors'] = np.array(uncertainties_gt)
            else:
                hydrological_merged[method + '_gt'] = np.nan
                hydrological_merged[method + '_gt_errors'] = np.nan
                hydrological_merged[method + '_mwe'] = np.nan
                hydrological_merged[method + '_mwe_errors'] = np.nan
                hydrological_merged[method + '_annual_variability'] = np.nan

        #save to csv
        hydrological_merged.to_csv(os.path.join(output_directory + '/', 'hydrological_years/' + region + '.csv'))
        print('data successfully saved for region: ', region_name, 'in folder: ', folder_name)

In [7]:
process_glambie_folder('reanalysis-default_changed_uncertainties', local_path, rgi=6)

data successfully saved for region:  north_asia in folder:  reanalysis-default_changed_uncertainties
data successfully saved for region:  central_europe in folder:  reanalysis-default_changed_uncertainties
data successfully saved for region:  caucasus_middle_east in folder:  reanalysis-default_changed_uncertainties
data successfully saved for region:  central_asia in folder:  reanalysis-default_changed_uncertainties
data successfully saved for region:  south_asia_west in folder:  reanalysis-default_changed_uncertainties
data successfully saved for region:  south_asia_east in folder:  reanalysis-default_changed_uncertainties
data successfully saved for region:  low_latitudes in folder:  reanalysis-default_changed_uncertainties
data successfully saved for region:  southern_andes in folder:  reanalysis-default_changed_uncertainties
data successfully saved for region:  new_zealand in folder:  reanalysis-default_changed_uncertainties
data successfully saved for region:  antarctic_and_subant

KeyError: 'altimetry_mwe_errors'