In [1]:
import os
import gc
import sys
import glob
import numpy as np
import pandas as pd
import netCDF4 as nc
from datetime import datetime, timedelta
from matplotlib.cm import get_cmap
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib import colors
import matplotlib.ticker as mticker
from copy import deepcopy
from scipy.stats import percentileofscore

In [2]:
# To use PLUMBER2_VPD_common_utils, change the working directory to the folder
# that contains it so the import below can resolve the module.
os.chdir('/g/data/w97/mm3972/scripts/PLUMBER2/LSM_VPD_PLUMBER2')
# NOTE(review): wildcard import -- helpers used later in this notebook that are
# not imported anywhere else (e.g. load_default_list) presumably come from here; confirm.
from PLUMBER2_VPD_common_utils import *

In [3]:
# Read the inputs written by the earlier processing steps:
#   file_name1 (process1_output) -> Qle table for all sites
#   file_name2 (process2_output) -> data-selection (quality control) table for all sites
file_name1 = "/g/data/w97/mm3972/scripts/PLUMBER2/LSM_VPD_PLUMBER2/txt/process1_output/Qle_all_sites.csv"
file_name2 = "/g/data/w97/mm3972/scripts/PLUMBER2/LSM_VPD_PLUMBER2/txt/process2_output/data_selection_all_sites.csv"

# Load both CSVs in order; the two frames are used together in later cells.
Qle_input, data_selection_input = (
    pd.read_csv(csv_path) for csv_path in (file_name1, file_name2)
)

In [28]:
# Default site / vegetation-type / climate-type / model lists from the common utilities.
site_names, IGBP_types, clim_types, model_names = load_default_list()

# For every site, print the sum of the 'select_data' column over that site's rows
# (presumably the number of time steps that passed data selection -- confirm).
for site_name in site_names:
    site_mask  = data_selection_input['site_name'].eq(site_name)
    n_selected = np.sum(data_selection_input.loc[site_mask, 'select_data'])
    print(site_name, n_selected)

AR-SLu 4797
AT-Neu 10671
AU-ASM 31081
AU-Cow 12219
AU-Cpr 33798
AU-Ctr 16568
AU-Cum 19291
AU-DaP 15741
AU-DaS 33970
AU-Dry 20918
AU-Emr 10547
AU-GWW 17816
AU-Gin 26340
AU-How 51142
AU-Lit 7797
AU-Otw 1732
AU-Rig 0
AU-Rob 0
AU-Sam 20424
AU-Stp 38956
AU-TTE 22705
AU-Tum 16852
AU-Whr 0
AU-Wrr 0
AU-Ync 0
BE-Bra 28178
BE-Lon 20659
BE-Vie 35643
BR-Sa3 9201
BW-Ma1 5859
CA-NS1 0
CA-NS2 0
CA-NS4 0
CA-NS5 0
CA-NS6 0
CA-NS7 0
CA-Qcu 5685
CA-Qfo 9511
CA-SF1 0
CA-SF2 0
CA-SF3 0
CH-Cha 9060
CH-Dav 22922
CH-Fru 11139
CH-Oe1 10803
CN-Cha 4019
CN-Cng 5186
CN-Dan 1346
CN-Din 0
CN-Du2 3549
CN-HaM 622
CN-Qia 7159
CZ-wet 10179
DE-Bay 4621
DE-Geb 27073
DE-Gri 16124
DE-Hai 21219
DE-Kli 19578
DE-Meh 6370
DE-Obe 14664
DE-Seh 5679
DE-SfN 2273
DE-Tha 38311
DE-Wet 9908
DK-Fou 969
DK-Lva 4562
DK-Ris 3504
DK-Sor 40423
DK-ZaH 1127
ES-ES1 41383
ES-ES2 8391
ES-LMa 13019
ES-LgS 2072
ES-VDA 2228
FI-Hyy 35298
FI-Kaa 4043
FI-Lom 2752
FI-Sod 10896
FR-Fon 22860
FR-Gri 21475
FR-Hes 18018
FR-LBr 16910
FR-Lq1 6086
FR-Lq2 6291


In [5]:
site_names, IGBP_types, clim_types, model_names = load_default_list()

# Replace the default model list with the explicit set of columns to inspect:
# 15 model columns plus the observation column.
model_names = [
    'model_CABLE',
    'model_CABLE-POP-CN',
    'model_CHTESSEL_Ref_exp1',
    'model_CLM5a',
    'model_GFDL',
    'model_JULES_GL9',
    'model_JULES_GL9_withLAI',
    'model_MATSIRO',
    'model_MuSICA',
    'model_NASAEnt',
    'model_NoahMPv401',
    'model_ORC2_r6593',
    'model_ORC3_r8120',
    'model_QUINCY',
    'model_STEMMUS-SCOPE',
    'obs',
]

# For each site, open its process3 curve file and report how many numeric
# (non-NaN) values every model column holds; columns with none are skipped.
for site_name in site_names:
    file_name3     = f"/g/data/w97/mm3972/scripts/PLUMBER2/LSM_VPD_PLUMBER2/txt/process3_output/curves/raw_data_Qle_VPD_hourly_SM_per_all_models_0-15th_data_selected_{site_name}.csv"
    process3_input = pd.read_csv(file_name3)

    for model_name in model_names:
        # Entries that cannot be parsed as numbers are coerced to NaN and therefore not counted.
        column_values = pd.to_numeric(process3_input[model_name], errors='coerce')
        n_valid = np.sum(~np.isnan(column_values))
        if n_valid > 0:
            print(site_name, model_name, n_valid)

AT-Neu model_CABLE 3340
AT-Neu model_CABLE-POP-CN 3340
AT-Neu model_CHTESSEL_Ref_exp1 2808
AT-Neu model_CLM5a 2763
AT-Neu model_GFDL 2893
AT-Neu model_JULES_GL9 2840
AT-Neu model_JULES_GL9_withLAI 2845
AT-Neu model_MATSIRO 3530
AT-Neu model_MuSICA 3013
AT-Neu model_NASAEnt 3158
AT-Neu model_NoahMPv401 2953
AT-Neu model_ORC2_r6593 3308
AT-Neu model_ORC3_r8120 3272
AT-Neu model_QUINCY 3158
AT-Neu model_STEMMUS-SCOPE 3124
AT-Neu obs 3158
AU-Ctr model_CABLE 4385
AU-Ctr model_CABLE-POP-CN 4385
AU-Ctr model_CHTESSEL_Ref_exp1 4224
AU-Ctr model_CLM5a 3530
AU-Ctr model_GFDL 3812
AU-Ctr model_JULES_GL9 4095
AU-Ctr model_JULES_GL9_withLAI 3948
AU-Ctr model_MATSIRO 3749
AU-Ctr model_MuSICA 3937
AU-Ctr model_NASAEnt 4116
AU-Ctr model_NoahMPv401 4547
AU-Ctr model_ORC2_r6593 4312
AU-Ctr model_ORC3_r8120 4394
AU-Ctr model_QUINCY 4116
AU-Ctr model_STEMMUS-SCOPE 3992
AU-Ctr obs 4116
AU-DaP model_CABLE 3122
AU-DaP model_CABLE-POP-CN 3122
AU-DaP model_CHTESSEL_Ref_exp1 3017
AU-DaP model_CLM5a 3598
AU-DaP 

In [36]:
# Settings consumed by the (currently commented-out) soil-moisture filtering below:
#   add_SMtopXm -> depth tag used to build the '<model>_SMtop<X>m_percentile' column names
#   low_bound   -> [min, max] soil-moisture percentile window for the "dry" subset
#   high_bound  -> [min, max] soil-moisture percentile window for the "wet" subset
add_SMtopXm = '0.3'
low_bound   = [0, 15]
high_bound  = [85, 100]

# Be careful: this script selects the time steps where all 13 models with SM
# simulations are within the required ranges, rather than only the models that
# provide the variable (e.g. TVeg and non-TVeg variables have fewer models).
# That is the reason for using model_names rather than model_out_list.

for site_name in site_names:
    # Rows of this site that also passed the data selection.
    site_mask = (data_selection_input['site_name'] == site_name)
    all_mask  = site_mask & data_selection_input['select_data']

    # NOTE(review): the mask is built from data_selection_input but applied to
    # Qle_input, so both frames must share the same row index -- confirm.
    tmp_input = Qle_input[all_mask]

    # Independent working copies for the dry and wet subsets. Use the
    # `deepcopy` name imported at the top of the notebook; `copy.deepcopy`
    # only worked if the wildcard import happened to expose the `copy` module.
    var_output_dry = deepcopy(tmp_input)
    var_output_wet = deepcopy(tmp_input)

    print(site_name, len(var_output_dry))
# # select time step where obs_EF isn't NaN (when Qh<0 or Qle+Qh<10)
# for i, model_out_name in enumerate(model_names):
#     if 'obs' in model_out_name:
#         head = ''
#     else:
#         head = 'model_'
#     SM_per_name = model_out_name+'_SMtop'+str(add_SMtopXm)+'m_percentile'

#     if len(low_bound)>1 and len(high_bound)>1:
#         dry_mask  = (var_output[SM_per_name] > low_bound[0]) & (var_output[SM_per_name] < low_bound[1])
#         wet_mask  = (var_output[SM_per_name] > high_bound[0]) & (var_output[SM_per_name] < high_bound[1])
#     elif len(low_bound)==1 and len(high_bound)==1:
#         dry_mask  = (var_output[SM_per_name] < low_bound)
#         wet_mask  = (var_output[SM_per_name] > high_bound)
#     else:
#         sys.exit('len(low_bound)=',len(low_bound),'len(high_bound)=',len(high_bound))

#     var_output_dry[head+model_out_name] = np.where(dry_mask, var_output[head+model_out_name], np.nan)
#     var_output_wet[head+model_out_name] = np.where(wet_mask, var_output[head+model_out_name], np.nan)

# SM_per_model_names = ['CABLE_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'CABLE-POP-CN_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'CHTESSEL_Ref_exp1_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'CLM5a_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'GFDL_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'JULES_GL9_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'JULES_GL9_withLAI_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'MATSIRO_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'MuSICA_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'NoahMPv401_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'ORC2_r6593_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'ORC3_r8120_SMtop'+str(add_SMtopXm)+'m_percentile',
#                       'STEMMUS-SCOPE_SMtop'+str(add_SMtopXm)+'m_percentile']

# # Print the expected column names
# print("Expected columns (SM_per_model_names):")
# print(SM_per_model_names)

# # Print the actual columns in the DataFrame
# print("Columns in var_output_dry:")
# print(var_output_dry.columns)
# # return

# # Mask out SM is inconsistent
# var_output_dry[SM_per_model_names] = var_output_dry[SM_per_model_names].where(~var_output_dry[SM_per_model_names].isna().any(axis=1), other=np.nan)
# var_output_wet[SM_per_model_names] = var_output_wet[SM_per_model_names].where(~var_output_wet[SM_per_model_names].isna().any(axis=1), other=np.nan)

# for model_out_name in model_out_list:
#     if 'obs' in model_out_name:
#         head = ''
#     else:
#         head = 'model_'
#     var_output_dry.loc[:,head+model_out_name] = np.where(~np.isnan(var_output_dry['CABLE_SMtop'+str(add_SMtopXm)+'m_percentile'].values),
#                                                    var_output_dry[head+model_out_name], np.nan)
#     var_output_wet.loc[:,head+model_out_name] = np.where(~np.isnan(var_output_wet['CABLE_SMtop'+str(add_SMtopXm)+'m_percentile'].values),
#                                                    var_output_wet[head+model_out_name], np.nan)


AR-SLu 4797
AT-Neu 10671
AU-ASM 31081
AU-Cow 12219
AU-Cpr 33798
AU-Ctr 16568
AU-Cum 19291
AU-DaP 15741
AU-DaS 33970
AU-Dry 20918
AU-Emr 10547
AU-GWW 17816
AU-Gin 26340
AU-How 51142
AU-Lit 7797
AU-Otw 1732
AU-Rig 0
AU-Rob 0
AU-Sam 20424
AU-Stp 38956
AU-TTE 22705
AU-Tum 16852
AU-Whr 0
AU-Wrr 0
AU-Ync 0
BE-Bra 28178
BE-Lon 20659
BE-Vie 35643
BR-Sa3 9201
BW-Ma1 5859
CA-NS1 0
CA-NS2 0
CA-NS4 0
CA-NS5 0
CA-NS6 0
CA-NS7 0
CA-Qcu 5685
CA-Qfo 9511
CA-SF1 0
CA-SF2 0
CA-SF3 0
CH-Cha 9060
CH-Dav 22922
CH-Fru 11139
CH-Oe1 10803
CN-Cha 4019
CN-Cng 5186
CN-Dan 1346
CN-Din 0
CN-Du2 3549
CN-HaM 622
CN-Qia 7159
CZ-wet 10179
DE-Bay 4621
DE-Geb 27073
DE-Gri 16124
DE-Hai 21219
DE-Kli 19578
DE-Meh 6370
DE-Obe 14664
DE-Seh 5679
DE-SfN 2273
DE-Tha 38311
DE-Wet 9908
DK-Fou 969
DK-Lva 4562
DK-Ris 3504
DK-Sor 40423
DK-ZaH 1127
ES-ES1 41383
ES-ES2 8391
ES-LMa 13019
ES-LgS 2072
ES-VDA 2228
FI-Hyy 35298
FI-Kaa 4043
FI-Lom 2752
FI-Sod 10896
FR-Fon 22860
FR-Gri 21475
FR-Hes 18018
FR-LBr 16910
FR-Lq1 6086
FR-Lq2 6291
