In [1]:
import os
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import xarray as xr
from dateutil.relativedelta import relativedelta


from my_functions import read_obsfcstana_extend_datetime
from my_functions import read_obsfcstana

In [2]:
# Experiment to process. Experiment names that have been used here:
#   'DAv7_M36_ASCAT_type_13_no_catdef_fp', 'DAv7_M36_ASCAT_type_2_fp_precip', 'DAv7_M36_ASCAT_type_13_test_catdef'
expt_name = 'DAv7_M36_ASCAT_type_2_fp_precip'

# Analysis window; the date loops in the later cells treat end_date as inclusive
start_date, end_date = datetime(2015, 4, 1), datetime(2015, 4, 20)

# Compact YYYYMMDD stamps embedded in the names of the saved .npz files
start_date_str = format(start_date, '%Y%m%d')
end_date_str = format(end_date, '%Y%m%d')

# filename = f"{start_date_str}_{end_date_str}.npz"

In [3]:
# Gather ObsFcstAna records (read with extended date_time) and write one
# .npz archive per April-to-March period.

# Calendar years touched by the configured date range
years = [str(yr) for yr in range(start_date.year, end_date.year + 1)]
print('years = ', years)

# Leading part shared by the ObsFcstAna file names
file_name_start = expt_name+'.ens_avg.ldas_ObsFcstAna.20'

# Verbosity switch handed to the reader
printflag = False

# Names of the fields that end up in the archive, in the order they are stored
archive_fields = ('date_time', 'obs_species', 'obs_tilenum', 'obs_lon',
                  'obs_lat', 'obs_obs', 'obs_fcst', 'obs_ana')

# Pair every year with its successor. The last entry of `years` never opens a
# period, so a date range inside a single calendar year gives no iterations.
for current_year, next_year in zip(years, years[1:]):
    # Monthly input directories: April-December of the current year followed
    # by January-March of the next one
    paths = [
        f'/discover/nobackup/amfox/Experiments/{expt_name}/{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/Y{current_year}/M{month:02d}'
        for month in range(4, 13)
    ] + [
        f'/discover/nobackup/amfox/Experiments/{expt_name}/{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/Y{next_year}/M{month:02d}'
        for month in range(1, 4)
    ]

    # One list of per-directory chunks for each archived field
    collected = {field: [] for field in archive_fields}

    for path in paths:
        print("Current path:", path)

        # The reader returns eleven values; the three *var outputs are not archived
        (date_time, obs_species, obs_tilenum, obs_lon, obs_lat,
         obs_obs, obs_obsvar, obs_fcst, obs_fcstvar,
         obs_ana, obs_anavar) = read_obsfcstana_extend_datetime(path, file_name_start, printflag)

        kept = (date_time, obs_species, obs_tilenum, obs_lon,
                obs_lat, obs_obs, obs_fcst, obs_ana)
        for field, chunk in zip(archive_fields, kept):
            collected[field].append(chunk)

    # Join the chunks of every field and store them under the period's first year
    np.savez(f'{expt_name}_{start_date_str}_{end_date_str}_obsfcstana_extend_datetime_{current_year}.npz',
             **{field: np.concatenate(chunks) for field, chunks in collected.items()})

years =  ['2015']


In [4]:
# Collect analysis-minus-forecast increments of surface (SFMC), root-zone
# (RZMC) and profile (PRMC) soil moisture from the inst3_1d_lndfcstana_Nt
# files and save the concatenated result to a single .npz file.

# Directory path to search for NetCDF files
root_directory = f'/discover/nobackup/amfox/Experiments/{expt_name}/{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg/'

# Per-file increments and time stamps, appended in sorted-filename order
sfmc_increment_list = []
rzmc_increment_list = []
prmc_increment_list = []

time_stamp_list = []

# Walk month by month starting from the first day of the starting month.
# Stepping from start_date itself would skip the final month whenever
# start_date falls later in its month than end_date does (e.g. 15 Apr -> 10 May).
# Note that every matching file of a visited month is read; the day parts of
# start_date / end_date do not filter individual files.
current_date = start_date.replace(day=1)

while current_date <= end_date:
    year_month_directory = os.path.join(root_directory,
                                        f"Y{current_date.year}",
                                        f"M{current_date.month:02d}")
    print(year_month_directory)
    for filename in sorted(os.listdir(year_month_directory)):
        # Keep the lndfcstana .nc4 files, skipping names that end in 'z.nc4'
        if filename.endswith('.nc4') and not filename.endswith('z.nc4') and filename.startswith(f'{expt_name}.inst3_1d_lndfcstana_Nt.2'):
            file_path = os.path.join(year_month_directory, filename)

            # The context manager closes the file even if a variable is missing
            with xr.open_dataset(file_path) as ds:
                # Read the time stamp into memory while the file is still open
                time_stamp_list.append(ds['time_stamp'].load())

                # Increment = analysis minus forecast
                sfmc_increment_list.append(ds['SFMC_ANA'] - ds['SFMC_FCST'])
                rzmc_increment_list.append(ds['RZMC_ANA'] - ds['RZMC_FCST'])
                prmc_increment_list.append(ds['PRMC_ANA'] - ds['PRMC_FCST'])

    current_date += relativedelta(months=1)

# Concatenate the per-file pieces along the time dimension
print('working on sfmc_increment_concat')
sfmc_increment_concat = xr.concat(sfmc_increment_list, dim='time')
print('working on rzmc_increment_concat')
rzmc_increment_concat = xr.concat(rzmc_increment_list, dim='time')
print('working on prmc_increment_concat')
prmc_increment_concat = xr.concat(prmc_increment_list, dim='time')

time_stamp_concat = xr.concat(time_stamp_list, dim='time')

# Save the time stamps and all three concatenated increments to one .npz file
np.savez(f'{expt_name}_{start_date_str}_{end_date_str}_increments_concat.npz',
         time_stamp_concat=time_stamp_concat,
         sfmc_increment_concat=sfmc_increment_concat,
         rzmc_increment_concat=rzmc_increment_concat,
         prmc_increment_concat=prmc_increment_concat)

/discover/nobackup/amfox/Experiments/DAv7_M36_ASCAT_type_2_fp_precip/DAv7_M36_ASCAT_type_2_fp_precip/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg/Y2015/M04
working on sfmc_increment_concat
working on rzmc_increment_concat
working on prmc_increment_concat


In [5]:
# Per-tile mean and standard deviation of each increment over the whole period.
#
# A single reduction over the 'time' dimension replaces a Python loop that
# sliced every tile separately ([:, i]) and reduced it on its own. This
# assumes the arrays are laid out as (time, tile), which is what that
# positional indexing relied on. `.values` yields plain NumPy arrays with one
# entry per tile.
mean_sfmc_increment = sfmc_increment_concat.mean(dim='time').values
std_sfmc_increment = sfmc_increment_concat.std(dim='time').values
mean_rzmc_increment = rzmc_increment_concat.mean(dim='time').values
std_rzmc_increment = rzmc_increment_concat.std(dim='time').values
mean_prmc_increment = prmc_increment_concat.mean(dim='time').values
std_prmc_increment = prmc_increment_concat.std(dim='time').values

# Save the per-tile statistics to a new .npz file
np.savez(f'{expt_name}_{start_date_str}_{end_date_str}_increment_stats.npz',
        mean_sfmc_increment=mean_sfmc_increment,
        std_sfmc_increment=std_sfmc_increment,
        mean_rzmc_increment=mean_rzmc_increment,
        std_rzmc_increment=std_rzmc_increment,
        mean_prmc_increment=mean_prmc_increment,
        std_prmc_increment=std_prmc_increment)

In [6]:
# Time series of the increments: mean and standard deviation across all tiles
# for every time step.
#
# The previous loop ran over the time steps but indexed the arrays as [:, i],
# i.e. it selected tile i instead of time step i, so the saved "time series"
# actually held per-tile statistics of the first len(time) tiles. Reducing
# over the 'tile' dimension gives one value per time step, matching the
# [i, :] indexing used for the SMAP L4 time series later in this notebook.
ts_mean_sfmc_increment = sfmc_increment_concat.mean(dim='tile').values
ts_std_sfmc_increment = sfmc_increment_concat.std(dim='tile').values
ts_mean_rzmc_increment = rzmc_increment_concat.mean(dim='tile').values
ts_std_rzmc_increment = rzmc_increment_concat.std(dim='tile').values
ts_mean_prmc_increment = prmc_increment_concat.mean(dim='tile').values
ts_std_prmc_increment = prmc_increment_concat.std(dim='tile').values

# Save the time series to a new .npz file
np.savez(f'{expt_name}_{start_date_str}_{end_date_str}_increment_timeseries.npz',
        ts_mean_sfmc_increment=ts_mean_sfmc_increment,
        ts_std_sfmc_increment=ts_std_sfmc_increment,
        ts_mean_rzmc_increment=ts_mean_rzmc_increment,
        ts_std_rzmc_increment=ts_std_rzmc_increment,
        ts_mean_prmc_increment=ts_mean_prmc_increment,
        ts_std_prmc_increment=ts_std_prmc_increment)

In [7]:
# Daily observation-space statistics: means of observations, forecasts and
# analyses, plus mean / std / max-abs of observation-minus-forecast (O-F) and
# mean / std of observation-minus-analysis (O-A), one entry per day.

# Define the path directory
path_dir = f'/discover/nobackup/amfox/Experiments/{expt_name}/{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg'

# Define the common file name start
file_name_start = f'{expt_name}.ens_avg.ldas_ObsFcstAna.'

# Define the print flag
printflag = False

# One list per statistic; index k of every list belongs to dates_list[k]
obs_obs_mean_list = []
obs_fcst_mean_list = []
obs_ana_mean_list = []
omf_mean_list = []
oma_mean_list = []
dates_list = []
omf_max_list = []
omf_std_list = []
oma_std_list = []

# The window comes from the configuration cell; to override it here use e.g.
# start_date = datetime.strptime('20150401', '%Y%m%d')
# end_date = datetime.strptime('20210331', '%Y%m%d')

# Loop over the dates (end_date inclusive)
current_date = start_date
while current_date <= end_date:
    # File name prefix for the current day
    file_name = file_name_start + current_date.strftime('%Y%m%d')
    # Progress marker: printed only for files dated 1 April
    if file_name[-4:] == '0401':
        print('file_name = ', file_name)

    # Read everything available for the current day
    (date_time, obs_species, obs_tilenum, obs_lon, obs_lat,
     obs_obs, obs_obsvar, obs_fcst, obs_fcstvar,
     obs_ana, obs_anavar) = read_obsfcstana(path_dir, file_name, printflag)

    # Convert the returned sequences to numpy arrays
    obs_obs = np.array(obs_obs)
    obs_fcst = np.array(obs_fcst)
    obs_ana = np.array(obs_ana)
    # Optional species filter (disabled):
    # obs_obs = np.array(obs_obs[obs_species > 4])
    # obs_fcst = np.array(obs_fcst[obs_species > 4])
    # obs_ana = np.array(obs_ana[obs_species > 4])

    # Per-array means; an empty array gives NaN explicitly instead of relying
    # on NumPy's empty-slice warning path
    obs_obs_mean = np.mean(obs_obs) if obs_obs.size > 0 else np.nan
    obs_fcst_mean = np.mean(obs_fcst) if obs_fcst.size > 0 else np.nan
    obs_ana_mean = np.mean(obs_ana) if obs_ana.size > 0 else np.nan

    # O-F statistics. The size/shape check has to come BEFORE the difference
    # is formed; previously it only protected the max, after the mean had
    # already evaluated obs_obs - obs_fcst.
    if obs_fcst.size > 0 and obs_obs.size > 0 and obs_fcst.shape == obs_obs.shape:
        omf = obs_obs - obs_fcst
        omf_mean = np.mean(omf)
        omf_std = np.std(omf)
        omf_max = np.max(np.abs(omf))
    else:
        omf_mean = omf_std = omf_max = np.nan
        # Report the day that had no usable observation/forecast pair
        print('Current date = ', current_date)

    # O-A statistics, guarded the same way
    if obs_ana.size > 0 and obs_obs.size > 0 and obs_ana.shape == obs_obs.shape:
        oma = obs_obs - obs_ana
        oma_mean = np.mean(oma)
        oma_std = np.std(oma)
    else:
        oma_mean = oma_std = np.nan

    # Append the day's statistics to the lists
    obs_obs_mean_list.append(obs_obs_mean)
    obs_fcst_mean_list.append(obs_fcst_mean)
    obs_ana_mean_list.append(obs_ana_mean)
    omf_mean_list.append(omf_mean)
    oma_mean_list.append(oma_mean)
    omf_max_list.append(omf_max)
    omf_std_list.append(omf_std)
    oma_std_list.append(oma_std)

    # Append the current date to the dates list
    dates_list.append(current_date.strftime('%Y%m%d'))

    # Increment the current date by one day
    current_date += timedelta(days=1)

file_name =  DAv7_M36_ASCAT_type_2_fp_precip.ens_avg.ldas_ObsFcstAna.20150401


In [8]:
# Write the daily observation-space statistics to disk; each archive key
# carries the name of the list it stores
omf_ts_payload = {
    'dates_list': dates_list,
    'obs_obs_mean_list': obs_obs_mean_list,
    'obs_fcst_mean_list': obs_fcst_mean_list,
    'obs_ana_mean_list': obs_ana_mean_list,
    'omf_mean_list': omf_mean_list,
    'oma_mean_list': oma_mean_list,
    'omf_max_list': omf_max_list,
    'omf_std_list': omf_std_list,
    'oma_std_list': oma_std_list,
}
np.savez(f'{expt_name}_{start_date_str}_{end_date_str}_OmF_ts.npz', **omf_ts_payload)

In [9]:
# Collect soil moisture, precipitation and vegetation variables from the
# SMAP_L4_SM_gph files, one list entry per file in sorted-filename order.
root_directory = f'/discover/nobackup/amfox/Experiments/{expt_name}/{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg'

sm_surface_list = []
sm_rootzone_list = []
sm_profile_list = []
precipitation_total_surface_flux_list = []
vegetation_greenness_fraction_list = []
leaf_area_index_list = []
time_stamp_list = []

# Walk month by month starting from the first day of the starting month.
# Stepping from start_date itself would skip the final month whenever
# start_date falls later in its month than end_date does (e.g. 15 Apr -> 10 May).
# Every matching file of a visited month is read; the day parts of
# start_date / end_date do not filter individual files.
current_date = start_date.replace(day=1)

while current_date <= end_date:
    year_month_directory = os.path.join(root_directory,
                                        f"Y{current_date.year}",
                                        f"M{current_date.month:02d}")
    print(year_month_directory)
    for filename in sorted(os.listdir(year_month_directory)):
        # Keep the gph .nc4 files, skipping names that end in 'z.nc4'
        if filename.endswith('.nc4') and not filename.endswith('z.nc4') and filename.startswith(f'{expt_name}.SMAP_L4_SM_gph.2'):
            file_path = os.path.join(year_month_directory, filename)

            # The context manager closes the file even if a variable is
            # missing. Each variable is loaded into memory while the file is
            # open, so nothing has to be read from a closed dataset later on.
            with xr.open_dataset(file_path) as ds:
                time_stamp_list.append(ds['time_stamp'].load())
                sm_surface_list.append(ds['sm_surface'].load())
                sm_rootzone_list.append(ds['sm_rootzone'].load())
                sm_profile_list.append(ds['sm_profile'].load())
                precipitation_total_surface_flux_list.append(ds['precipitation_total_surface_flux'].load())
                vegetation_greenness_fraction_list.append(ds['vegetation_greenness_fraction'].load())
                leaf_area_index_list.append(ds['leaf_area_index'].load())

    current_date += relativedelta(months=1)

/discover/nobackup/amfox/Experiments/DAv7_M36_ASCAT_type_2_fp_precip/DAv7_M36_ASCAT_type_2_fp_precip/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg/Y2015/M04


In [10]:
# Join the per-file SMAP L4 pieces along the time dimension, announcing each
# variable before it is processed.
def _announce_and_concat(message, pieces):
    """Print `message`, then return `pieces` concatenated along 'time'."""
    print(message)
    return xr.concat(pieces, dim='time')


sm_surface_concat = _announce_and_concat('working on sm_surface_concat', sm_surface_list)
sm_rootzone_concat = _announce_and_concat('working on sm_rootzone_concat', sm_rootzone_list)
sm_profile_concat = _announce_and_concat('working on sm_profile_concat', sm_profile_list)
precipitation_total_surface_flux_concat = _announce_and_concat(
    'working on precipitation_total_surface_flux_concat',
    precipitation_total_surface_flux_list)
vegetation_greenness_fraction_concat = _announce_and_concat(
    'working on vegetation_greenness_fraction_concat',
    vegetation_greenness_fraction_list)
leaf_area_index_concat = _announce_and_concat('working on leaf_area_index_concat', leaf_area_index_list)
time_stamp_concat = _announce_and_concat('working on time_stamp_concat', time_stamp_list)

working on sm_surface_concat
working on sm_rootzone_concat
working on sm_profile_concat
working on precipitation_total_surface_flux_concat
working on vegetation_greenness_fraction_concat
working on leaf_area_index_concat
working on time_stamp_concat


In [11]:
# Store the concatenated SMAP L4 fields and their time stamps; each archive
# key carries the name of the variable it stores
gph_concat_payload = {
    'sm_surface_concat': sm_surface_concat,
    'sm_rootzone_concat': sm_rootzone_concat,
    'sm_profile_concat': sm_profile_concat,
    'precipitation_total_surface_flux_concat': precipitation_total_surface_flux_concat,
    'vegetation_greenness_fraction_concat': vegetation_greenness_fraction_concat,
    'leaf_area_index_concat': leaf_area_index_concat,
    'time_stamp_concat': time_stamp_concat,
}
np.savez(f'{expt_name}_{start_date_str}_{end_date_str}_SMAP_L4_SM_gph_concat.npz',
         **gph_concat_payload)

In [12]:
# Per-tile statistics over the whole period: mean and std of the three soil
# moisture fields, mean precipitation, and mean / max of vegetation greenness
# and leaf area index.
#
# A single reduction over the 'time' dimension replaces a Python loop that
# sliced every tile separately ([:, i]) and reduced it on its own. This
# assumes the arrays are laid out as (time, tile), which is what that
# positional indexing relied on. `.values` yields plain NumPy arrays with one
# entry per tile.
mean_sm_surface = sm_surface_concat.mean(dim='time').values
std_sm_surface = sm_surface_concat.std(dim='time').values
mean_sm_rootzone = sm_rootzone_concat.mean(dim='time').values
std_sm_rootzone = sm_rootzone_concat.std(dim='time').values
mean_sm_profile = sm_profile_concat.mean(dim='time').values
std_sm_profile = sm_profile_concat.std(dim='time').values
mean_precipitation_total_surface_flux = precipitation_total_surface_flux_concat.mean(dim='time').values
mean_vegetation_greenness_fraction = vegetation_greenness_fraction_concat.mean(dim='time').values
max_vegetation_greenness_fraction = vegetation_greenness_fraction_concat.max(dim='time').values
mean_leaf_area_index = leaf_area_index_concat.mean(dim='time').values
max_leaf_area_index = leaf_area_index_concat.max(dim='time').values

In [13]:
# Write the per-tile SMAP L4 statistics to a new .npz file; each archive key
# carries the name of the variable it stores
gph_stats_payload = {
    'mean_sm_surface': mean_sm_surface,
    'std_sm_surface': std_sm_surface,
    'mean_sm_rootzone': mean_sm_rootzone,
    'std_sm_rootzone': std_sm_rootzone,
    'mean_sm_profile': mean_sm_profile,
    'std_sm_profile': std_sm_profile,
    'mean_precipitation_total_surface_flux': mean_precipitation_total_surface_flux,
    'mean_vegetation_greenness_fraction': mean_vegetation_greenness_fraction,
    'max_vegetation_greenness_fraction': max_vegetation_greenness_fraction,
    'mean_leaf_area_index': mean_leaf_area_index,
    'max_leaf_area_index': max_leaf_area_index,
}
np.savez(f'{expt_name}_{start_date_str}_{end_date_str}_SMAP_L4_SM_gph_stats.npz',
         **gph_stats_payload)

In [14]:
# Time series across all tiles, one value per time step: mean and std of the
# three soil moisture fields, mean precipitation, and mean / max of vegetation
# greenness and leaf area index.
#
# A single reduction over the 'tile' dimension replaces a Python loop that
# sliced every time step separately ([i, :]) and reduced it on its own. This
# assumes the arrays are laid out as (time, tile), which is what that
# positional indexing relied on. `.values` yields plain NumPy arrays with one
# entry per time step.
ts_mean_sm_surface = sm_surface_concat.mean(dim='tile').values
ts_std_sm_surface = sm_surface_concat.std(dim='tile').values
ts_mean_sm_rootzone = sm_rootzone_concat.mean(dim='tile').values
ts_std_sm_rootzone = sm_rootzone_concat.std(dim='tile').values
ts_mean_sm_profile = sm_profile_concat.mean(dim='tile').values
ts_std_sm_profile = sm_profile_concat.std(dim='tile').values
ts_mean_precipitation_total_surface_flux = precipitation_total_surface_flux_concat.mean(dim='tile').values
ts_mean_vegetation_greenness_fraction = vegetation_greenness_fraction_concat.mean(dim='tile').values
ts_max_vegetation_greenness_fraction = vegetation_greenness_fraction_concat.max(dim='tile').values
ts_mean_leaf_area_index = leaf_area_index_concat.mean(dim='tile').values
ts_max_leaf_area_index = leaf_area_index_concat.max(dim='tile').values

In [15]:
# Write the SMAP L4 time series to a new .npz file; each archive key carries
# the name of the variable it stores
gph_timeseries_payload = {
    'ts_mean_sm_surface': ts_mean_sm_surface,
    'ts_std_sm_surface': ts_std_sm_surface,
    'ts_mean_sm_rootzone': ts_mean_sm_rootzone,
    'ts_std_sm_rootzone': ts_std_sm_rootzone,
    'ts_mean_sm_profile': ts_mean_sm_profile,
    'ts_std_sm_profile': ts_std_sm_profile,
    'ts_mean_precipitation_total_surface_flux': ts_mean_precipitation_total_surface_flux,
    'ts_mean_vegetation_greenness_fraction': ts_mean_vegetation_greenness_fraction,
    'ts_max_vegetation_greenness_fraction': ts_max_vegetation_greenness_fraction,
    'ts_mean_leaf_area_index': ts_mean_leaf_area_index,
    'ts_max_leaf_area_index': ts_max_leaf_area_index,
}
np.savez(f'{expt_name}_{start_date_str}_{end_date_str}_SMAP_L4_SM_gph_timeseries.npz',
         **gph_timeseries_payload)