In [None]:
import os
import glob
import xarray as xr
import numpy as np

In [None]:
def find_files(directory, pattern):
    """Recursively collect the paths below ``directory`` that match ``pattern``.

    Parameters
    ----------
    directory : str
        Root directory of the search.
    pattern : str
        Shell-style wildcard applied to the last path component
        (e.g. ``'cyg.ddmi.s*'``).

    Returns
    -------
    list of str
        Matching paths in the order ``glob`` yields them (not sorted);
        empty when nothing matches.
    """
    # With recursive=True the '**' component stands for zero or more nested
    # directories, so entries directly inside ``directory`` are matched too.
    recursive_pattern = os.path.join(directory, '**', pattern)
    matches = glob.glob(recursive_pattern, recursive=True)
    return matches

# Root of the directory tree that is searched (recursively) for input files.
directory = '/discover/nobackup/projects/gmao/smap/SMAP_Nature/CYGNSS/'
# Wildcard that the file names must match.
pattern = 'cyg.ddmi.s*'

# Every path below `directory` whose name matches `pattern`.
files = find_files(directory=directory, pattern=pattern)

In [None]:
def _load_flat_flag(dataset, name, fill_value=255):
    """Return variable ``name`` of ``dataset`` as a 1D array with fill values set to 0.

    Parameters
    ----------
    dataset : mapping
        Opened dataset; ``dataset[name].values`` must be a numpy array.
    name : str
        Name of the flag variable to read.
    fill_value : scalar, optional
        Value that marks missing data in the flag variable (default 255).

    Returns
    -------
    numpy.ndarray
        Flattened copy of the flag. ``flatten()`` always copies, so zeroing
        the fill values does not modify the data held by ``dataset``.
    """
    flat = dataset[name].values.flatten()
    # NOTE(review): if xarray already decoded the fill value to NaN on read,
    # this comparison matches nothing; NaN still never compares equal to 1,
    # so the combined flag below is unaffected -- confirm against the file.
    flat[flat == fill_value] = 0
    return flat


# Read the five static QC flags as 1D arrays. The context manager closes the
# file as soon as the values are in memory.
with xr.open_dataset('/discover/nobackup/projects/gmao/smap/SMAP_Nature/CYGNSS/ucar_cu_cygnss_sm_v1_static_flags.nc') as ds:
    flag_small_SM_range = _load_flat_flag(ds, 'flag_small_SM_range')
    flag_poor_SMAP = _load_flat_flag(ds, 'flag_poor_SMAP')
    flag_high_ubrmsd = _load_flat_flag(ds, 'flag_high_ubrmsd')
    flag_few_obs = _load_flat_flag(ds, 'flag_few_obs')
    flag_low_signal = _load_flat_flag(ds, 'flag_low_signal')

# A grid cell is rejected (1) when at least one of the individual flags is
# set to 1, otherwise it is kept (0). The element-wise OR replaces the
# per-cell Python loop; all flag arrays must have the same length.
combined_flag = (
    (flag_small_SM_range == 1)
    | (flag_poor_SMAP == 1)
    | (flag_high_ubrmsd == 1)
    | (flag_few_obs == 1)
    | (flag_low_signal == 1)
).astype(int)

In [None]:
def _qc_removed_fraction(sm_slice, reject_mask):
    """Fraction of the valid (non-NaN) values of ``sm_slice`` that QC rejects.

    Parameters
    ----------
    sm_slice : numpy.ndarray
        Soil-moisture values of one sub-daily period; NaN marks missing data.
    reject_mask : numpy.ndarray of bool
        1D mask with one entry per flattened element of ``sm_slice``;
        True where the value is rejected by QC.

    Returns
    -------
    float or None
        ``(# valid values that are rejected) / (# valid values)``, or
        ``None`` when the slice holds no valid value at all.
    """
    valid = ~np.isnan(sm_slice.flatten())
    n_valid = np.sum(valid)
    if n_valid == 0:
        # 0/0 would yield NaN and poison the running sum of the caller.
        return None
    # Counting "valid and rejected" equals the increase in the NaN count
    # that blanking the rejected cells would cause.
    return np.sum(valid & reject_mask) / n_valid


# Only the 36 km gridded soil-moisture files are evaluated.
SM_FILE_SUFFIX = 'grid-soil-moisture-36km.a32.d33.nc'
# Number of leading slices of SM_subdaily that are evaluated per file.
N_SUBDAILY_PERIODS = 4

total = 0    # running sum of the per-period QC'd-out fractions
cnt = 0      # number of sub-daily periods read
n_empty = 0  # periods without any valid value (left out of the mean)

# Loop-invariant: True where the static QC flags reject a grid cell.
reject_mask = combined_flag == 1

for path in files:
    if not path.endswith(SM_FILE_SUFFIX):
        continue
    # .values loads the data, so the file can be closed right away instead
    # of leaking one open handle per processed file.
    with xr.open_dataset(path) as ds:
        sm_subdaily = ds['SM_subdaily'].values
    for i in range(N_SUBDAILY_PERIODS):
        fraction = _qc_removed_fraction(sm_subdaily[i, :, :], reject_mask)
        cnt += 1
        if fraction is None:
            n_empty += 1
            continue
        total += fraction

n_used = cnt - n_empty

print("Number of obs periods read = ", cnt)
if n_empty:
    print("Number of obs periods without valid data (skipped) = ", n_empty)
if n_used:
    print("Mean of data QC'd out = ", total / n_used)
else:
    # No matching file or no valid data: report NaN instead of raising
    # ZeroDivisionError.
    print("Mean of data QC'd out = ", float('nan'))