In [None]:
import xarray as xr
import numpy as np
from datetime import datetime, timedelta
from mapper_functions import plot_global

In [None]:
# expt_name = 'DAv7_M36_ASCAT_type_13_no_catdef_fp', 'DAv7_M36_ASCAT_type_2_fp_precip'
expt_name = 'DAv7_M36_ASCAT_type_2_fp_precip'

# Analysis period; both endpoints are reused below to build file names.
start_date, end_date = datetime(2015, 4, 1), datetime(2015, 4, 20)

# Compact YYYYMMDD tags of the two endpoints.
start_date_str = format(start_date, '%Y%m%d')
end_date_str = format(end_date, '%Y%m%d')

In [None]:
# Path to one catch_progn_incr netCDF file of the experiment; it is read here
# for its 'lon' and 'lat' variables.
# NOTE(review): the year/month/day in this path are fixed and do not follow
# start_date / end_date — confirm that is intended.
file_path = f'/discover/nobackup/amfox/Experiments/{expt_name}/{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg/Y2015/M04/{expt_name}.catch_progn_incr.20150402.nc4'

# Open the file inside a context manager so the handle is released as soon as
# the coordinates have been read. .load() pulls the values into memory, which
# keeps lon/lat usable after the dataset is closed.
with xr.open_dataset(file_path) as dataset:
    lon = dataset['lon'].load()
    lat = dataset['lat'].load()

# Print the dimensions of the variables
print(f"Dimensions of lon: {lon.shape}")
print(f"Dimensions of lat: {lat.shape}")

In [None]:
# Accumulators for the per-observation fields; each yearly .npz file found
# below is appended to these lists. They are re-created on every run of the
# cell, so re-running does not duplicate records.
date_time = []
obs_species = []
obs_tilenum = []
obs_lon = []
obs_lat = []
obs_obs = []
obs_fcst = []
obs_ana = []

years = [str(year) for year in range(start_date.year, end_date.year + 1)]
print('years = ', years)

# Every year from start_date.year through end_date.year is attempted.
# (Stopping at len(years) - 1 loads nothing at all when the whole period lies
# inside a single calendar year.) A year without a file is reported and
# skipped rather than aborting the whole load.
# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects;
# only point this at files you produced yourself.
n_files_loaded = 0
for current_year in years:
    print('Current year = ', current_year)
    npz_path = f'{expt_name}_{start_date_str}_{end_date_str}_obsfcstana_extend_datetime_{current_year}.npz'
    try:
        npz_file = np.load(npz_path, allow_pickle=True)
    except FileNotFoundError:
        print('No file found for year', current_year, '- skipping:', npz_path)
        continue

    # NpzFile is a context manager; leaving the block closes the archive.
    with npz_file as data:
        date_time.extend(data['date_time'])
        obs_species.extend(data['obs_species'])
        obs_tilenum.extend(data['obs_tilenum'])
        obs_lon.extend(data['obs_lon'])
        obs_lat.extend(data['obs_lat'])
        obs_obs.extend(data['obs_obs'])
        obs_fcst.extend(data['obs_fcst'])
        obs_ana.extend(data['obs_ana'])
    n_files_loaded += 1

# Fail loudly instead of letting every later cell run on empty arrays.
if n_files_loaded == 0:
    raise FileNotFoundError(
        f'No obsfcstana .npz files found for {expt_name} '
        f'({start_date_str}-{end_date_str}, years {years})'
    )
    

In [None]:
# Rebind each accumulated list to a numpy array built from it
# (date_time is left as it is).
(obs_species, obs_tilenum, obs_lon, obs_lat,
 obs_obs, obs_fcst, obs_ana) = map(
    np.array,
    (obs_species, obs_tilenum, obs_lon, obs_lat, obs_obs, obs_fcst, obs_ana),
)

In [None]:
# Calculate obs minus fcst and obs minus ana for every observation.
print('Number of obs = ', len(obs_obs))

# Whole-array subtraction replaces a Python-level loop over every observation;
# the element-wise results are the same. np.asarray is a no-op when the inputs
# are already numpy arrays.
obs_minus_fcst = np.asarray(obs_obs) - np.asarray(obs_fcst)
obs_minus_ana = np.asarray(obs_obs) - np.asarray(obs_ana)
    

In [None]:
# Make sure both difference series are numpy arrays before they are indexed
obs_minus_fcst, obs_minus_ana = (
    np.array(series) for series in (obs_minus_fcst, obs_minus_ana)
)

In [None]:
# Distinct tile numbers present in the observations, and how many there are
unique_tilenum = np.unique(obs_tilenum)
num_unique_tilenum = len(unique_tilenum)

# Report the count
print("Number of unique tilenum values: {}".format(num_unique_tilenum))

In [None]:
# Order every per-observation array by tile number so that observations of the
# same tile become contiguous.
sort_indices = np.argsort(obs_tilenum)
(sorted_obs_tilenum, sorted_obs_species, sorted_obs_obs, sorted_obs_fcst,
 sorted_obs_ana, sorted_obs_minus_fcst, sorted_obs_minus_ana) = (
    field[sort_indices]
    for field in (obs_tilenum, obs_species, obs_obs, obs_fcst,
                  obs_ana, obs_minus_fcst, obs_minus_ana)
)

# Distinct tiles together with the number of observations belonging to each
unique_tilenum, counts = np.unique(sorted_obs_tilenum, return_counts=True)

# Running totals of the counts mark where one tile ends and the next begins;
# the final total is dropped because it is the end of the array.
split_indices = counts.cumsum()[:-1]

# Cut each sorted array at those boundaries: one sub-array per tile, in the
# same order as unique_tilenum.
(obs_species_grouped, obs_obs_grouped, obs_fcst_grouped, obs_ana_grouped,
 obs_minus_fcst_grouped, obs_minus_ana_grouped) = (
    np.split(field, split_indices)
    for field in (sorted_obs_species, sorted_obs_obs, sorted_obs_fcst,
                  sorted_obs_ana, sorted_obs_minus_fcst, sorted_obs_minus_ana)
)

In [None]:
# Report how many per-tile groups the split produced
n_groups = len(obs_obs_grouped)
print("Length of obs_obs_grouped: {}".format(n_groups))

In [None]:
# Assign lon and lat to each tilenum.
# One fancy-index lookup replaces a Python loop that indexed lon/lat one
# scalar at a time; the resulting arrays hold the same values.
# NOTE(review): tile numbers are used directly as 0-based positions into
# lon/lat — confirm the numbering of obs_tilenum matches (no off-by-one).
tile_positions = np.asarray(unique_tilenum[:num_unique_tilenum]).astype(int)

lon_tilenum = np.asarray(lon)[tile_positions]
lat_tilenum = np.asarray(lat)[tile_positions]

In [None]:
# Run this cell if you have a single sensor experiment
#
# Each list below has one entry per tile; entry k belongs to unique_tilenum[k].
_tiles = range(num_unique_tilenum)

# Number of observations per tile
num_obs = [len(obs_obs_grouped[k]) for k in _tiles]

# Mean and standard deviation of the observations per tile
mean_obs_obs = [np.mean(obs_obs_grouped[k]) for k in _tiles]
std_obs_obs = [np.std(obs_obs_grouped[k]) for k in _tiles]

# Mean forecast and mean analysis per tile
mean_obs_fcst = [np.mean(obs_fcst_grouped[k]) for k in _tiles]
mean_obs_ana = [np.mean(obs_ana_grouped[k]) for k in _tiles]

# Mean of obs-minus-fcst and obs-minus-ana per tile
mean_obs_minus_fcst = [np.mean(obs_minus_fcst_grouped[k]) for k in _tiles]
mean_obs_minus_ana = [np.mean(obs_minus_ana_grouped[k]) for k in _tiles]

# Standard deviation of obs-minus-fcst and obs-minus-ana per tile
std_obs_minus_fcst = [np.std(obs_minus_fcst_grouped[k]) for k in _tiles]
std_obs_minus_ana = [np.std(obs_minus_ana_grouped[k]) for k in _tiles]

In [None]:
# Run this cell if you have a single sensor experiment
# Write every per-tile statistic, plus the tile coordinates, to one .npz archive.
single_sensor_stats = dict(
    unique_tilenum=unique_tilenum,
    num_obs=num_obs,
    mean_obs_obs=mean_obs_obs,
    std_obs_obs=std_obs_obs,
    mean_obs_fcst=mean_obs_fcst,
    mean_obs_ana=mean_obs_ana,
    mean_obs_minus_fcst=mean_obs_minus_fcst,
    mean_obs_minus_ana=mean_obs_minus_ana,
    std_obs_minus_fcst=std_obs_minus_fcst,
    std_obs_minus_ana=std_obs_minus_ana,
    lon_tilenum=lon_tilenum,
    lat_tilenum=lat_tilenum,
)
np.savez(f'{expt_name}_{start_date_str}_{end_date_str}_obsfcstana_stats.npz',
         **single_sensor_stats)

In [None]:
# Run this cell if you have a multi-sensor experiment
#
# Every list produced below has one entry per tile; entry k belongs to
# unique_tilenum[k].


def _per_tile_stat(groups, masks, reducer):
    """Apply ``reducer`` to the masked part of every per-tile group.

    Parameters
    ----------
    groups : sequence of np.ndarray
        One array of values per tile.
    masks : sequence of np.ndarray of bool
        One boolean mask per tile, aligned with ``groups``.
    reducer : callable
        Function such as ``np.mean`` or ``np.std`` applied to the selection.

    Returns
    -------
    list
        One value per (group, mask) pair. A tile whose mask selects nothing
        yields NaN directly — the value np.mean / np.std give for an empty
        slice — without emitting a RuntimeWarning for every such tile.
    """
    results = []
    for values, mask in zip(groups, masks):
        selected = values[mask]
        results.append(reducer(selected) if selected.size else np.nan)
    return results


# Species masks are computed once per tile and reused for every statistic.
# Species codes below 5 are counted as SMAP, codes above 4 as ASCAT.
_tiles = range(num_unique_tilenum)
_smap_masks = [obs_species_grouped[k] < 5 for k in _tiles]
_ascat_masks = [obs_species_grouped[k] > 4 for k in _tiles]

# Find the number of observations for each tilenum
num_obs_smap = [len(obs_obs_grouped[k][_smap_masks[k]]) for k in _tiles]
num_obs_ascat = [len(obs_obs_grouped[k][_ascat_masks[k]]) for k in _tiles]

# Calculate the mean of the observations for each tilenum
mean_obs_smap = _per_tile_stat(obs_obs_grouped, _smap_masks, np.mean)
mean_obs_ascat = _per_tile_stat(obs_obs_grouped, _ascat_masks, np.mean)

# Calculate the mean of the forecasts for each tilenum
mean_fcst_smap = _per_tile_stat(obs_fcst_grouped, _smap_masks, np.mean)
mean_fcst_ascat = _per_tile_stat(obs_fcst_grouped, _ascat_masks, np.mean)

# Calculate the mean of the analyses for each tilenum
mean_ana_smap = _per_tile_stat(obs_ana_grouped, _smap_masks, np.mean)
mean_ana_ascat = _per_tile_stat(obs_ana_grouped, _ascat_masks, np.mean)

# Calculate the mean of the obs minus fcst for each tilenum
mean_obs_minus_fcst_smap = _per_tile_stat(obs_minus_fcst_grouped, _smap_masks, np.mean)
mean_obs_minus_fcst_ascat = _per_tile_stat(obs_minus_fcst_grouped, _ascat_masks, np.mean)

# Calculate the mean of the obs minus ana for each tilenum
mean_obs_minus_ana_smap = _per_tile_stat(obs_minus_ana_grouped, _smap_masks, np.mean)
mean_obs_minus_ana_ascat = _per_tile_stat(obs_minus_ana_grouped, _ascat_masks, np.mean)

# Calculate the standard deviation of the obs_minus_fcst for each tilenum
std_obs_minus_fcst_smap = _per_tile_stat(obs_minus_fcst_grouped, _smap_masks, np.std)
std_obs_minus_fcst_ascat = _per_tile_stat(obs_minus_fcst_grouped, _ascat_masks, np.std)

# Calculate the standard deviation of the obs_minus_ana for each tilenum
std_obs_minus_ana_smap = _per_tile_stat(obs_minus_ana_grouped, _smap_masks, np.std)
std_obs_minus_ana_ascat = _per_tile_stat(obs_minus_ana_grouped, _ascat_masks, np.std)


In [None]:
# Run this cell if you have a multi-sensor experiment
# Write every per-tile, per-sensor statistic, plus the tile coordinates, to one
# .npz archive.
multi_sensor_stats = dict(
    unique_tilenum=unique_tilenum,
    num_obs_smap=num_obs_smap,
    num_obs_ascat=num_obs_ascat,
    mean_obs_smap=mean_obs_smap,
    mean_obs_ascat=mean_obs_ascat,
    mean_fcst_smap=mean_fcst_smap,
    mean_fcst_ascat=mean_fcst_ascat,
    mean_ana_smap=mean_ana_smap,
    mean_ana_ascat=mean_ana_ascat,
    mean_obs_minus_fcst_smap=mean_obs_minus_fcst_smap,
    mean_obs_minus_fcst_ascat=mean_obs_minus_fcst_ascat,
    mean_obs_minus_ana_smap=mean_obs_minus_ana_smap,
    mean_obs_minus_ana_ascat=mean_obs_minus_ana_ascat,
    std_obs_minus_fcst_smap=std_obs_minus_fcst_smap,
    std_obs_minus_fcst_ascat=std_obs_minus_fcst_ascat,
    std_obs_minus_ana_smap=std_obs_minus_ana_smap,
    std_obs_minus_ana_ascat=std_obs_minus_ana_ascat,
    lon_tilenum=lon_tilenum,
    lat_tilenum=lat_tilenum,
)
np.savez(f'{expt_name}_{start_date_str}_{end_date_str}_obsfcstana_stats.npz',
         **multi_sensor_stats)

In [None]:

    # One row per tile; columns are (ASCAT obs count, lon, lat).
    # Requires num_obs_ascat from the multi-sensor statistics cell.
    obarray = np.empty((num_unique_tilenum, 3))
    for _column, _values in enumerate((num_obs_ascat, lon_tilenum, lat_tilenum)):
        obarray[:, _column] = _values

    plot_global(obarray, False, 'Number of ASCAT Obs Assimilated', 'Total')