Let's first set up our libraries and load our lat/lon grid before we head on to the inversion.

In [None]:
#%%Import Libraries
import sys
sys.path.append('C:/Users/klm3/AppData/Local/Programs/Python/Python311/Lib/site-packages')
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import warnings
from scipy.sparse import diags
from scipy.linalg import inv as dense_inv
from scipy.linalg import cholesky 
import scipy.sparse as sp
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from shapely.geometry import Polygon, Point, box
from matplotlib.colors import Normalize, LogNorm 

# Silence every warning for the rest of the session.
# NOTE(review): this also hides numerical RuntimeWarnings (e.g. sqrt of a
# negative number), which can mask real problems further down.
warnings.filterwarnings("ignore")

# Domain mask: entries equal to 1 are the flux cells that are kept.
mask_df = pd.read_csv('G:/SummerSchool/shapefiles/mask.csv')
mask_array = mask_df['Mask'].values
# Vectorised count instead of Python's builtin sum() over a NumPy array.
num_fluxes = int(np.count_nonzero(mask_array == 1))

#add lons and lats
subdir = 'priors_processed'
priorstring = 'ACES_FFDAS'
coord_prefix = 'G:/SummerSchool/priors/' + subdir + '/' + priorstring
# Build the boolean selector once; rows where the mask is 0 are dropped.
keep_cells = mask_array.ravel().astype(bool)
lats = np.load(coord_prefix + '_lat.npy')[keep_cells]
lons = np.load(coord_prefix + '_long.npy')[keep_cells]

# Sorted unique coordinates of the retained cells.
lat_grid = np.unique(lats)
lon_grid = np.unique(lons)
mask = True
print('done')

In this code base, you choose different options to run your inversion.  The goal is to explore different setups so that you can find the one that works best for your situation.  Remember: if the base case (truth = prior, same met, no error on y, no bias, etc.) does not work, then you are doing something wrong.

In [None]:
#%%Create y for the Inversion
# ---- Which experiment to run -------------------------------------------
priorslist = ['ACES_FFDAS', 'GRAAPESCO2']
tower_names = ['NEB', 'NWB', 'HAL']
monthlist = ['02', '07']
truth = 'ACES_FFDAS'  # either entry of priorslist may be used as the truth ...
prior = 'ACES_FFDAS'  # ... or as the prior
truemet = 'WRF2'      # stick with WRF2 for this toy example
met = truemet         # do not change this

# ---- Observation error (R) and synthetic-observation options ------------
R_whitenoise = False  # optional: build R from white noise (other choices exist)
R_perfect = True      # only use this to confirm the inversion works perfectly
y_bias = False        # add a bias to the "real observations" to see its impact
y_whitenoise = False  # add white noise to the "real observations"
bias = 0              # the bias value applied when y_bias is True
signal = 1            # noise level used when R_whitenoise / y_whitenoise are True
R_param = signal**2   # noise in ppm^2 space, or the constant variance used for
                      # every observational time period
unc_save = False      # leave this as False

# ---- Prior error (Q) options ----------------------------------------------
q_floor = 1    # floor on Q when the prior values are used on the diagonal;
               # this ensures that the matrices can be inverted
qones = False  # True: constant diagonal Q.  False: diagonal varies with the prior
if qones:
    q_param = 1  # set this when using the constant diagonal
else:
    q_param = 1  # keep this as one when the diagonal varies by prior

# ---- Where inputs live ----------------------------------------------------
z_directory = 'G:/SummerSchool/output/enhancements/'
y_save_directory = 'G:/SummerSchool/output/y/'

print(f'True met = {truemet}')
print(f'Prior met = {met}')
print(f'True emissions = {truth}')
print(f'Prior fluxes = {prior}')

In this block, we are loading data and seeing what the mean enhancement is for our "observed enhancements".

In [None]:
print('Loading Data')
def load_numpy_array(directory, fname):
    """Load a ``.npy`` file whose path is ``directory`` followed by ``fname``.

    The two parts are joined by plain string formatting, so ``directory``
    must already end with a path separator.
    """
    return np.load('{}{}'.format(directory, fname))

# Per-tower accumulators; each list receives one array per tower and is
# concatenated in tower order once the loop has finished.
values_feb_truth = []
values_feb_prior = []
values_jul_truth = []
values_jul_prior = []
unit_WRF2_feb = []
unit_WRF2_jul = []

for site in tower_names:
    # ---- February ----
    filename_feb_prior = f'{site}_y_{prior}_02_2019.npy'
    feb_values_prior = load_numpy_array(y_save_directory, filename_feb_prior)
    values_feb_prior.append(feb_values_prior)

    filename_feb_truth = f'{site}_y_{truth}_02_2019.npy'
    feb_values_truth = load_numpy_array(y_save_directory, filename_feb_truth)
    values_feb_truth.append(feb_values_truth)

    # Unit enhancements with missing entries removed (real NaNs as well as
    # the literal string 'nan').  The met name is hard-coded here.
    u_WRF2_feb = np.array([
        x for x in np.load(f'{z_directory}unit_{site}_2019_02_WRF2.npy')
        if not pd.isna(x) and x != 'nan'
    ])
    unit_WRF2_feb.append(u_WRF2_feb)

    # ---- July ----
    filename_jul_prior = f'{site}_y_{prior}_07_2019.npy'
    jul_values_prior = load_numpy_array(y_save_directory, filename_jul_prior)
    values_jul_prior.append(jul_values_prior)

    filename_jul_truth = f'{site}_y_{truth}_07_2019.npy'
    jul_values_truth = load_numpy_array(y_save_directory, filename_jul_truth)
    values_jul_truth.append(jul_values_truth)

    u_WRF2_jul = np.array([
        x for x in np.load(f'{z_directory}unit_{site}_2019_07_WRF2.npy')
        if not pd.isna(x) and x != 'nan'
    ])
    unit_WRF2_jul.append(u_WRF2_jul)

# Stack the towers into one vector per month and report the mean "truth" value.
y_feb_array_truth = np.concatenate(values_feb_truth)
y_feb_array_prior = np.concatenate(values_feb_prior)
r_unit_WRF2_feb = np.concatenate(unit_WRF2_feb)
feb_mean = np.mean(y_feb_array_truth)

y_jul_array_truth = np.concatenate(values_jul_truth)
y_jul_array_prior = np.concatenate(values_jul_prior)
r_unit_WRF2_jul = np.concatenate(unit_WRF2_jul)
jul_mean = np.mean(y_jul_array_truth)

print(f'Mean signal (ytruth) for Feb is {round(feb_mean, 2)!s} ppm')
print(f'Mean signal (ytruth) for Jul is {round(jul_mean, 2)!s} ppm')

Let's first create the R matrix and see how much signal we have relative to the noise.  You already specified how to create R, so you don't need to modify this code.

In [None]:
print(' ')
# Draw the white-noise realisations whenever EITHER option needs them.  The
# following cell adds noise_feb / noise_jul to y when y_whitenoise is True, so
# they must exist even if R itself is not built from noise.
if R_whitenoise or y_whitenoise:
    noise_feb = np.random.normal(0, signal, size=y_feb_array_prior.shape)
    noise_jul = np.random.normal(0, signal, size=y_jul_array_prior.shape)

# R_Feb / R_Jul hold the DIAGONAL of R (one variance per observation, ppm^2).
if R_whitenoise:
    # NOTE(review): the noise is zero-mean, so about half of these
    # "variances" are negative; sqrt(R) is then NaN and HQH^T + R may not be
    # positive definite.  Confirm the intended construction (e.g. noise**2).
    R_Feb = noise_feb * R_param
    R_Jul = noise_jul * R_param
    print('R = white noise (sd ' + str(signal) + ' ppm) multiplied by '
          + str(round(R_param, 2)) + ' ppm2 in Feb and Jul')
elif R_perfect:
    # "Perfect" case: a small constant variance on every observation.
    R_Feb = np.ones(y_feb_array_prior.shape) * 0.1
    R_Jul = np.ones(y_jul_array_prior.shape) * 0.1
    print('R = diag of ones multiplied by 0.1 ppm2 in Feb and Jul')
else:
    # Constant variance R_param on every observation.
    R_Feb = np.ones(y_feb_array_prior.shape) * R_param
    R_Jul = np.ones(y_jul_array_prior.shape) * R_param
    print('R = diag of ones of multiplied by ' + str(round(R_param, 2)) + ' ppm2 in Feb and Jul')

#%% R noise and plot
fig, ax = plt.subplots(1, 2, figsize=(18, 6))
panels = (
    (ax[0], y_feb_array_truth, R_Feb, 'Feb 2019'),
    (ax[1], y_jul_array_truth, R_Jul, 'July 2019'),
)
for axis, y_truth, R_diag, month_label in panels:
    axis.plot(y_truth, label='y:truth', color='black', linewidth=.75)
    # Both panels show sqrt(R) so the red line is in ppm, like y and the title.
    # The legend text reports R_param even when R_perfect uses 0.1.
    axis.plot(np.sqrt(R_diag), label='R '+str(round(R_param,2))+'ppm', color='red', linewidth=1)
    axis.legend(fontsize=14)
    axis.set_ylabel('ppm', fontsize=14)
    axis.set_xlabel('Time index (hourly) w gaps', fontsize=14)
    axis.set_title("Truth Enh. & sqrt(R) (" + prior + "-" + met + ") " + month_label, fontsize=14)
    axis.grid(True)

plt.subplots_adjust(wspace=0.15)
plt.show()  # was the bare attribute `plt.show`, which never rendered the figure

print(' ')
print('R and y truth for Feb and July created!')
print('done')

What can you say about how you set up the problem?  How much signal is there relative to the noise in R for Feb and July?

Now we are going to create Q and load our H matrices for February and July.  You always want to check your dimensions on everything!

In [None]:
#Creating Q & Loading Hmatrix

def load_sparse_matrix(filename):
    """Read a SciPy sparse matrix stored in ``.npz`` format at ``filename``."""
    matrix = sp.load_npz(filename)
    return matrix

# H matrices for the chosen met, plus the prior and truth flux vectors.
Hmatrix_feb = load_sparse_matrix('G:/SummerSchool/output/Hmatrices/H_'+met+'_2019_02.npz')
Hmatrix_jul = load_sparse_matrix('G:/SummerSchool/output/Hmatrices/H_'+met+'_2019_07.npz')

prior_array_feb = np.load('G:/SummerSchool/output/prior/'+prior+'_2019_02.npy')
prior_array_jul = np.load('G:/SummerSchool/output/prior/'+prior+'_2019_07.npy')

truth_array_feb = np.load('G:/SummerSchool/output/prior/'+truth+'_2019_02.npy')
truth_array_jul = np.load('G:/SummerSchool/output/prior/'+truth+'_2019_07.npy')
q_size_jul = len(prior_array_jul)
q_size_feb = len(prior_array_feb)

#Check dimensions (each month now reports its OWN Q size and prior mean)
print(f"Hmatrix shape(Feb): {Hmatrix_feb.shape}")
print('Prior length: ' + str(len(prior_array_feb)))
print('Size of Q (Feb) = ' + str(q_size_feb) + ' x ' + str(q_size_feb))
print('Mean prior val for Feb is ' + str(round(np.mean(prior_array_feb), 2)) + ' umol/m2s')
print('##')
print(f"Hmatrix shape(July): {Hmatrix_jul.shape}")
print('Prior length: ' + str(len(prior_array_jul)))
print('Size of Q (July) = ' + str(q_size_jul) + ' x ' + str(q_size_jul))
print('Mean prior val for Jul is ' + str(round(np.mean(prior_array_jul), 2)) + ' umol/m2s')

Hsp_jul = Hmatrix_jul@prior_array_jul #should be the same as original y_jul_array_prior
Hsp_feb = Hmatrix_feb@prior_array_feb #should be the same as original y_feb_array_prior
print('Created Hxsp')

# Keep an unperturbed copy of y before any bias / noise is added.
y_feb_array_truth_hold = y_feb_array_truth.copy()
y_jul_array_truth_hold = y_jul_array_truth.copy()
if y_bias:
    y_feb_array_truth = y_feb_array_truth+bias
    y_jul_array_truth = y_jul_array_truth+bias
if y_whitenoise:
    # noise_feb / noise_jul are created in the R cell above.
    y_feb_array_truth = y_feb_array_truth+noise_feb
    y_jul_array_truth = y_jul_array_truth+noise_jul

fig, ax = plt.subplots(1,2,figsize=(18,6))
comparison_panels = (
    (ax[0], y_feb_array_truth, Hsp_feb, 'Hs:modelled enhan.', "y truth vs Hs Feb 2019"),
    (ax[1], y_jul_array_truth, Hsp_jul, 'Hs:modelled enh.', "y truth vs Hs Jul 2019"),
)
for axis, y_obs, y_model, model_label, panel_title in comparison_panels:
    axis.plot(y_obs, label='y:true enh. w noise and bias', color='black', linewidth=.75)
    axis.plot(y_model, label=model_label, color='red', linewidth=1)
    axis.legend(fontsize=14)
    axis.set_ylabel('ppm', fontsize=14)
    axis.set_xlabel('Time index (hourly) w gaps', fontsize=14)
    axis.set_title(panel_title, fontsize=14)
    axis.grid(True)

# Build the diagonal of Q.  q_diag_feb / q_diag_jul are defined in BOTH
# branches because later cells reshape them directly.
if qones:
    print('Q is diag of ones * ' + str(q_param**2) + ' umol/m2s^2')
    q_diag_feb = np.ones(prior_array_feb.shape[0])*(np.square(q_param))
    q_diag_jul = np.ones(prior_array_jul.shape[0])*(np.square(q_param))
else:
    print('Q is varying (per prior values) with scaling factor of ' + str(q_param))
    # True floor: every prior value below q_floor is raised to q_floor.  The
    # threshold used to be a hard-coded 1, which only matched q_floor == 1.
    q_prior_array_feb = np.maximum(prior_array_feb, q_floor)
    q_diag_feb = (q_param**2) * np.square(q_prior_array_feb)
    q_prior_array_jul = np.maximum(prior_array_jul, q_floor)
    q_diag_jul = (q_param**2) * np.square(q_prior_array_jul)
Q_diag_feb = diags(q_diag_feb,0)
Q_diag_jul = diags(q_diag_jul,0)

print('##')
# Round to 4 decimals; rounding to an integer hid fractional scale factors.
print('Q Feb & July scaling factor is ' + str(round(q_param**2, 4))+ ' (umol/m2s)^2')
print('Created Q_jul and Q_feb!')
print('done')

What can you say about how your modelled enhancements are different than your observed enhancements?

Now we have to create all the pieces for the inversion for Feb and July

In [None]:
#%% HQHt &QHt Feb and July
# Product of each month's H matrix with its diagonal Q matrix.
HQ_feb = Hmatrix_feb.dot(Q_diag_feb)
HQ_jul = Hmatrix_jul.dot(Q_diag_jul)

def create_diagonal_matrix(vector):
    """Return a dense float square matrix with ``vector`` on its diagonal.

    The result has shape ``(len(vector), len(vector))`` and is zero everywhere
    off the main diagonal.
    """
    size = len(vector)
    dense = np.zeros((size, size))
    positions = np.arange(size)
    dense[positions, positions] = vector
    return dense

# R_Feb / R_Jul are 1-D vectors of variances; expand them to full matrices.
R_diagonal_Feb = create_diagonal_matrix(R_Feb)
R_diagonal_Jul = create_diagonal_matrix(R_Jul)

# ---- February: Psi = H Q H^T + R ----
Htrans_feb = Hmatrix_feb.T
HQHt_feb = HQ_feb@Htrans_feb
QHt_feb = Q_diag_feb@Htrans_feb
# Add R as a DIAGONAL matrix.  Adding the 1-D vector R_Feb would broadcast it
# across every row and leak the observation variance into all off-diagonals.
Psi_feb = HQHt_feb.toarray()+R_diagonal_Feb
Psi_inv_feb = dense_inv(Psi_feb)

# ---- July ----
Htrans_jul = Hmatrix_jul.T
HQHt_jul = HQ_jul@Htrans_jul
QHt_jul = Q_diag_jul@Htrans_jul
Psi_jul = HQHt_jul.toarray()+R_diagonal_Jul
Psi_inv_jul = dense_inv(Psi_jul)

# Posterior estimate: shat = s_prior + Q H^T Psi^-1 (y - H s_prior)
psi_z_feb= Psi_inv_feb@(y_feb_array_truth-Hsp_feb)
shat_feb = prior_array_feb+QHt_feb@psi_z_feb

psi_z_jul= Psi_inv_jul@(y_jul_array_truth-Hsp_jul)
shat_jul = prior_array_jul+QHt_jul@psi_z_jul

print('Feb HQ rows = ' + str(HQ_feb.shape[0]) + ', HQ cols = ' + str(HQ_feb.shape[1]))
print('July: HQ rows = ' + str(HQ_jul.shape[0]) + ', HQ cols = ' + str(HQ_jul.shape[1]))
print('##')
print('Feb R diagonal  = ' + str(R_diagonal_Feb.shape[0]) + ', ' + str(R_diagonal_Feb.shape[1]))
print('July R diagonal  = ' + str(R_diagonal_Jul.shape[0]) + ', ' + str(R_diagonal_Jul.shape[1]))
print('##')
print('Created HQHt +R (Psi) and inv(Psi) for Feb and July!')
print('Feb and Jul emissions shat estimates!')
print('done')

Now we will calculate approximate uncertainties.  Can you look at the code and figure out why this is an approximation?

In [None]:
#%%
print('Checking that matrices are positive definite and calculating unc')
print('Takes a little while')
import scipy.io
# NOTE(review): this rebinds `cholesky`, shadowing the scipy.linalg import at
# the top of the file; neither is used in this cell.
from sksparse.cholmod import cholesky
from scipy.sparse import csr_matrix
import numpy as np


def _posterior_variance_diag(H, q_diag, Psi_inv, batch_size=10000):
    """Return the diagonal of the posterior covariance for a diagonal Q.

    The posterior covariance is
        Vshat = inv(H^T R^-1 H + Q^-1) = Q - Q H^T Psi^-1 H Q,
    with Psi = H Q H^T + R.  For diagonal Q, element i of its diagonal is
        q_i - q_i**2 * (h_i^T Psi^-1 h_i),
    where h_i is column i of H.  Only the diagonal is computed, so the
    posterior covariances between different state elements are ignored.

    Parameters
    ----------
    H : scipy sparse matrix, shape (m, n)
    q_diag : 1-D array of length n holding the diagonal of Q
    Psi_inv : dense (m, m) array, the inverse of H Q H^T + R
    batch_size : number of columns of H that are densified at a time
    """
    H_cols = H.tocsc()  # CSC makes the column slicing below cheap
    n_state = H_cols.shape[1]
    variance = np.empty(n_state)
    for start in range(0, n_state, batch_size):
        end = min(start + batch_size, n_state)
        H_chunk = H_cols[:, start:end].toarray()          # (m x batch)
        # h_i^T Psi^-1 h_i for every column i of the chunk
        quad_form = np.sum(H_chunk * (Psi_inv @ H_chunk), axis=0)
        q_chunk = q_diag[start:end]
        variance[start:end] = q_chunk - np.square(q_chunk) * quad_form
    return variance


unc_save = False
batch_size = 10000                      # adjust depending on memory
Hshat_feb = Hmatrix_feb@shat_feb
Hshat_jul = Hmatrix_jul@shat_jul

######chi-square Feb
# These are the ingredients of a chi-square statistic; the statistic itself
# is not assembled in this cell.
y_Hshat_feb = (y_feb_array_truth-Hshat_feb)
y_Hshat_feb=y_Hshat_feb[:,np.newaxis]
R_diag_inv_feb = np.linalg.inv(R_diagonal_Feb)
y_HshatTRy_Hshat_feb = y_Hshat_feb.T@R_diag_inv_feb@y_Hshat_feb
shat_priorT_feb = (shat_feb-prior_array_feb)
shat_priorT_feb=shat_priorT_feb[:,np.newaxis]

##Estimate Uncertainty - Only Diag
# vshat=inv(Hsp'*inv(diag(R))*Hsp+inv(Q)); % Uncertainty

#Ensure Htrans_feb is CSR (kept so the name has the same format as before)
if not isinstance(Htrans_feb, csr_matrix):
    Htrans_feb = Htrans_feb.tocsr()

vshat_diag_feb = _posterior_variance_diag(
    Hmatrix_feb, Q_diag_feb.diagonal(), Psi_inv_feb, batch_size)

#Setup for Jul
y_Hshat_jul = (y_jul_array_truth-Hshat_jul)
y_Hshat_jul=y_Hshat_jul[:,np.newaxis]
R_diag_inv_jul = np.linalg.inv(R_diagonal_Jul)
y_HshatTRy_Hshat_jul = y_Hshat_jul.T@R_diag_inv_jul@y_Hshat_jul
shat_priorT_jul = (shat_jul-prior_array_jul)
shat_priorT_jul=shat_priorT_jul[:,np.newaxis]

#Ensure Htrans_jul is CSR (kept so the name has the same format as before)
if not isinstance(Htrans_jul, csr_matrix):
    Htrans_jul = Htrans_jul.tocsr()

vshat_diag_jul = _posterior_variance_diag(
    Hmatrix_jul, Q_diag_jul.diagonal(), Psi_inv_jul, batch_size)

print('Feb and Jul emissions diag uncertainty estimated!')
print('done')

Now let's look at the mean (which means that this code block is rearranging things).  The inversion estimates hourly fluxes, though, so you can change the code to see how things look at other time intervals.

In [None]:
#Rearranging shat, priors, and truths
# NOTE(review): allow_pickle=True executes pickled objects on load; only use
# it on files you produced yourself.
utctime_feb = np.load('G:/SummerSchool/output/prior/H_ACES_FFDAS_2019_02_utctimes.npy',allow_pickle=True)
utctime_jul = np.load('G:/SummerSchool/output/prior/H_ACES_FFDAS_2019_07_utctimes.npy',allow_pickle=True)
nhrs_back = 180


def _by_time(flat_values, n_times):
    """Reshape a flat state vector to (n_times, num_fluxes)."""
    return np.asarray(flat_values).reshape(n_times, num_fluxes)


def _trim(by_time):
    """Drop the first nhrs_back and the last nhrs_back - 1 time rows."""
    return by_time[nhrs_back:-(nhrs_back - 1), :]


def _trim_once(values, n_times):
    """Reshape and trim a flat vector; pass an already 2-D array through.

    prior_array_* and truth_array_* are overwritten with their trimmed 2-D
    form below, so without this guard the cell could not be run twice.
    """
    values = np.asarray(values)
    if values.ndim == 1:
        values = _trim(_by_time(values, n_times))
    return values


def _to_grid(cell_means):
    """Reshape a per-cell vector to (len(lon_grid), len(lat_grid))."""
    # NOTE(review): pcolormesh(lon_grid, lat_grid, C) expects C with shape
    # (len(lat_grid), len(lon_grid)); lon and lat may need to be flipped here
    # depending on how the flux cells are ordered - confirm against the data.
    return cell_means.reshape(len(lon_grid), len(lat_grid))


n_times_feb = len(utctime_feb)
n_times_jul = len(utctime_jul)

# Take the prior variances from the sparse Q matrices, which exist for both
# settings of qones.
q_diag_feb = Q_diag_feb.diagonal()
q_diag_jul = Q_diag_jul.diagonal()

#February
#Shat
reshape_shat_feb = _by_time(shat_feb, n_times_feb)
trimmed_shat_feb = _trim(reshape_shat_feb)
flatten_shat_feb = trimmed_shat_feb.ravel(order='F')
mean_shat_feb_week = np.mean(trimmed_shat_feb, axis=1)   # mean over cells, per time step
mean_shat_feb = _to_grid(np.mean(trimmed_shat_feb, axis=0))
#Posterior Unc Vshat
reshape_vshat_diag_feb = _by_time(vshat_diag_feb, n_times_feb)
trimmed_vshat_diag_feb = _trim(reshape_vshat_diag_feb)
flatten_vshat_diag_feb = trimmed_vshat_diag_feb.ravel(order='F')
mean_vshat_diag_feb = _to_grid(np.mean(trimmed_vshat_diag_feb, axis=0))
#Prior Unc Q (mean_q_diag_feb stays a flat per-cell vector)
reshape_q_diag_feb = _by_time(q_diag_feb, n_times_feb)
trimmed_q_diag_feb = _trim(reshape_q_diag_feb)
flatten_q_diag_feb = trimmed_q_diag_feb.ravel(order='F')
mean_q_diag_feb = np.mean(trimmed_q_diag_feb, axis=0)

#July
#Shat
reshape_shat_jul = _by_time(shat_jul, n_times_jul)
trimmed_shat_jul = _trim(reshape_shat_jul)
flatten_shat_jul = trimmed_shat_jul.ravel(order='F')
mean_shat_jul_week = np.mean(trimmed_shat_jul, axis=1)
mean_shat_jul = _to_grid(np.mean(trimmed_shat_jul, axis=0))
#Posterior Unc Vshat
reshape_vshat_diag_jul = _by_time(vshat_diag_jul, n_times_jul)
trimmed_vshat_diag_jul = _trim(reshape_vshat_diag_jul)
flatten_vshat_diag_jul = trimmed_vshat_diag_jul.ravel(order='F')
mean_vshat_diag_jul = _to_grid(np.mean(trimmed_vshat_diag_jul, axis=0))
#Prior Unc Q
reshape_q_diag_jul = _by_time(q_diag_jul, n_times_jul)
trimmed_q_diag_jul = _trim(reshape_q_diag_jul)
flatten_q_diag_jul = trimmed_q_diag_jul.ravel(order='F')
mean_q_diag_jul = np.mean(trimmed_q_diag_jul, axis=0)

#Prior Feb and July (prior_array_* are replaced by their trimmed 2-D form)
prior_array_feb = _trim_once(prior_array_feb, n_times_feb)
flatten_prior_array_feb = prior_array_feb.ravel(order='F')
mean_prior_array_feb = _to_grid(np.mean(prior_array_feb, axis=0))

prior_array_jul = _trim_once(prior_array_jul, n_times_jul)
flatten_prior_array_jul = prior_array_jul.ravel(order='F')
mean_prior_array_jul = _to_grid(np.mean(prior_array_jul, axis=0))

#Prior Unc Q on the grid
q_diag_array_feb = _trim(_by_time(q_diag_feb, n_times_feb))
flatten_q_diag_array_feb = q_diag_array_feb.ravel(order='F')
mean_q_diag_array_feb = _to_grid(np.mean(q_diag_array_feb, axis=0))

q_diag_array_jul = _trim(_by_time(q_diag_jul, n_times_jul))
flatten_q_diag_array_jul = q_diag_array_jul.ravel(order='F')
mean_q_diag_array_jul = _to_grid(np.mean(q_diag_array_jul, axis=0))

#Truth Feb and July (truth_array_* are replaced by their trimmed 2-D form)
truth_array_feb = _trim_once(truth_array_feb, n_times_feb)
flatten_truth_array_feb = truth_array_feb.ravel(order='F')
mean_truth_array_feb = _to_grid(np.mean(truth_array_feb, axis=0))

truth_array_jul = _trim_once(truth_array_jul, n_times_jul)
flatten_truth_array_jul = truth_array_jul.ravel(order='F')
mean_truth_array_jul = _to_grid(np.mean(truth_array_jul, axis=0))

print('Configured shat, prior emissions, emissions truth, prior Q, vshat diagonal!')
print('done')

Let's look at the overall statistics for the full domain.  What do they tell us?  Make sure you try base case first so you make sure everything works correctly.  The code DOES NOT calculate the chi-squared statistic.  This is something for you to do later.  It is an important metric to look at.

In [None]:
#%%
#Calculating Statistics
# Gridded differences of the time-mean posterior against prior and truth.
diff_prior_feb = mean_shat_feb-mean_prior_array_feb
diff_prior_jul = mean_shat_jul-mean_prior_array_jul

diff_truth_feb = mean_shat_feb-mean_truth_array_feb
diff_truth_jul = mean_shat_jul-mean_truth_array_jul

# Domain-wide mean difference over every retained (time, cell) element.
# np.mean divides each month by its OWN length; July used to be divided by
# the length of the February vector.
diff_truth_feb_flat = np.mean(flatten_shat_feb - flatten_truth_array_feb)
diff_truth_jul_flat = np.mean(flatten_shat_jul - flatten_truth_array_jul)

meandiff_feb_flat = round(diff_truth_feb_flat ,4)
meandiff_jul_flat = round(diff_truth_jul_flat,4)

def calculate_rmse(truth, estimated):
    """Return the root-mean-square error between two equally shaped arrays.

    NaN entries in the squared difference are ignored when averaging.  An
    AssertionError is raised when the shapes differ.
    """
    assert truth.shape == estimated.shape, "Arrays must be the same shape!"
    squared_error = np.square(truth - estimated)
    return np.sqrt(np.nanmean(squared_error))

# RMSE of the posterior fluxes against the truth (the metric is symmetric in
# its arguments; they are passed in the order the signature names them).
rmse_feb = calculate_rmse(flatten_truth_array_feb, flatten_shat_feb)
rmse_jul = calculate_rmse(flatten_truth_array_jul, flatten_shat_jul)

# Observation space: posterior modelled values vs. the unperturbed y.
ycorr_coef_feb = np.corrcoef(Hshat_feb,y_feb_array_truth_hold)[0,1]
ycorr_coef_jul = np.corrcoef(Hshat_jul,y_jul_array_truth_hold)[0,1]

# Flux space: posterior fluxes vs. the TRUE fluxes.
corr_matrix_feb = np.corrcoef(flatten_truth_array_feb,flatten_shat_feb)
corr_coef_feb = corr_matrix_feb[0,1]
corr_matrix_jul = np.corrcoef(flatten_truth_array_jul,flatten_shat_jul)
corr_coef_jul = corr_matrix_jul[0,1]

# Standard error of the mean residual, in observation and in flux space.
std_yerr_jul= np.std(y_jul_array_truth_hold-Hshat_jul)/np.sqrt(len(y_jul_array_truth_hold))
std_yerr_feb= np.std(y_feb_array_truth_hold-Hshat_feb)/np.sqrt(len(y_feb_array_truth_hold))

std_err_feb = np.std(flatten_shat_feb - flatten_truth_array_feb)/np.sqrt(len(flatten_truth_array_feb))
std_err_jul = np.std(flatten_shat_jul - flatten_truth_array_jul)/np.sqrt(len(flatten_truth_array_jul))

print('Statistics:')
print('Mean difference (shat - truth) Feb = ' + str(meandiff_feb_flat)+' umol/m2s')
print('Mean difference (shat - truth) Jul = ' + str(meandiff_jul_flat)+' umol/m2s')
print('')
print("RMSE Feb = " + str(round(rmse_feb,4)) + ' umol/m2s')
print("RMSE Jul = " + str(round(rmse_jul,4)) + ' umol/m2s')
print('')
print("Correlation Coefficient (Hshat,y) Feb = " + str(round(ycorr_coef_feb,4)))
print("Correlation Coefficient (Hshat,y) Jul = " + str(round(ycorr_coef_jul,4)))
print('')
# The label now names what is computed: correlation with the truth, not the prior.
print("Correlation Coefficient (shat,truth) Feb = " + str(round(corr_coef_feb,4)))
print("Correlation Coefficient (shat,truth) Jul = " + str(round(corr_coef_jul,4)))
print('')
print("standard error (y) Feb = " + str(round(std_yerr_feb,4)) +' ppm')
print("standard error (y) Jul = " + str(round(std_yerr_jul,4)) +' ppm')
print('')
print("standard error Feb = " + str(round(std_err_feb,4)) +' umol/m2s')
print("standard error Jul = " + str(round(std_err_jul,4)) +' umol/m2s')
print('')

Let's check out how things look in space across our entire domain and look at differences.  There will be 6 plots that show the estimates, priors, and truths.  IF we aren't running the perfect case, we will also have four more plots looking at differences.  We don't show the base case because there will be small spurious noise.

In [None]:
#%%Plotting
print('Plotting - can take awhile')
# Read the urban-areas shapefile once (it used to be read twice).
ua_pathname = 'G:/NEC/Regional/shapefiles/Census/UrbanAreas/tl_2023_us_uac20.zip'
gdf_ua = gpd.read_file(ua_pathname)
# Label the geometries as EPSG:4326 without reprojecting, as the original
# `gdf_ua.crs = ...` assignment did; set_crs with allow_override is the
# supported way to replace a CRS that is already set.
# NOTE(review): if the file's own CRS is not EPSG:4326, to_crs("EPSG:4326")
# would be the correct call - confirm.
gdf_ua = gdf_ua.set_crs("EPSG:4326", allow_override=True)

# Select Baltimore areas
gdf_baltimore = gdf_ua[gdf_ua['NAME20'] == "Baltimore, MD"]


def _tower_gdf(lon, lat):
    """Return a one-point GeoDataFrame (EPSG:4326) at the given lon/lat."""
    return gpd.GeoDataFrame(crs="EPSG:4326", geometry=[Point(lon, lat)])


twr_gdf_NEB = _tower_gdf(-76.583, 39.315417)
twr_gdf_NWB = _tower_gdf(-76.685071, 39.344541)
twr_gdf_HAL = _tower_gdf(-76.675278, 39.255194)

# Get bounds
bounds = gdf_baltimore.total_bounds
minx, miny, maxx, maxy = bounds
mask_polygon = box(*bounds)

# July colour scale: 10th-90th percentile of the posterior fluxes.
vmin_jul = np.percentile(flatten_shat_jul, 10)
vmax_jul = np.percentile(flatten_shat_jul, 90)
norm_jul = Normalize(vmin=vmin_jul,vmax=vmax_jul)
Plot_text = 'Estimates'
fig, ax = plt.subplots(2,3,figsize=(16,6), subplot_kw = {'projection':ccrs.PlateCarree()})


def _draw_flux_panel(axis, field, norm, title):
    """Draw one map panel: gridded field, Baltimore outline, towers, colorbar.

    Returns the pcolormesh artist so callers can keep a handle on it.
    """
    axis.add_feature(cfeature.COASTLINE)
    axis.add_feature(cfeature.BORDERS)
    axis.add_feature(cfeature.STATES)
    # NOTE(review): with 1-D lon_grid/lat_grid, pcolormesh expects `field` to
    # have shape (len(lat_grid), len(lon_grid)) - confirm the orientation.
    panel_mesh = axis.pcolormesh(lon_grid, lat_grid, field, cmap='viridis', norm=norm,
                                 shading='auto', transform=ccrs.PlateCarree())
    gdf_baltimore.plot(ax=axis, color='black', edgecolor='black', alpha=0.3,
                       transform=ccrs.PlateCarree())
    for tower in (twr_gdf_NEB, twr_gdf_HAL, twr_gdf_NWB):
        tower.plot(ax=axis, marker='o', markersize=20, linewidth=0.5, color='red')
    axis.set_title(title, size=10)
    fig.colorbar(panel_mesh, ax=axis)
    return panel_mesh


# Top row: July posterior estimate, prior and truth on the same colour scale.
mesh = _draw_flux_panel(ax[0,0], mean_shat_jul, norm_jul,
                        'Estimates July umols/m2s' + '(truth = ' + truth + ')')

Plot_text = 'Prior'
mesh = _draw_flux_panel(ax[0,1], mean_prior_array_jul, norm_jul,
                        Plot_text+' July umols/m2s' + '(prior = ' + prior + ')')

Plot_text = 'Truth'
# The truth panel now names the truth inventory; it used to print the prior.
mesh = _draw_flux_panel(ax[0,2], mean_truth_array_jul, norm_jul,
                        Plot_text+' July umols/m2s' + '(truth = ' + truth + ')')

Plot_text = 'Estimates'
vmin_feb = np.percentile(flatten_shat_feb, 10)
vmax_feb = np.percentile(flatten_shat_feb, 90)
norm_feb = Normalize(vmin=vmin_jul,vmax=vmax_jul)

ax[1,0].add_feature(cfeature.COASTLINE)
ax[1,0].add_feature(cfeature.BORDERS)
ax[1,0].add_feature(cfeature.STATES)
mesh = ax[1,0].pcolormesh(lon_grid,lat_grid, mean_shat_feb,cmap='viridis', norm=norm_feb,shading='auto',transform=ccrs.PlateCarree())
gdf_baltimore.plot(ax=ax[1,0],color='black',edgecolor = 'black', alpha=0.3,transform=ccrs.PlateCarree())
twr_gdf_NEB.plot(ax=ax[1,0],marker='o', markersize=20, linewidth=0.5, color='red')
twr_gdf_HAL.plot(ax=ax[1,0],marker='o', markersize=20, linewidth=0.5, color='red')
twr_gdf_NWB.plot(ax=ax[1,0],marker='o', markersize=20, linewidth=0.5, color='red')
ax[1,0].set_title('Estimates Feb umols/m2s' + '(truth = ' + truth + ')', size = 10)
fig.colorbar(mesh, ax=ax[1,0])

Plot_text = 'Prior'
ax[1,1].add_feature(cfeature.COASTLINE)
ax[1,1].add_feature(cfeature.BORDERS)
ax[1,1].add_feature(cfeature.STATES)
mesh = ax[1,1].pcolormesh(lon_grid,lat_grid, mean_prior_array_feb,cmap='viridis', norm=norm_feb,shading='auto',transform=ccrs.PlateCarree())
gdf_baltimore.plot(ax=ax[1,1],color='black',edgecolor = 'black', alpha=0.3,transform=ccrs.PlateCarree())
twr_gdf_NEB.plot(ax=ax[1,1],marker='o', markersize=20, linewidth=0.5, color='red')
twr_gdf_HAL.plot(ax=ax[1,1],marker='o', markersize=20, linewidth=0.5, color='red')
twr_gdf_NWB.plot(ax=ax[1,1],marker='o', markersize=20, linewidth=0.5, color='red')
ax[1,1].set_title(Plot_text+' Feb umols/m2s' + '(prior = ' + prior + ')', size = 10)
fig.colorbar(mesh, ax=ax[1,1])

Plot_text = 'Truth'
ax[1,2].add_feature(cfeature.COASTLINE)
ax[1,2].add_feature(cfeature.BORDERS)
ax[1,2].add_feature(cfeature.STATES)
mesh = ax[1,2].pcolormesh(lon_grid,lat_grid, mean_truth_array_feb,cmap='viridis', norm=norm_feb,shading='auto',transform=ccrs.PlateCarree())
gdf_baltimore.plot(ax=ax[1,2],color='black',edgecolor = 'black', alpha=0.3,transform=ccrs.PlateCarree())
twr_gdf_NEB.plot(ax=ax[1,2],marker='o', markersize=20, linewidth=0.5, color='red')
twr_gdf_HAL.plot(ax=ax[1,2],marker='o', markersize=20, linewidth=0.5, color='red')
twr_gdf_NWB.plot(ax=ax[1,2],marker='o', markersize=20, linewidth=0.5, color='red')
ax[1,2].set_title(Plot_text+' Feb umols/m2s' + '(prior = ' + prior + ')', size = 10)
fig.colorbar(mesh, ax=ax[1,2])

plt.subplots_adjust(wspace=0.01)
plt.show() 

# Colour limits for the July [estimates - prior] map: 10th-90th percentile of
# the difference field. Computed whether or not the figure below is drawn.
vmin_jul_prior = np.percentile(diff_prior_jul, 10)
vmax_jul_prior = np.percentile(diff_prior_jul, 90)
norm_jul_prior = Normalize(vmin=vmin_jul_prior, vmax=vmax_jul_prior)

if not R_perfect:
    # Each remaining difference field gets its own 10th-90th percentile scale.
    vmin_jul_truth = np.percentile(diff_truth_jul, 10)
    vmax_jul_truth = np.percentile(diff_truth_jul, 90)
    norm_jul_truth = Normalize(vmin=vmin_jul_truth, vmax=vmax_jul_truth)

    vmin_feb_prior = np.percentile(diff_prior_feb, 10)
    vmax_feb_prior = np.percentile(diff_prior_feb, 90)
    norm_feb_prior = Normalize(vmin=vmin_feb_prior, vmax=vmax_feb_prior)

    vmin_feb_truth = np.percentile(diff_truth_feb, 10)
    vmax_feb_truth = np.percentile(diff_truth_feb, 90)
    norm_feb_truth = Normalize(vmin=vmin_feb_truth, vmax=vmax_feb_truth)

    fig, ax = plt.subplots(2, 2, figsize=(8, 10), subplot_kw={'projection': ccrs.PlateCarree()})

    # (axes, difference field, colour norm, panel label, title suffix);
    # row 0 is July, row 1 is February.
    _diff_panels = (
        (ax[0, 0], diff_prior_jul, norm_jul_prior, '[Estimates - Prior]', ' July umol/m2s'),
        (ax[0, 1], diff_truth_jul, norm_jul_truth, '[Estimates - Truth]', ' July umol/m2s'),
        (ax[1, 0], diff_prior_feb, norm_feb_prior, '[Estimates - Prior]', ' Feb umol/m2s'),
        (ax[1, 1], diff_truth_feb, norm_feb_truth, '[Estimates - Truth]', ' Feb umol/m2s'),
    )

    for _diff_ax, _diff_field, _diff_norm, Plot_text, _title_suffix in _diff_panels:
        for _map_feature in (cfeature.COASTLINE, cfeature.BORDERS, cfeature.STATES):
            _diff_ax.add_feature(_map_feature)
        mesh = _diff_ax.pcolormesh(lon_grid, lat_grid, _diff_field, cmap='viridis',
                                   norm=_diff_norm, shading='auto',
                                   transform=ccrs.PlateCarree())
        # Baltimore outline and the three tower sites on top of the field.
        gdf_baltimore.plot(ax=_diff_ax, color='black', edgecolor='black', alpha=0.3,
                           transform=ccrs.PlateCarree())
        for _tower_gdf in (twr_gdf_NEB, twr_gdf_HAL, twr_gdf_NWB):
            _tower_gdf.plot(ax=_diff_ax, marker='o', markersize=20, linewidth=0.5, color='red')
        _diff_ax.set_title(Plot_text + _title_suffix, size=10)
        fig.colorbar(mesh, ax=_diff_ax)

    plt.show()

Let's look at the amount of uncertainty reduced.  What do you see and why?  Remember that these are approx. uncertainties - so you will see some reduction even in the base case.

In [None]:
#%% Uncertainty reduction for both Feb and July
# Percent reduction of the diagonal (variance) terms, prior -> posterior:
#     100 * |q - vshat| / q
# Fixes relative to the earlier version of this cell:
#   * February is now a percentage like July (it was an absolute difference
#     even though its title says "%").
#   * The plotted field is the same quantity the colour limits are computed
#     from (the map used to show sqrt(field) against limits of field).
# NOTE(review): if a standard-deviation based reduction is preferred, use
# 100 * (1 - np.sqrt(vshat / q)) for both the field and its colour limits.
fig, ax = plt.subplots(1, 2, figsize=(8, 10), subplot_kw={'projection': ccrs.PlateCarree()})

# (axes, prior diagonal, posterior diagonal, title)
_reduction_panels = [
    (ax[0], mean_q_diag_array_jul, mean_vshat_diag_jul, 'Uncertainty Reduction Jul %'),
    (ax[1], mean_q_diag_array_feb, mean_vshat_diag_feb, 'Uncertainty Reduction Feb %'),
]

for _red_ax, _prior_diag, _post_diag, _red_title in _reduction_panels:
    mesh_grid = (np.abs(_prior_diag - _post_diag) / _prior_diag) * 100

    # Clip the colour scale to the 1st-99th percentile of this panel's field.
    vmin = np.percentile(mesh_grid.flatten(), 1)
    vmax = np.percentile(mesh_grid.flatten(), 99)
    norm = Normalize(vmin=vmin, vmax=vmax)

    _red_ax.add_feature(cfeature.COASTLINE)
    _red_ax.add_feature(cfeature.BORDERS)
    _red_ax.add_feature(cfeature.STATES)
    mesh = _red_ax.pcolormesh(lon_grid, lat_grid, mesh_grid, cmap='viridis', norm=norm,
                              shading='auto', transform=ccrs.PlateCarree())

    # Baltimore outline and the three tower sites.
    gdf_baltimore.plot(ax=_red_ax, color='black', edgecolor='black', alpha=0.3,
                       transform=ccrs.PlateCarree())
    for _tower_gdf in (twr_gdf_NEB, twr_gdf_HAL, twr_gdf_NWB):
        _tower_gdf.plot(ax=_red_ax, marker='o', markersize=20, linewidth=0.5, color='red')

    _red_ax.set_title(_red_title, size=10)
    # Same colorbar size on both panels (they used 0.5 and 0.4).
    fig.colorbar(mesh, ax=_red_ax, shrink=0.5)

plt.show()

But what we really care about is the values inside our estimation domain (aka Baltimore). And we want to show them in units that people can understand (not just the scientists!).

In [None]:
#%%
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt

# Boolean grid, same shape as the lon/lat mesh, that is True where the grid
# point (lon, lat) falls within the Baltimore geometry.
lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid)
points = [Point(lon_lat) for lon_lat in zip(lon_2d.ravel(), lat_2d.ravel())]
grid_gdf = gpd.GeoDataFrame(geometry=points, crs=gdf_baltimore.crs)
_inside_baltimore = grid_gdf.within(gdf_baltimore.unary_union)
mask_Balt = _inside_baltimore.values.reshape(lon_2d.shape)

# Month lengths in days (February taken as 28) and the number of seconds in
# one day, used to integrate a per-second flux over a month.
days_feb = 28
days_jul = 31
sec_per_day = 24 * 60 * 60


def _grams_per_cell(flux, area_grid, days):
    """Convert a per-area, per-second flux in micromoles to grams per cell.

    Applies, in order: micromol -> mol (1e-6), mol -> g using 12.01 g/mol,
    per-m2 -> per-cell via ``area_grid``, and per-second -> total over
    ``days`` days.
    """
    return flux * 1e-6 * 12.01 * area_grid * days * sec_per_day


def flux_to_GgC(mean_flux, area_grid, days):
    """Return the summed flux over all cells for ``days`` days, in Gg.

    Parameters
    ----------
    mean_flux : array
        Flux per cell in micromol per m2 per second.
    area_grid : array
        Cell areas in m2, broadcastable against ``mean_flux``.
    days : number
        Length of the integration period in days.
    """
    return _grams_per_cell(mean_flux, area_grid, days).sum() / 1e9


def std_flux_to_GgC(std_flux, area_grid, days):
    """Return the uncertainty of the summed flux for ``days`` days, in Gg.

    Each cell's standard deviation is converted like a flux and the cells
    are then combined in quadrature (square root of the sum of squares),
    i.e. as if the per-cell errors were uncorrelated.
    """
    per_cell_sigma = _grams_per_cell(std_flux, area_grid, days)
    return np.sqrt((per_cell_sigma**2).sum()) / 1e9

# Keep only Baltimore: cells outside mask_Balt are set to 0 so they add
# nothing to the domain totals (or to the quadrature sums) below.
#Prior mean arrays
prior_feb = np.where(mask_Balt, mean_prior_array_feb, 0)
prior_jul = np.where(mask_Balt, mean_prior_array_jul, 0)

#Shat arrays
post_feb = np.where(mask_Balt, mean_shat_feb, 0)
post_jul = np.where(mask_Balt, mean_shat_jul, 0)

#truth_feb = np.where(mask_Balt, mean_truth_array_feb, 0)
#truth_jul = np.where(mask_Balt, mean_truth_array_jul, 0)

#Unc arrays (square root of the posterior diagonal -> standard deviation)
post_std_feb = np.where(mask_Balt, np.sqrt(mean_vshat_diag_feb), 0)
post_std_jul = np.where(mask_Balt, np.sqrt(mean_vshat_diag_jul), 0)

# Prior std arrays (square root of the prior diagonal)
prior_std_feb = np.where(mask_Balt, np.sqrt(mean_q_diag_array_feb), 0)
prior_std_jul = np.where(mask_Balt, np.sqrt(mean_q_diag_array_jul), 0)

# Cell areas on a sphere: R^2 * dlat * dlon * cos(lat).
# The spacing is taken from the first two grid values, so this assumes a
# uniformly spaced lat/lon grid.
R_earth = 6.371e6
dlat = np.radians(lat_grid[1] - lat_grid[0])  # radians
dlon = np.radians(lon_grid[1] - lon_grid[0])  # radians
lon_2d, lat_2d = np.meshgrid(lon_grid, lat_grid)
area_grid = (R_earth**2) * dlat * dlon * np.cos(np.radians(lat_2d))  # in m^2

# Monthly Baltimore totals.
prior_feb_GgC = flux_to_GgC(prior_feb, area_grid, days_feb)
prior_jul_GgC = flux_to_GgC(prior_jul, area_grid, days_jul)

post_feb_GgC = flux_to_GgC(post_feb, area_grid, days_feb)
post_jul_GgC = flux_to_GgC(post_jul, area_grid, days_jul)

#truth_feb_GgC = flux_to_GgC(truth_feb, area_grid, days_feb)
#truth_jul_GgC = flux_to_GgC(truth_jul, area_grid, days_jul)

# Uncertainties of the monthly totals.
post_unc_feb_GgC = std_flux_to_GgC(post_std_feb, area_grid, days_feb)
post_unc_jul_GgC = std_flux_to_GgC(post_std_jul, area_grid, days_jul)

prior_unc_feb_GgC = std_flux_to_GgC(prior_std_feb, area_grid, days_feb)
prior_unc_jul_GgC = std_flux_to_GgC(prior_std_jul, area_grid, days_jul)


labels = ['February', 'July']
prior_vals_Balt = [prior_feb_GgC, prior_jul_GgC]
post_vals_Balt = [post_feb_GgC, post_jul_GgC]
#truth_vals_Balt = [truth_feb_GgC, truth_jul_GgC]
post_uncs_Balt = [post_unc_feb_GgC, post_unc_jul_GgC]

x = np.arange(len(labels))
width = 0.25

fig, ax = plt.subplots(figsize=(7,5))

# Prior
ax.bar(x - width, prior_vals_Balt, width, label='Prior', color='lightgray')
# Posterior with 2-sigma error bars.
# Fix: post_uncs_Balt is a Python list, so `post_uncs_Balt*2` repeated it
# (length 4) instead of doubling the values; convert to an array first so
# yerr has one entry per bar.
post_err_2sigma = 2 * np.asarray(post_uncs_Balt)
ax.bar(x, post_vals_Balt, width, yerr=post_err_2sigma, capsize=5, label='Posterior', color='cornflowerblue')
# Truth
#ax.bar(x + width, truth_vals_Balt, width, label='Truth', color='forestgreen')

ax.set_ylabel('Emissions (Gg C per month)')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
# Title lists only the bars that are drawn; add "Truth" back if the truth
# lines above are re-enabled.
plt.title('Baltimore Emissions (Prior, Posterior)')

plt.tight_layout()
plt.show()

In [None]:
#%% More stats for that area
# Flattened copy of the Baltimore mask, used to index the flattened fields.
mask_Balt_flat = mask_Balt.flatten()

# Flatten the estimate and truth grids for both months.
flat_shat_feb, flat_truth_feb, flat_shat_jul, flat_truth_jul = (
    grid.flatten()
    for grid in (mean_shat_feb, mean_truth_array_feb, mean_shat_jul, mean_truth_array_jul)
)

# Keep only the cells flagged by the Baltimore mask.
shat_feb_Balt, truth_feb_Balt, shat_jul_Balt, truth_jul_Balt = (
    flat_values[mask_Balt_flat]
    for flat_values in (flat_shat_feb, flat_truth_feb, flat_shat_jul, flat_truth_jul)
)

def calculate_rmse(truth, estimated):
    """Return the root-mean-square of ``truth - estimated``, skipping NaNs."""
    residual = truth - estimated
    mean_squared = np.nanmean(np.square(residual))
    return np.sqrt(mean_squared)

# Per-month statistics over the Baltimore cells. The residual is
# (estimate - truth); the standard error is the residual's standard
# deviation divided by sqrt(number of cells).

#February
_resid_feb_Balt = shat_feb_Balt - truth_feb_Balt
mean_diff_feb_Balt = np.mean(_resid_feb_Balt)
rmse_feb_Balt = calculate_rmse(truth_feb_Balt, shat_feb_Balt)
corr_coef_feb_Balt = np.corrcoef(truth_feb_Balt, shat_feb_Balt)[0][1]
std_err_feb_Balt = np.std(_resid_feb_Balt) / np.sqrt(len(truth_feb_Balt))

#July
_resid_jul_Balt = shat_jul_Balt - truth_jul_Balt
mean_diff_jul_Balt = np.mean(_resid_jul_Balt)
rmse_jul_Balt = calculate_rmse(truth_jul_Balt, shat_jul_Balt)
corr_coef_jul_Balt = np.corrcoef(truth_jul_Balt, shat_jul_Balt)[0][1]
std_err_jul_Balt = np.std(_resid_jul_Balt) / np.sqrt(len(truth_jul_Balt))

# Report, one statistic at a time, February then July.
print(f"Mean diff (shat - truth) Feb (Baltimore): {mean_diff_feb_Balt:.4f} µmol/m2/s")
print(f"Mean diff (shat - truth) Jul (Baltimore): {mean_diff_jul_Balt:.4f} µmol/m2/s")

print(f"RMSE Feb (Baltimore): {rmse_feb_Balt:.4f} µmol/m2/s")
print(f"RMSE Jul (Baltimore): {rmse_jul_Balt:.4f} µmol/m2/s")

print(f"Corr Coef Feb (Baltimore): {corr_coef_feb_Balt:.4f}")
print(f"Corr Coef Jul (Baltimore): {corr_coef_jul_Balt:.4f}")

print(f"Std Error Feb (Baltimore): {std_err_feb_Balt:.4f} µmol/m2/s")
print(f"Std Error Jul (Baltimore): {std_err_jul_Balt:.4f} µmol/m2/s")

How did these compare to what we had before for the entire domain?