In [None]:
from var4d_components import Var4D_Components
from visualize_results import Monte_Carlo_avg

# Estimate errors in fluxes from an ensemble of inversions

A variational inversion, unlike a batch inversion, does not automatically provide the posterior covariance matrix $\mathbf{\hat{S}_x}$. There are a few ways to approximate it; the two most common are described below.

### Use a Monte Carlo ensemble (brute force)

By definition, $\mathbf{\hat{S}_x}$ reflects the probability distribution of the posterior flux when the probability distributions of the prior and the observations are specified by $\mathbf{S_x}$ and $\mathbf{S_z}$ respectively. Therefore, if we do a number of inversions with different priors whose distribution is consistent with $\mathbf{S_x}$, and different observations whose distribution is consistent with $\mathbf{S_z}$, then the distribution of the posterior fluxes __should__ give us $\mathbf{\hat{S}_x}$.

Creating random prior fluxes and observations for this is non-trivial because of off-diagonal elements in $\mathbf{S_x}$ and $\mathbf{S_z}$ which represent the joint probability distribution between fluxes or observations at different times and locations. This is where preconditioning comes in handy, because the transformation

$$
\mathbf{x} = \mathbf{L}\xi + \mathbf{x}_0, \quad \text{where}\,\, \mathbf{S_x} = \mathbf{L}\mathbf{L}^\text{T}
$$

ensures that if the elements of $\xi$ are independently and normally distributed (mean zero, standard deviation one), then $\mathbf{x}$ will have the covariance $\mathbf{S_x}$. We could similarly produce an ensemble of correlated random observations through "preconditioning" them according to $\mathbf{S_z}$ if required.

### Use the approximation of the inverse Hessian from the optimizer

Some optimizers (including BFGS, which we will be primarily using) produce an approximation of the inverse Hessian, where the Hessian is $\mathcal{H} = (\partial^2 J/\partial\xi^2)$. This is typical of quasi-Newton and conjugate gradient optimizers, which try to construct the leading eigenvalues and eigenvectors of the Hessian matrix. The posterior covariance we need is $\mathbf{\hat{S}_x} = (\partial^2 J/\partial\mathbf{x}^2)^{-1}$. __Use the transformation between $\mathbf{x}$ and $\xi$ above to show that $\mathbf{\hat{S}_x} = \mathbf{L} \mathcal{H}^{-1} \mathbf{L}^\text{T}$__. If the optimizer gives us an approximation of $\mathcal{H}^{-1}$, we can transform that into $\mathbf{\hat{S}_x}$.

### Construct uncertainties on aggregates from the full covariance matrix

<span style="color:red"> Add text here on how to calculate errors on space/time aggregates. </span>

## Example 1: Assimilate all in situ observations
### Step 1: Ensemble of inversions with perturbed priors and observations

Do __not__ execute this cell unless you have time to wait. We have already performed this ensemble of inversions. To use that output, copy over `~/shared/ssim-ghg-data/precomputed_output/inversion_examples/var4d/mip_is_mc_reco` to your output folder `~/inversion_output/` with
```
rsync -av ~/shared/ssim-ghg-data/precomputed_output/inversion_examples/var4d/mip_is_mc_reco ~/inversion_output/
```

In [None]:
# Inversion configuration shared by every ensemble member of Example 1.
obs_assim_dict = {'is': True}  # all in situ obs from the MIP
flux_corr_structure = {'temp_corr': 2.0}  # 2-month temporal correlation, no horizontal correlation
prior_flux_unc_dict = {
    'prior_unc_source': 'reco',
    'prior_unc_scale': {'land': 0.25, 'ocean': 0.5},
}

# Members 001..100: each one is set up with perturb_obs/perturb_flux switched on
# and written under mip_is_mc_reco/<member number>.
for i in range(1, 101):
    inv_name = 'mip_is_mc_reco/%03i' % i
    var4d = Var4D_Components(inv_name, verbose=True, store_intermediate=False)
    var4d.var4d_setup(
        obs_to_assim=obs_assim_dict,
        corr_structure=flux_corr_structure,
        perturb_obs=True,
        perturb_flux=True,
        **prior_flux_unc_dict
    )
    var4d.var4d_chain(max_iter=500, optim_method='BFGS')

### Step 2: A single inversion with unperturbed prior and observations

In [None]:
# Same configuration as the ensemble members, but without the perturb_obs /
# perturb_flux switches, and with intermediate results kept.
prior_flux_unc_dict = {
    'prior_unc_source': 'reco',
    'prior_unc_scale': {'land': 0.25, 'ocean': 0.5},
}
obs_assim_dict = {'is': True}  # all in situ obs from the MIP
flux_corr_structure = {'temp_corr': 2.0}  # 2-month temporal correlation, no horizontal correlation

var4d = Var4D_Components('mip_is_reco', verbose=False, store_intermediate=True)
var4d.var4d_setup(
    obs_to_assim=obs_assim_dict,
    corr_structure=flux_corr_structure,
    **prior_flux_unc_dict
)
var4d.var4d_chain(max_iter=500, optim_method='BFGS')

### Step 3: Summarize the ensemble runs

In [None]:
from visualize_results import Monte_Carlo_avg

# Summarize the 'mip_is_mc_reco' ensemble: observations first, then emissions.
mc = Monte_Carlo_avg('mip_is_mc_reco')
mc.summarize_observations()
mc.summarize_emissions()

### Step 4: Plot regional flux totals and observations with uncertainties

In [None]:
from visualize_results import Visualize_Fluxes, Visualize_Obs

#### Regional fluxes
##### Using the Monte Carlo ensemble to calculate errors

In [None]:
# Regional fluxes for the unperturbed 'mip_is_reco' inversion. No err_source is
# passed, so plot_region uses its default error source (per the section heading
# presumably the Monte Carlo ensemble -- confirm in visualize_results).
regions_to_plot = ['North America', 'South America', 'South American Tropical', 'Tropical Asia']
vf = Visualize_Fluxes('mip_is_reco')
vf.plot_region(regions_to_plot, plot_errs=True)

##### Using the BFGS-derived approximation to the Hessian to calculate errors

In [None]:
# Re-create the plotter for 'mip_is_reco' in this cell (as the corresponding
# cells of Examples 2 and 3 do) so the plot cannot silently pick up a `vf`
# left over from a different example when cells are run out of order.
vf = Visualize_Fluxes('mip_is_reco')
# Same regions as the Monte Carlo plot, but with errors taken from the
# 'hessian' error source instead of the default.
vf.plot_region(['North America', 'South America', 'South American Tropical', 'Tropical Asia'], plot_errs=True, err_source='hessian')

#### In situ observations

In [None]:
# Plot four sites for the 'mip_is_reco' inversion, with errors drawn.
sites_to_plot = ['mlo','wkt','amt', 'smo']
po = Visualize_Obs('mip_is_reco')
po.plot_site(sites_to_plot, plot_errs=True)

#### Correlations between annual fluxes

In [None]:
# Annual-flux correlations come from the ensemble ('mip_is_mc_reco'),
# not from the single unperturbed inversion.
mc = Monte_Carlo_avg('mip_is_mc_reco')
mc.plot_annual_correlations()

## Example 2: Assimilate just the flask obs from NOAA observatories MLO, SMO, SPO, BRW
### Step 1: Ensemble of inversions with perturbed priors and observations
Do __not__ execute this cell unless you have time to wait. We have already performed this ensemble of inversions. To use that output, copy over `~/shared/ssim-ghg-data/precomputed_output/inversion_examples/var4d/noaa_observatories_mc_reco` to your output folder `~/inversion_output/`.

In [None]:
# Inversion configuration shared by every ensemble member of Example 2.
flux_corr_structure = {'temp_corr': 2.0} # 2-month temporal correlation, no horizontal correlation
obs_assim_dict = {'sites': ['mlo', 'spo', 'brw', 'smo']} # just the four observatories
prior_flux_unc_dict = {'prior_unc_source': 'reco', 'prior_unc_scale': {'land': 0.25, 'ocean': 0.5}}

# Run the full ensemble, members 001..100, as in Example 1. The loop used to
# start at 73 (a leftover from resuming an interrupted run), which on a fresh
# run would only produce members 073..100. To resume an interrupted ensemble,
# temporarily raise the start of the range to the first missing member.
for i in range(1, 101):
    inv_name = 'noaa_observatories_mc_reco/%03i'%i
    var4d = Var4D_Components(inv_name, verbose=True, store_intermediate=False)
    var4d.var4d_setup(obs_to_assim=obs_assim_dict, corr_structure=flux_corr_structure, perturb_obs=True, perturb_flux=True, **prior_flux_unc_dict)
    var4d.var4d_chain(max_iter=500, optim_method='BFGS')

### Step 2: A single inversion with unperturbed prior and observations

In [None]:
# Same configuration as the Example 2 ensemble members, but without the
# perturb_obs / perturb_flux switches, and with intermediate results kept.
prior_flux_unc_dict = {
    'prior_unc_source': 'reco',
    'prior_unc_scale': {'land': 0.25, 'ocean': 0.5},
}
obs_assim_dict = {'sites': ['mlo', 'spo', 'brw', 'smo']}  # just the four observatories
flux_corr_structure = {'temp_corr': 2.0}  # 2-month temporal correlation, no horizontal correlation

var4d = Var4D_Components('noaa_observatories_reco', verbose=False, store_intermediate=True)
var4d.var4d_setup(
    obs_to_assim=obs_assim_dict,
    corr_structure=flux_corr_structure,
    **prior_flux_unc_dict
)
var4d.var4d_chain(max_iter=500, optim_method='BFGS')

### Step 3: Summarize the ensemble runs

In [None]:
from visualize_results import Monte_Carlo_avg

# Summarize the 'noaa_observatories_mc_reco' ensemble: observations first, then emissions.
mc = Monte_Carlo_avg('noaa_observatories_mc_reco')
mc.summarize_observations()
mc.summarize_emissions()

### Step 4: Plot regional flux totals and observations with uncertainties

In [None]:
from visualize_results import Visualize_Fluxes, Visualize_Obs

#### Regional fluxes
##### Using the Monte Carlo ensemble to calculate errors

In [None]:
# Regional fluxes for the unperturbed 'noaa_observatories_reco' inversion. No
# err_source is passed, so plot_region uses its default error source (per the
# section heading presumably the Monte Carlo ensemble -- confirm in visualize_results).
regions_to_plot = ['North America', 'South America', 'South American Tropical', 'Tropical Asia']
vf = Visualize_Fluxes('noaa_observatories_reco')
vf.plot_region(regions_to_plot, plot_errs=True)

##### Using the BFGS-derived approximation to the Hessian to calculate errors

In [None]:
# Same regions as the Monte Carlo plot, but with errors taken from the
# 'hessian' error source instead of the default.
regions_to_plot = ['North America', 'South America', 'South American Tropical', 'Tropical Asia']
vf = Visualize_Fluxes('noaa_observatories_reco')
vf.plot_region(regions_to_plot, plot_errs=True, err_source='hessian')

#### In situ observations

In [None]:
# Plot four sites for the 'noaa_observatories_reco' inversion, with errors drawn.
# Note that 'wkt' and 'amt' are not in this example's assimilated site list
# (mlo, spo, brw, smo).
sites_to_plot = ['mlo','wkt','amt', 'smo']
po = Visualize_Obs('noaa_observatories_reco')
po.plot_site(sites_to_plot, plot_errs=True)

#### Correlations between annual fluxes

In [None]:
# Annual-flux correlations come from the ensemble ('noaa_observatories_mc_reco'),
# not from the single unperturbed inversion.
mc = Monte_Carlo_avg('noaa_observatories_mc_reco')
mc.plot_annual_correlations()

## Example 3: Assimilate all OCO2 observations
### Step 1: Ensemble of inversions with perturbed priors and observations
Do __not__ execute this cell unless you have time to wait. We have already performed this ensemble of inversions. To use that output, copy over `~/shared/ssim-ghg-data/precomputed_output/inversion_examples/var4d/mip_oco2_mc_reco` to your output folder `~/inversion_output/`.

In [None]:
# Inversion configuration shared by every ensemble member of Example 3.
flux_corr_structure = {'temp_corr': 2.0} # 2-month temporal correlation, no horizontal correlation
obs_assim_dict = {'oco2': True} # all OCO2 obs from the MIP
prior_flux_unc_dict = {'prior_unc_source': 'reco', 'prior_unc_scale': {'land': 0.25, 'ocean': 0.5}}

# Run the full ensemble, members 001..100, as in Example 1. The loop used to
# start at 71 (a leftover from resuming an interrupted run), which on a fresh
# run would only produce members 071..100. To resume an interrupted ensemble,
# temporarily raise the start of the range to the first missing member.
for i in range(1, 101):
    inv_name = 'mip_oco2_mc_reco/%03i'%i
    var4d = Var4D_Components(inv_name, verbose=True, store_intermediate=False)
    var4d.var4d_setup(obs_to_assim=obs_assim_dict, corr_structure=flux_corr_structure, perturb_obs=True, perturb_flux=True, **prior_flux_unc_dict)
    # Unlike Examples 1 and 2, this chain is stopped via a gradient-norm
    # setting and leaves max_iter / optim_method at var4d_chain's defaults.
    var4d.var4d_chain(gradnorm=1.0E-5)

### Step 2: A single inversion with unperturbed prior and observations

In [None]:
# Same configuration as the Example 3 ensemble members, but without the
# perturb_obs / perturb_flux switches, and with intermediate results kept.
prior_flux_unc_dict = {
    'prior_unc_source': 'reco',
    'prior_unc_scale': {'land': 0.25, 'ocean': 0.5},
}
obs_assim_dict = {'oco2': True}  # all OCO2 obs from the MIP
flux_corr_structure = {'temp_corr': 2.0}  # 2-month temporal correlation, no horizontal correlation

var4d = Var4D_Components('mip_oco2_reco', verbose=False, store_intermediate=True)
var4d.var4d_setup(
    obs_to_assim=obs_assim_dict,
    corr_structure=flux_corr_structure,
    **prior_flux_unc_dict
)
# max_iter and optim_method are left at var4d_chain's defaults here.
var4d.var4d_chain(gradnorm=1.0E-5)

### Step 3: Summarize the ensemble

In [None]:
from visualize_results import Monte_Carlo_avg

# Summarize the 'mip_oco2_mc_reco' ensemble: observations first, then emissions.
mc = Monte_Carlo_avg('mip_oco2_mc_reco')
mc.summarize_observations()
mc.summarize_emissions()

### Step 4: Plot regional flux totals and observations with uncertainties

#### Regional fluxes
##### Using the Monte Carlo ensemble to calculate errors

In [None]:
# Examples 1 and 2 import the plotting classes at the start of their Step 4;
# do the same here so Example 3 also works when it is the first example run
# on a fresh kernel (otherwise Visualize_Fluxes / Visualize_Obs are undefined).
from visualize_results import Visualize_Fluxes, Visualize_Obs

# Regional fluxes for the unperturbed 'mip_oco2_reco' inversion. No err_source
# is passed, so plot_region uses its default error source (per the section
# heading presumably the Monte Carlo ensemble -- confirm in visualize_results).
vf = Visualize_Fluxes('mip_oco2_reco')
vf.plot_region(['North America', 'South America', 'South American Tropical', 'Tropical Asia'], plot_errs=True)

##### Using the BFGS-derived approximation to the Hessian to calculate errors

In [None]:
# Same regions as the Monte Carlo plot, but with errors taken from the
# 'hessian' error source instead of the default.
regions_to_plot = ['North America', 'South America', 'South American Tropical', 'Tropical Asia']
vf = Visualize_Fluxes('mip_oco2_reco')
vf.plot_region(regions_to_plot, plot_errs=True, err_source='hessian')

#### In situ observations (which were not assimilated)

In [None]:
# Plot four in situ sites for the 'mip_oco2_reco' inversion, with errors drawn.
# This example assimilates only OCO2 data ({'oco2': True}), so per the section
# heading these sites were not assimilated.
sites_to_plot = ['mlo','wkt','amt', 'smo']
po = Visualize_Obs('mip_oco2_reco')
po.plot_site(sites_to_plot, plot_errs=True)

#### Correlations between annual fluxes

In [None]:
# Annual-flux correlations come from the ensemble ('mip_oco2_mc_reco'),
# not from the single unperturbed inversion.
mc = Monte_Carlo_avg('mip_oco2_mc_reco')
mc.plot_annual_correlations()