In [None]:
####################################################################
#  THIS CELL IS ALL SETUP FOR EACH OF THE NOTEBOOKS
####################################################################

#-- Look for locally installed packages on NASA JupyterHub Resources
#-- (the shared library is put first on the search path, then the path is shown)
.libPaths(new = c("/home/rstudio/shared/lib/R-4.3/x86_64-pc-linux-gnu", .libPaths()))
.libPaths()

#-- Choose code / data / output locations.
#-- A non-empty AWS_WEB_IDENTITY_TOKEN_FILE selects the "~/"-relative layout
#-- (presumably the hosted hub); otherwise the absolute local paths are used.
if (nzchar(Sys.getenv("AWS_WEB_IDENTITY_TOKEN_FILE"))) {
  code_dir <- "~/ssim-ghg-2024/"
  data_dir <- "~/shared/ssim-ghg-data/inversion_examples/"
  output_dir <- "../../output/"
} else {
  code_dir <- "/projects/ssim-ghg-2024/"
  data_dir <- "/Users/aschuh/SSIM-GHG/data/"
  output_dir <- "~/temp/output/"
}

#-- Folder with the shared R scripts; it also becomes the working directory
Rcode_dir <- file.path(code_dir, "batch/")
setwd(Rcode_dir)

#######################################################
#-- ***Parent Directory and code for ALL inversions***
#######################################################
###############################################
#-- Load Code
##############################################
#-- Source the shared scripts from Rcode_dir, in this order.
#-- source() evaluates each file in the global environment by default,
#-- so the definitions end up where the rest of the notebook expects them.
invisible(lapply(
  c(
    "util_code_032024.R",
    "plot_concentrations.R",
    "inversion_032024.R",
    "write_inversion_2_netcdf_032024.R",
    "generate_transcom_flux_ensemble_from_inversion.R"
  ),
  function(script_name) source(file.path(Rcode_dir, script_name))
))
       
###############################################
#-- Required Libraries
###############################################
#-- library() stops with an error as soon as a package is missing, whereas
#-- require() only returns FALSE with a warning and lets a later cell fail
#-- with a harder-to-trace "could not find function" error.
#-- plyr is attached before dplyr so dplyr's verbs mask plyr's, not the reverse.
library(ncdf4)
library(plyr)
library(dplyr)
library(parallel)
library(ggplot2)
library(abind)
library(Matrix)
library(lattice)
library(memuse)
library(EnvStats)
library(gridExtra)
library(mvtnorm)
library(plotly)

########################
#--  Detect Cores
########################
#-- Report the CPU count, then system-wide and per-process memory (memuse)
print(sprintf("Num CPUs: %s cores", detectCores()))
memuse::Sys.meminfo()
memuse::Sys.procmem()

In [None]:
###############################################
#--  Load sensitivity matrices 
###############################################

#-- The two .rda files are expected to provide `jacob` and `jacob_bgd`,
#-- which are used below -- TODO confirm the object names stored in the files
load(file.path(data_dir, "jacobians/", "trunc_full_jacob_030624_with_dimnames_sib4_4x5_mask.rda"))
load(file.path(data_dir, "jacobians/", "jacob_bgd_021624.rda"))

#-- We have units error somewhere here, need this for now
#-- (12/44 is presumably the C:CO2 mass ratio -- confirm);
#-- jacob_bgd is used exactly as loaded, without rescaling
jacob <- (jacob * 12) / 44

#-- Columns 2 and 3 of the background Jacobian are kept as the fixed
#-- (non-optimized) fire and fossil contributions
fossil_fixed <- jacob_bgd[, 3]
fire_fixed <- jacob_bgd[, 2]
###################################################################
#-- END END END ***Parent Directory and code for ALL inversions***
###################################################################


In [None]:

##################################################################
#- Inversion #1   *************************
##################################################################

#################################
#- Target truth in state space
#################################

##################################################################
#-- truth_array stores ratios of OCO2v10MIP fluxes to SiB4 fluxes,
#-- i.e. example "scalings" for an inversion to recover, together with
#-- the matching differences for inversions that solve for flux directly.
#-- Layout: truth_array(24 months, 23 transcom, 98 inversions, (ratio, difference) )
##################################################################

#load("/projects/sandbox/inversion_workshop_scripts/truth_array.rda")
load(file.path(data_dir, "misc/truth_array.rda"))

#-- Ratio slice of the first inversion with the first transcom column dropped
xx <- truth_array[, -1L, 1L, 1L]

#state_vector_true= tm(as.vector(truth_array[,-1,1,1]),-1,1)

#-- Synthetic truth: +0.5 for the first 24*11 elements, -0.5 for the last 24*11
#-- (presumably 24 months x 11 land regions, then 24 months x 11 ocean regions)
state_vector_true <- rep(c(0.5, -0.5), each = 24 * 11)

#state_vector_mat = matrix(state_vector_true,nrow=24,byrow=FALSE)
#dim(state_vector_mat)
#state_vector_mat[,15]

In [None]:
#########################################################
# Generate a prior flux covariance matrix P_0
# Long term, a catalog of predefined choices is best here I think
#########################################################
#-- Prior standard deviations for the first (land) and second (ocean)
#-- halves of the state vector
land_prior_sd <- 0.5
ocean_prior_sd <- 1

#-- induce temporal correlations
#-- (the active line uses an identity correlation matrix; the commented
#--  alternative builds a block-diagonal one from ar_covariance())
#sigma = bdiag(rep(list(ar_covariance(24, 0.5)), 22))
sigma <- diag(528)

#-- scale by variance for land/ocean: D %*% C %*% t(D), with the prior sds on
#-- the diagonal of D, so the diagonal of sigma holds sd^2
var_scaling_diagonal <- diag(rep(c(land_prior_sd, ocean_prior_sd), each = 24 * 11))
sigma <- var_scaling_diagonal %*% sigma %*% t(var_scaling_diagonal)
#diag(sigma) = NA
#levelplot(as.matrix(sigma)[1:24,1:24],col.regions=my.col(20),at=seq(-0.5,0.5,length=20))

In [None]:
####################################################################################
#-- WHICH obs do you want to use in the inversion? 
#-- examples of selecting on stations, type of data, lat/lon box,etc
####################################################################################

#load(file.path(data_dir,"obs/obs_catalog_030624.rda")) # obs_catalog object
load(file.path(data_dir, "obs/obs_catalog_042424_unit_pulse_hour_timestamp_witherrors_withdates.rda"))

#-- One logical flag per observation (row of jacob).
#-- Default: start with every observation selected; the commented alternative
#-- starts from none so the examples below can switch subsets on.
#subset_indicator_obs=rep(FALSE,dim(jacob)[1])
subset_indicator_obs <- rep(TRUE, times = nrow(jacob))

#subset_indicator_obs=c(rep(TRUE,156383),rep(FALSE,1000000))

############################
#-- SAMPLE BY TYPE EXAMPLE
############################
#subset_indicator_obs[obs_catalog$TYPE == "TCCON"] = TRUE
#subset_indicator_obs1 = rep(FALSE,length(subset_indicator_obs))
#subset_indicator_obs2 = rep(FALSE,length(subset_indicator_obs))

#subset_indicator_obs1[obs_catalog$TYPE == "OCO2"] = TRUE
#subset_indicator_obs2[seq(1,1156383,by=2)] = TRUE
#subset_indicator_obs = subset_indicator_obs1 & subset_indicator_obs2


############################
#-- SAMPLE BY NOAA STATION EXAMPLE
############################
#subset_indicator_obs[grep("spo",obs_catalog$ID)] = TRUE
#subset_indicator_obs[grep("lef",obs_catalog$ID)] = TRUE

############################
#-- SAMPLE BY TIME EXAMPLE
############################
#subset_indicator_obs[obs_catalog$TIME > 8738000] = TRUE

############################
#-- SAMPLE BY LON & LAT EXAMPLE
############################
#subset_indicator_obs[obs_catalog$LON < -10 & obs_catalog$LAT > 10] = TRUE

#subset_indicator_obs=c(rep(TRUE,1156382),rep(FALSE,1))
#subset_indicator_obs[seq(1,1156383,by=100)] = TRUE
#table(subset_indicator_obs)

############################
#-- Downsample if necessary
############################
#-- At most half of all observations are used: if more than that are selected,
#-- floor(0.5 * N) of the selected ones are drawn at random without replacement.
#-- No seed is set here, so the subset changes from run to run.
if (sum(subset_indicator_obs) > 0.5 * length(subset_indicator_obs)) {
  print(paste("downsampling from", sum(subset_indicator_obs), "to",
              floor(0.5 * length(subset_indicator_obs)), "observations"))
  #-- Mask that is TRUE only at the randomly retained positions
  new_ind <- seq_along(subset_indicator_obs) %in%
    sample(x = which(subset_indicator_obs),
           size = floor(0.5 * length(subset_indicator_obs)))
  subset_indicator_obs <- new_ind
}

#-- LEAVE THIS AS IT SUMMARIZES THE NUMBER OF OBS USED
print(paste("using", sum(subset_indicator_obs), "of", length(subset_indicator_obs), "observations"))

In [None]:
##########################################################
#-- sd for Gaussian i.i.d. errors, jacob is sens matrix
##########################################################
#-- Same tiny value for every observation (one entry per row of jacob);
#-- the commented alternative takes per-observation values from the catalog.
#-- NOTE(review): used as an sd when simulating obs -- confirm invert_clean()
#-- interprets R_diagonal the same way (sd vs. variance).
R_diagonal_in <- rep(0.00001, times = nrow(jacob))
#R_diagonal_in = obs_catalog$SD

In [None]:
#############################################################
#-- Generate obs, 'y',  set.seed() ????
#-- currently leaving out bgd and all fixed
#-- non-optimizable contributions including fire and fossil
#############################################################

#-- Synthetic observations: the Jacobian applied to (1 + true scaling), plus
#-- independent zero-mean Gaussian noise with sd R_diagonal_in.
#-- The noise is drawn from the current RNG state (no seed is set here).
y_in <- jacob %*% (state_vector_true + 1) +
  rnorm(n = length(R_diagonal_in), mean = 0, sd = R_diagonal_in)


$$
\newcommand{\transpose}[1]{{#1^{\scriptscriptstyle T}}} 
J(x) = \transpose{(x_0 - x)} {\Sigma_x
}^{-1}(x_0 - x) + \transpose{(z - Hx)} {\Sigma_z}^{-1}(z - Hx)\\
$$

$$
\newcommand{\transpose}[1]{{#1^{\scriptscriptstyle T}}} 
\hat{x} = (\transpose{H}{\Sigma_z}^{-1}H + {\Sigma_x}^{-1})^{-1}(\transpose{H}{\Sigma_z}^{-1}z+{\Sigma_x}^{-1}x_0)
$$

$$
\newcommand{\transpose}[1]{{#1^{\scriptscriptstyle T}}} 
\Sigma_{\hat{x}} = {({\Sigma_x}^{-1} + \transpose{H}{\Sigma_z}^{-1}H )}^{-1}
$$







In [None]:
############################
#-- Run the actual inversion
############################

#-- Inversion #1: Jacobian, obs-error vector, prior covariance and synthetic obs
#-- as built above; DOF output requested, Kalman gain output switched off
ret2 <- invert_clean(
  H = jacob,
  R_diagonal = R_diagonal_in,
  P_0 = sigma,
  y = y_in,
  H_bgd = jacob_bgd,
  subset_indicator_obs = subset_indicator_obs,
  DOF = TRUE,
  output_Kalman_Gain = FALSE
)



In [None]:
#hist(ret2$posterior$x_hat[,1])
#-- Square 8 x 8 figure for the notebook display
options(repr.plot.width = 8, repr.plot.height = 8)

#-- Posterior estimate against the truth; the first 264 state elements are
#-- drawn in orange and the last 264 in blue
plot(
  state_vector_true, ret2$posterior$x_hat,
  pch = 16, cex = 1.5,
  col = rep(c("orange", "blue"), each = 264),
  xlab = "True State Scaling",
  ylab = "Estimated State Scaling",
  main = "Estimated state vector vs true state vector (all time and regions)"
)

In [None]:
#-- Flux ensemble (1000 samples requested) derived from the first inversion
org_data <- generate_transcom_flux_ensemble_from_inversion(
  inv_object = ret2,
  samples = 1000
)

In [None]:
#-- induce temporal correlations
#-- Prior covariance for the second inversion. As written it uses an identity
#-- correlation matrix with the current land/ocean sds; swap in the commented
#-- bdiag line to use the ar_covariance() blocks instead.
#sigma = bdiag(rep(list(ar_covariance(24, 0.5)), 22))
sigma <- diag(528)

#-- scale by variance for land/ocean: D %*% C %*% t(D) with the sds on diag(D)
var_scaling_diagonal <- diag(rep(c(land_prior_sd, ocean_prior_sd), each = 24 * 11))
sigma <- var_scaling_diagonal %*% sigma %*% t(var_scaling_diagonal)
#diag(sigma) = NA
#levelplot(as.matrix(sigma)[1:24,1:24],col.regions=my.col(20),at=seq(-0.5,0.5,length=20))

In [None]:
#-- Second inversion: same inputs as the first, with the current `sigma` as prior
ret3 <- invert_clean(
  H = jacob,
  R_diagonal = R_diagonal_in,
  P_0 = sigma,
  y = y_in,
  H_bgd = jacob_bgd,
  subset_indicator_obs = subset_indicator_obs,
  DOF = TRUE,
  output_Kalman_Gain = FALSE
)


In [None]:
#-- Flux ensemble (1000 samples requested) derived from the second inversion
org_data2 <- generate_transcom_flux_ensemble_from_inversion(
  inv_object = ret3,
  samples = 1000
)

In [None]:
#-- Flux time series by transcom region, first inversion's ensemble
plot_timeseries_flux_bytranscom(ret = org_data)

In [None]:
#-- Flux time series by transcom region, second inversion's ensemble
plot_timeseries_flux_bytranscom(ret = org_data2)

In [None]:
#-- Transcom fluxes by month, first inversion's ensemble
plot_transcom_flux_by_month(ret = org_data)

In [None]:
#-- Transcom fluxes by month, second inversion's ensemble
plot_transcom_flux_by_month(ret = org_data2)

In [None]:
#-- Prior vs. posterior correlations of the 2-year average flux.
#-- The correlations are estimated from the samples in org_data, so the
#-- prior panel shows "some" correlation from sampling noise even though
#-- the prior itself has none.

plot_inversion_correlations(org_data = org_data)


In [None]:
#-- Same correlation diagnostics, by transcom region (first inversion)
plot_inversion_correlations_by_transcom(org_data = org_data)

In [None]:
#-- Concentration plot for the first inversion, with the prior NEE, fossil
#-- and fire additions all switched off
plot_concentrations(
  inversion = ret2,
  add_prior_nee = FALSE,
  add_fossil = FALSE,
  add_fire = FALSE
)

In [None]:
#-- Record the R version, platform and attached package versions for reproducibility
sessionInfo()