Hi Luke! Things you will have to change in this notebook:

1. PATH variables in next cell
2. Change lmr_names and model_names to reflect all the additional models
3. Make a new config file for each prior and put it in the CFGPATH directory. As you'll see below, I set the file names to follow the format `config.yml.superprior.<lmr_names[i]>`, so e.g. when I was using CCSM4 LM, the file was `config.yml.superprior.ccsm4_last_millenium`.

After that, once you get all the modules loaded (using the lmr_py3 environment), I think it should be good to go to compute pseudoproxy experiments for arbitrary combinations of model priors (including superpriors).

As I recall, the parallel computation takes ~1 day for the 3x3 problem on 8 cores.

I have lots of plotting routines that make use of the resulting dictionary of sister experiments that I'm happy to send along later once I comment them :-p

In [None]:
# Luke: set the system-specific locations here.

CFGPATH = ''   # directory holding the config.yml.superprior.* files
SAVEPATH = ''  # directory the experiment output files are saved into
LMRPATH = ''   # appended to sys.path in the import cell

# Prior model names as they appear in file names and config files.
# The entries below are the ones I used.
lmr_names = [
    'ccsm4_last_millenium',
    'mpi-esm-p_last_millenium',
    'hadcm3_last_millenium',
]

# Human-readable counterparts of lmr_names (same order), handy for plotting etc.
model_names = [
    'CCSM4',
    'MPI-ESM',
    'HadCM3',
]

# In principle, nothing after this point should need changing. In practice...

In [None]:
# Import necessary modules. Might not all be needed for now but eventually
# will for plotting.

import os
import sys
import LMR_lite_utils as LMRlite
import LMR_utils
import LMR_config
import numpy as np
from time import time
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature
from cartopy.util import add_cyclic_point
%matplotlib inline
import cartopy.util as cutil
import cartopy.crs as ccrs
from cartopy.mpl.geoaxes import GeoAxes
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
from mpl_toolkits.axes_grid1 import AxesGrid
import matplotlib.cm as cm

sys.path.append(LMRPATH)


In [None]:
# Load the proxy database; the proxies are used for their locations.

# The config file of the first prior in lmr_names drives the proxy loading.
cfile = ''.join((CFGPATH, '/config.yml.superprior.', lmr_names[0]))
yaml_file = os.path.join(LMR_config.SRC_DIR, cfile)
cfg = LMRlite.load_config(yaml_file)

# Proxy manager plus the total number of proxies it holds
prox_manager = LMRlite.load_proxies(cfg)
numprox = len(prox_manager.all_proxies)

In [None]:
# Load and interpolate all of the priors I want to use as truth.
# This should be readily scalable to an arbitrary number of models.

# Initialize a single array of all of the interpolated model fields.
# Facilitates doing things with for loops later, so this array gets used a lot!
# Each entry is a dict with keys 'X', 'Xbp', 'name', 'lmr_name' and 'grid'.
ad = []

for ii, lmr_name in enumerate(lmr_names):

    # Dictionary of model attributes that is populated here and then
    # appended to ad
    dd = {}

    # Get the config file set
    cfile = CFGPATH+'/config.yml.superprior.'+lmr_name
    yaml_file = os.path.join(LMR_config.SRC_DIR,cfile)
    cfg = LMRlite.load_config(yaml_file)

    # Load the prior
    X, Xb_one = LMRlite.load_prior(cfg)

    # Check if config is set to regrid the prior. We're regridding everything to 20c
    if cfg.prior.regrid_method:
        print('regridding prior...')
        # This function over-writes X, even if the return is given a different
        # name. The regridded ensemble replaces Xb_one right here, inside the
        # branch, so that the non-regridding path below never picks up an
        # undefined (first model) or stale (later models) regridded array.
        X, Xb_one = LMRlite.prior_regrid(cfg,X,Xb_one,verbose=False)
    else:
        # No regridding: Xb_one is used as loaded
        X.trunc_state_info = X.full_state_info

    # Compute Xb prime, residual from the mean along axis 1
    Xbp = Xb_one - Xb_one.mean(axis=1,keepdims=True)
    grid = LMRlite.Grid(X)

    # Populate the dictionary for this particular model
    dd['X']        = X
    dd['Xbp']      = Xbp
    dd['name']     = model_names[ii]
    dd['lmr_name'] = lmr_name
    dd['grid']     = grid

    # Progress report
    print(ii)
    print(dd['name'])

    ad.append(dd)
        


In [None]:
# Define some routines (modified from LMR code) to compute 
# pseudoproxies and y_es

def mk_pproxies(X,Xbp,prox_manager,SNR,grid):
    """
    Construct pseudoproxies: field values at proxy sites plus white noise.

    For each assimilated proxy site, the closest grid latitude and closest
    grid longitude are found separately, the matching row of Xbp is taken,
    and normally distributed noise with standard deviation sig/SNR is added,
    where sig is the standard deviation of that row.

    Parameters
    ----------
    X : not used by this function.
    Xbp : 2-D array, rows indexed by ilat*grid.nlon + ilon, one column per
        ensemble member.
    prox_manager : object providing `ind_assim` and `sites_assim_proxy_objs()`;
        each yielded site must have `lat` and `lon` attributes.
    SNR : signal-to-noise ratio (by standard deviation).
    grid : object providing 2-D `lat` and `lon` arrays, `nlon` and `nens`.

    Returns
    -------
    vY : array of shape [len(prox_manager.ind_assim), grid.nens], pseudoproxies.
    vR : list with the noise variance (sig/SNR)**2 of each site.
    vP : list with the index of each site.
    """

    n_members = grid.nens
    vY = np.zeros([len(prox_manager.ind_assim), n_members])
    vR, vP = [], []

    # 1-D coordinate axes read off the first column / first row of the 2-D grids
    lat_axis = grid.lat[:, 0]
    lon_axis = grid.lon[0, :]

    for site_idx, site in enumerate(prox_manager.sites_assim_proxy_objs()):

        # Nearest grid point, flattened to a row index of Xbp
        row = np.argmin(np.abs(lat_axis - site.lat))
        col = np.argmin(np.abs(lon_axis - site.lon))
        flat_idx = row * grid.nlon + col

        # Noise amplitude corresponding to SNR by stdev
        signal = Xbp[flat_idx, :]
        sig = np.std(signal)

        # Signal plus one noise draw per ensemble member
        noise = np.random.randn(n_members,) * sig / SNR
        vY[site_idx, :] = signal + noise

        vR.append((sig / SNR) ** 2)
        vP.append(site_idx)

    return vY, vR, vP

def mk_yes(X,Xbp,prox_manager,grid):
    """
    Construct yes: the prior ensemble sampled at the assimilated proxy sites.

    For each assimilated proxy site, the closest grid latitude and closest
    grid longitude are found separately and the matching row of Xbp (and of
    X.coords) is copied out.

    The outputs are sized from len(prox_manager.ind_assim), the same count
    mk_pproxies uses, rather than from a notebook-level global, so the rows
    line up one-to-one with the pseudoproxies.

    Parameters
    ----------
    X : object providing `coords`, a 2-D array with one row of two values
        per row of Xbp.
    Xbp : 2-D array, rows indexed by ilat*grid.nlon + ilon, one column per
        ensemble member.
    prox_manager : object providing `ind_assim` and `sites_assim_proxy_objs()`;
        each yielded site must have `lat` and `lon` attributes.
    grid : object providing 2-D `lat` and `lon` arrays, `nlon` and `nens`.

    Returns
    -------
    vYe : array of shape [number of assimilated sites, grid.nens].
    vYe_coords : array of shape [number of assimilated sites, 2], the rows of
        X.coords at the selected grid points.
    """

    n_sites = len(prox_manager.ind_assim)
    vYe = np.zeros([n_sites,grid.nens])
    vYe_coords = np.zeros([n_sites,2])

    # 1-D coordinate axes read off the first column / first row of the 2-D grids
    lat_axis = grid.lat[:,0]
    lon_axis = grid.lon[0,:]

    for proxy_idx, Y in enumerate(prox_manager.sites_assim_proxy_objs()):

        # Get grid indices of the nearest point, flattened to a row of Xbp
        itlat = np.argmin(np.abs(lat_axis-Y.lat))
        itlon = np.argmin(np.abs(lon_axis-Y.lon))
        npos = itlat*grid.nlon + itlon

        # The ensemble prior estimates
        vYe[proxy_idx,:] = Xbp[npos,:]
        vYe_coords[proxy_idx,:] = X.coords[npos,:]

    return vYe, vYe_coords

In [None]:
# Define a function to compare what happens for various truth-prior pairs.
# This is what does the heavy lifting! Made it a function so that it can be 
# run in parallel below as a function of LOCRAD and SNR.

def process_input(LOCRAD,SNR):
    """
    Run every prior-truth pseudoproxy experiment for one (LOCRAD, SNR) pair.

    For each prior model ii and each truth model jj in `ad`, pseudoproxies
    are drawn from the truth, each truth ensemble member is reconstructed
    from them using prior ii, and the results are collected in one dict per
    pair. The list of dicts is written with np.save to
    SAVEPATH/superprior_locrad_<LOCRAD>_snr_<SNR>(.npy).

    Relies on the notebook globals lmr_names, ad, prox_manager, CFGPATH and
    SAVEPATH.

    Parameters
    ----------
    LOCRAD : localization radius written to 'core.loc_rad'; the value 0.
        selects LMRlite.Kalman_optimal instead of LMRlite.Kalman_ESRF.
    SNR : signal-to-noise ratio passed to mk_pproxies.

    Returns
    -------
    None. The results are only written to disk.
    """

    # Initialize array of dictionaries containing the different comparisons
    cdd = []

    # Loop over prior files
    for ii, lmr_name in enumerate(lmr_names):

        prior = ad[ii]

        # Load the config file corresponding to the iith prior. The same
        # CFGPATH/config.yml.superprior.<name> convention as the loading
        # cells is used (this was a hard-coded './configs/...nullspace...'
        # path that did not follow CFGPATH).
        # NOTE(review): the returned config is not used directly; presumably
        # load_config also updates the settings that LMR_config.Config()
        # reads below -- confirm.
        cfile = CFGPATH+'/config.yml.superprior.'+lmr_name
        yaml_file = os.path.join(LMR_config.SRC_DIR,cfile)
        LMRlite.load_config(yaml_file)

        # Compute effective observations
        vYe, vYe_coords = mk_yes(prior['X'],prior['Xbp'],prox_manager,prior['grid'])

        # Change the localization radius in the config
        cfg_params = LMR_utils.param_cfg_update('core.loc_rad',LOCRAD)
        cfg_new = LMR_config.Config(**cfg_params)

        # Number of state points of the prior grid (rows of a reconstruction)
        nstate = prior['grid'].nlon*prior['grid'].nlat

        # Loop over truth files
        for jj in range(len(lmr_names)):

            truth = ad[jj]

            # Ensemble size of the truth: one reconstruction per member.
            # Taken from the truth's own grid (the same value mk_pproxies
            # uses for the columns of vY) rather than from whichever `grid`
            # happens to be left over at notebook level.
            NENS = truth['grid'].nens

            # Make pseudoproxies
            vY, vR, vP = mk_pproxies(truth['X'],truth['Xbp'],prox_manager,SNR,truth['grid'])

            fp = np.empty([nstate,NENS])

            # Loop over ensemble members in truth. f are the reconstructions.
            # Use the fast solver if not localizing.
            for kk in range(NENS):

                if LOCRAD==0.:
                    f,Xa,_ = LMRlite.Kalman_optimal(vY[:,kk],vR,vYe,prior['Xbp'],verbose=False)
                else:
                    f,Xa = LMRlite.Kalman_ESRF(cfg_new,vY[:,kk],vR,vYe,prior['Xbp'],X=prior['X'],vYe_coords=vYe_coords,verbose=False)

                fp[:,kk] = f

            # Store information in a dictionary for this prior-truth pair
            c = {}
            c['locrad']     = LOCRAD
            c['snr']        = SNR
            c['prior_name'] = prior['name']
            c['truth_name'] = truth['name']
            c['pind']       = ii
            c['tind']       = jj
            c['recon']      = fp
            # Root-mean-square error over ensemble members, per state point
            c['rmse']       = np.mean((truth['Xbp'][:,:NENS]-fp)**2,1)**.5
            # Only the Xa of the LAST ensemble member survives the loop above
            c['Xa']         = Xa

            # Concatenate the dictionary to a list corresponding to all prior-truth pairs
            # for the choices of LOCRAD and SNR
            cdd.append(c)

    # Save all prior-truth pairs for these choices of LOCRAD and SNR.
    # cdd is a list of dicts, so np.save stores it as a pickled object array;
    # reading it back needs np.load(..., allow_pickle=True).
    print('saving for LOCRAD = '+ str(LOCRAD) + ' and SNR = '+ str(SNR))
    np.save(SAVEPATH+'/superprior_locrad_' + str(LOCRAD)+ '_snr_' + str(SNR),cdd)

# NOTE: this statement is at module level, so it prints once when this cell
# defines process_input -- not when an experiment finishes.
print('Done!')

In [None]:
# Run in parallel: one process_input job per localization radius, all at
# the same SNR.
from joblib import Parallel, delayed
import multiprocessing

SNR = 0.4
LOCRADs = [0.,5000.,10000.,25000.]

# One worker per localization radius
nj = len(LOCRADs)

# Cores reported by the machine (informational; the worker count is nj)
num_cores = multiprocessing.cpu_count()

jobs = (delayed(process_input)(locrad,SNR) for locrad in LOCRADs)
Parallel(n_jobs=nj)(jobs)