# A notebook to compute 400-member prior-truth recons with different draws for all three models
# Added here: reconstructions of AMOC.

In [1]:
# put the directory path to your LMR repository here
import sys
sys.path.append("/Users/dan/Desktop/LMR_py3/")
#!cd /Users/dan/Desktop/LMR_py3

# prefix for figure filename
#fig_prefix='prior_truth_'

In [2]:
import os
os.chdir('/Users/dan/Desktop/LMR_py3')
import LMR_lite_utils as LMRlite
import LMR_utils
import LMR_config
import numpy as np
import os,sys
from time import time
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature
from cartopy.util import add_cyclic_point
%matplotlib inline
import cartopy.util as cutil
import cartopy.crs as ccrs
from cartopy.mpl.geoaxes import GeoAxes
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
from mpl_toolkits.axes_grid1 import AxesGrid
import matplotlib.cm as cm
import seaborn as sns
import pandas as pd


Loading information from datasets.yml
Loading information from grid_def.yml


In [3]:
# Load and interpolate all of the priors I want to use as truth

def mk_ad(lmr_names,model_names):
    """
    Load the prior for each configuration and collect the pieces needed by
    the prior-truth experiments.

    Parameters
    ----------
    lmr_names : sequence of str
        Suffixes appended to './configs/config.yml.nullspace.' to locate each
        YAML configuration file under LMR_config.SRC_DIR.
    model_names : sequence of str
        Display name for each entry of `lmr_names` (same order, same length).

    Returns
    -------
    list of dict
        One dictionary per configuration with keys 'X' (state object returned
        by LMRlite.load_prior / prior_regrid), 'Xbp' (prior ensemble with the
        mean along axis 1 removed), 'name', 'lmr_name', 'grid',
        'prox_manager' and 'numprox'.
    """

    # Accumulates one dictionary per model
    ad = []

    for ii, lmr_name in enumerate(lmr_names):
        cfile = './configs/config.yml.nullspace.'+lmr_name
        yaml_file = os.path.join(LMR_config.SRC_DIR,cfile)
        cfg = LMRlite.load_config(yaml_file)

        X, Xb_one = LMRlite.load_prior(cfg)

        # check if config is set to regrid the prior
        if cfg.prior.regrid_method:
            print('regridding prior...')
            # prior_regrid over-writes X, even if the return is given a different name
            [X,Xb_one] = LMRlite.prior_regrid(cfg,X,Xb_one,verbose=False)
        else:
            # No regridding requested: keep the full-resolution ensemble.
            # (Previously this path failed because the regridded array name
            # was only bound inside the branch above.)
            X.trunc_state_info = X.full_state_info

        # Anomalies about the ensemble mean (mean taken along axis 1)
        Xbp = Xb_one - Xb_one.mean(axis=1,keepdims=True)

        dd = {}
        dd['X']            = X
        dd['Xbp']          = Xbp
        dd['name']         = model_names[ii]
        dd['lmr_name']     = lmr_name
        dd['grid']         = LMRlite.Grid(X)
        # Load the proxies once and reuse the manager for the count, so that
        # 'numprox' always describes the stored 'prox_manager'.
        prox_manager       = LMRlite.load_proxies(cfg)
        dd['prox_manager'] = prox_manager
        dd['numprox']      = len(prox_manager.all_proxies)

        print(ii)
        print(dd['name'])

        ad.append(dd)

    return ad



In [4]:
def mk_pproxies(X,Xbp,prox_manager,SNR,grid,seed=0):

    """
    Construct pseudoproxies by sampling the field at the grid point nearest
    each assimilated proxy site and adding white noise.

    Parameters
    ----------
    X : object
        Not used by this routine; kept so the call signature is unchanged.
    Xbp : 2-D array
        Field values, indexed as [flattened grid point, ensemble member].
    prox_manager : object
        Must provide `ind_assim` (its length sets the number of rows) and
        `sites_assim_proxy_objs()` yielding objects with `.lat` and `.lon`.
    SNR : float
        Signal-to-noise ratio by standard deviation; the noise standard
        deviation at a site is std(signal)/SNR.
    grid : object
        Must provide 2-D `lat` and `lon` arrays and `nlon`.
    seed : int or None
        Seed for the noise draws. The same seed reproduces the same noise as
        the legacy `np.random.seed(seed)` + `np.random.randn` sequence.

    Returns
    -------
    vY : array, shape (numprox, nens)
        Pseudoproxy values.
    vR : list of float
        Noise variance (sig/SNR)**2 for each site.
    vP : list of int
        Row index of each site.
    """

    _, nens = Xbp.shape

    numprox = len(prox_manager.ind_assim)

    vY = np.zeros([numprox,nens])
    vR = []
    vP = []

    # Private generator: gives the same stream as seeding the global RNG but
    # leaves the caller's global random state untouched (the global RNG used
    # to be reseeded here and then re-randomized with seed(None) on exit).
    rng = np.random.RandomState(seed)

    for proxy_idx, Y in enumerate(prox_manager.sites_assim_proxy_objs()):
        # get indices of the nearest grid latitude / longitude
        itlat = np.argmin(np.abs(grid.lat[:,0]-Y.lat))
        itlon = np.argmin(np.abs(grid.lon[0,:]-Y.lon))
        # position of that grid point in the flattened (lat-major) state
        npos = itlat*grid.nlon + itlon

        # Noise amplitude corresponding to SNR by stdev
        sig = np.std(Xbp[npos,:])

        # White noise rescaled to unit sample standard deviation, so the
        # realized noise std is exactly sig/SNR for this draw
        randts = rng.randn(nens,)
        randtsn = randts/np.std(randts)
        vY[proxy_idx,:] = Xbp[npos,:] + randtsn*sig/SNR
        vR.append((sig/SNR)**2)
        vP.append(proxy_idx)

    return vY, vR, vP

In [5]:
def mk_yes(X,Xbp,prox_manager,grid):
    """
    Sample the ensemble at the grid point nearest each assimilated proxy site.

    Returns a tuple (vYe, vYe_coords): vYe has one row per site holding the
    corresponding row of Xbp, and vYe_coords holds the matching row of
    X.coords (two columns).
    """

    _, n_members = Xbp.shape
    n_sites = len(prox_manager.ind_assim)

    vYe = np.zeros([n_sites,n_members])
    vYe_coords = np.zeros([n_sites,2])

    for row, site in enumerate(prox_manager.sites_assim_proxy_objs()):
        # nearest grid row (latitude) and column (longitude) to the site
        lat_offsets = np.abs(grid.lat[:,0]-site.lat)
        lat_idx = np.argmin(lat_offsets)
        lon_offsets = np.abs(grid.lon[0,:]-site.lon)
        lon_idx = np.argmin(lon_offsets)

        # flattened (lat-major) index of that grid point
        flat_idx = lat_idx*grid.nlon + lon_idx

        # ensemble estimates and coordinates at that grid point
        vYe[row,:] = Xbp[flat_idx,:]
        vYe_coords[row,:] = X.coords[flat_idx,:]

    return vYe, vYe_coords

In [65]:
def mk_cdd(ad,pmi,tmi,Nensp,Nenst,SNR,LOCRAD,seed=None):
    '''
    Set up and run one prior-truth experiment.

    Draws `Nenst` truth indices without replacement, then draws `Nensp` prior
    indices without replacement from the remaining indices (so the prior and
    truth draws never overlap), and passes both to mk_pt.

    Parameters
    ----------
    ad : list of dict
        Model output, one dictionary per model. The number of available
        years is taken from len(ad[0]['AMOC']).
    pmi, tmi : int
        Indices into `ad` of the models used as prior and as truth.
    Nensp, Nenst : int
        Number of ensemble members drawn for the prior and for the truth.
    SNR : float
        Signal-to-noise ratio forwarded to mk_pt.
    LOCRAD : float
        Localization radius forwarded to mk_pt.
    seed : int or None
        Seed for the index draws; None gives a fresh random draw.

    Returns
    -------
    dict
        The experiment dictionary returned by mk_pt.
    '''

    # Private generator: same stream as np.random.seed(seed) followed by
    # np.random.choice, without resetting the caller's global random state.
    rng = np.random.RandomState(seed)

    # Determine the number of years (ens members) available in the prior
    # NOTE(review): always read from ad[0], not ad[pmi]/ad[tmi] — this assumes
    # every model has the same number of years; confirm.
    nyrs  = len(ad[0]['AMOC'])

    # Draw random truth years
    tinds = rng.choice(nyrs, Nenst, replace=False)

    # Next: random draw of prior ens that is different from the truth ens
    rem   = np.setdiff1d(np.arange(nyrs),tinds)

    remi  = rng.choice(len(rem), Nensp, replace=False)
    pinds = rem[remi]

    # Call routine to generate a prior-truth experiment
    c = mk_pt(ad,pmi,tmi,SNR,LOCRAD,pinds,tinds)

    print('Done!')
    return c

In [66]:
def mk_pt(ad,pmi,tmi,SNR,LOCRAD,pinds,tinds):
    '''
    Run one prior-truth experiment.

    The prior ensemble is ad[pmi] restricted to columns `pinds`; the truth
    ensemble is ad[tmi] restricted to columns `tinds`. Both are re-centred on
    their own ensemble mean. Pseudoproxies are made from the truth, the AMOC
    series is appended as the last row of each state, and every truth member
    is reconstructed in turn.

    Parameters
    ----------
    ad : list of dict
        Per-model dictionaries with keys 'Xbp', 'X', 'prox_manager', 'grid',
        'AMOC', 'name' and 'lmr_name'.
    pmi, tmi : int
        Indices into `ad` of the prior and truth models.
    SNR : float
        Signal-to-noise ratio passed to mk_pproxies.
    LOCRAD : float
        Localization radius. 0 selects LMRlite.Kalman_optimal; any other
        value is written to 'core.loc_rad' and LMRlite.Kalman_ESRF is used.
    pinds, tinds : index arrays
        Ensemble-member indices used for the prior and for the truth.

    Returns
    -------
    dict
        Settings, reconstructions ('recon', 'recon_AMOC'), error measures
        ('rmse', 'gm_rmse', 'rmse_AMOC') and the prior/truth states.
    '''

    Nenst = len(tinds)
    Nensp = len(pinds)

    prior = ad[pmi]
    truth = ad[tmi]

    # Construct prior and truth ensembles of just SAT (no AMOC).
    # Each subset is extracted once and then centred on its own mean.
    prior_sub = prior['Xbp'][:,pinds]
    truth_sub = truth['Xbp'][:,tinds]
    Xbpp_na  = prior_sub-prior_sub.mean(axis=1,keepdims=True)
    Xbpt_na  = truth_sub-truth_sub.mean(axis=1,keepdims=True)

    # Compute effective SAT observations
    vYe, vYe_coords = mk_yes(prior['X'],Xbpp_na,prior['prox_manager'],prior['grid'])

    # Make SAT pseudoproxies (mk_pproxies is called with its default seed)
    vY, vR, vP = mk_pproxies(truth['X'],Xbpt_na,truth['prox_manager'],SNR,truth['grid'])

    # Centre the AMOC series over the selected prior and truth members
    amoc_prior = prior['AMOC'][pinds]
    amoc_truth = truth['AMOC'][tinds]
    Xbpp_a  = amoc_prior-amoc_prior.mean()
    Xbpt_a  = amoc_truth-amoc_truth.mean()

    # Append AMOC as the last row of the prior and truth states
    Xbpp = np.concatenate([Xbpp_na,np.expand_dims(Xbpp_a,axis=0)])
    Xbpt = np.concatenate([Xbpt_na,np.expand_dims(Xbpt_a,axis=0)])

    # ... or only reconstruct AMOC!
    #Xbpp = np.expand_dims(Xbpp_a,axis=0)

    # Get the length of the appended state depending on choices above
    Las  = Xbpt.shape[0]

    # fp holds the reconstructions, one column per truth member
    fp = np.empty([Las,Nenst])

    use_optimal = (LOCRAD==0.)

    if not use_optimal and Nenst > 0:
        # The localized solver needs a config carrying the requested radius.
        # This does not depend on the truth member, so build it once here
        # instead of reloading the YAML file on every loop iteration.
        cfile = './configs/config.yml.nullspace.'+prior['lmr_name']
        yaml_file = os.path.join(LMR_config.SRC_DIR,cfile)
        # Return value is not used below; presumably called for its effect on
        # the LMR_config state read by Config() — TODO confirm.
        LMRlite.load_config(yaml_file)

        # change the localization radius in the config
        cfg_params = LMR_utils.param_cfg_update('core.loc_rad',LOCRAD)
        cfg_new = LMR_config.Config(**cfg_params)

    # Loop over ensemble members in truth
    for kk in range(Nenst):

        if use_optimal:
            # Use the optimal solver
            f,_,_ = LMRlite.Kalman_optimal(vY[:,kk],vR,vYe,Xbpp,verbose=False)
        else:
            # Use the square root solver
            f,_ = LMRlite.Kalman_ESRF(cfg_new,vY[:,kk],vR,vYe,Xbpp,X=prior['X'],vYe_coords=vYe_coords,verbose=False)

        fp[:,kk]    = f

    # A few calculations...

    lat             = truth['grid'].lat
    # RMSE over truth members at every SAT grid point (last row, AMOC, excluded)
    rmse            = np.mean((Xbpt[:-1,:]-fp[:-1,:])**2,1)**.5
    # NOTE(review): cos(lat)-weighted mean of rmse**2 with no square root, so
    # despite the name this is a mean-square quantity. Left unchanged because
    # saved results may depend on it — confirm the intended definition.
    gm_rmse         = np.sum(np.cos(np.deg2rad(lat.ravel()))*rmse**2)/np.sum(np.cos(np.deg2rad(lat)))
    rmse_AMOC       = np.mean((Xbpt[-1,:]-fp[-1,:])**2)**.5

    # Store information in a dictionary for this prior-truth pair
    c = {}
    c['locrad']     = LOCRAD
    c['snr']        = SNR
    c['prior_name'] = prior['name']
    c['truth_name'] = truth['name']
    c['pind']       = pmi
    c['tind']       = tmi
    c['recon']      = fp[:-1,:]
    c['rmse']       = rmse
    c['grid']       = truth['grid']
    c['gm_rmse']    = gm_rmse
    c['tinds']      = tinds
    c['pinds']      = pinds
    c['Nensp']      = Nensp
    c['Nenst']      = Nenst
    c['rmse_AMOC']  = rmse_AMOC
    c['recon_AMOC'] = fp[-1,:]
    c['Xbpt']       = Xbpt
    c['Xbpp']       = Xbpp
    c['Xbpp_a']     = Xbpp_a
    c['Xbpt_a']     = Xbpt_a

    return c

# Begin experiments!

In [56]:
# Build the per-model prior dictionaries (ad) and attach the AMOC series to each.
# NB that these are all generated using the same seed (by virtue of multi_seed)
lmr_names = ['ccsm4_last_millenium.1000',
             'mpi-esm-p_last_millenium.1000']

# Display names, in the same order as lmr_names
model_names = ['CCSM4','MPI-ESM']

ad=mk_ad(lmr_names,model_names)

# AMOC
# these are in kg s-1

# Make the local helper modules imported below (load_gridded_data, gridAvg) importable
sys.path.append("/Users/dan/Dropbox (MIT)/2018-2019/Nullspace")
data_dir = '/Users/dan/Desktop/priors/'
# Month sequence passed to the reader as its averaging window (all 12 months)
outtimeavg = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
# NOTE(review): this rebinds `time`, shadowing the `time` function imported
# with `from time import time` in the imports cell; it is not used again in
# the cells shown here.
time = np.arange(850,1851)

from netCDF4 import Dataset, date2num, num2date
from load_gridded_data import read_gridded_data_CMIP5_model
from gridAvg import gridAvg

#model_names = ['CCSM4','MPI-ESM','HadCM3']

#ad=mk_ad(lmr_names,model_names)


# CCSM4 AMOC series
data_vars  = {'AMOC26N1000m_Omon':0}
data_file  = 'AMOC26N1000m_Omon_CCSM4_past1000_085001-185012.nc'
ccsm4_amoc_obj = read_gridded_data_CMIP5_model(data_dir,data_file,data_vars,outtimeavg,detrend=None)
amoc_c = ccsm4_amoc_obj['AMOC26N1000m_Omon']['value']

# MPI-ESM-P AMOC series
data_vars  = {'AMOC26N1000m_Omon':0}
data_file  = 'AMOC26N1000m_Omon_MPI-ESM-P_past1000_085001-185012.nc'
mpi_amoc_obj = read_gridded_data_CMIP5_model(data_dir,data_file,data_vars,outtimeavg,detrend=None)
amoc_m = mpi_amoc_obj['AMOC26N1000m_Omon']['value']

# Pull out years to match the same order in which the SAT fields were drawn

# Index with each prior's sample indices and divide by 10**9 to rescale the values
ad[0]['AMOC'] = amoc_c[ad[0]['X'].prior_sample_indices]/10**9
ad[1]['AMOC'] = amoc_m[ad[1]['X'].prior_sample_indices]/10**9

Checking configuration ... 
OK!
Reading file:  /Users/dan/Desktop/LMR_py3/data/model/ccsm4_last_millenium/tas_sfc_Amon_CCSM4_past1000_085001-185012.nc
(12012, 192, 288)
indlat= 0  indlon= 1
Anomalies provided as the prior: Removing the temporal mean (for every gridpoint)...
tas : Global(monthly): mean= 8.072375e-07  , std-dev= 1.8899411
Averaging over month sequence: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
tas : Global(time-averaged): mean= 4.4424884352419226e-08  , std-dev= 0.8317386411161235
 
State vector information:
Nx = 55296
state_vect_info= {'tas_sfc_Amon': {'pos': (0, 55295), 'spacecoords': ('lat', 'lon'), 'spacedims': (192, 288), 'vartype': '2D:horizontal'}}
Random selection of 1000 ensemble members
regridding prior...
0 55295
(55296, 1000)
(55296, 2)
(55296, 2)
tas_sfc_Amon  : 2D lat/lon variable, truncating this variable
nlat,nlon: 192 288
=> Full array:      -11.247562408447266 8.779441833496094 0.0002476248215526498 0.8317324770123996
=> Truncated array: -11.1011901257028

In [87]:
def process_input(ad,pmiv,tmiv,Nensp,Nenst,SNR,savename,LOCRAD):
    """
    Run one prior-truth experiment per (prior, truth) pair and save the results.

    Parameters
    ----------
    ad : list of dict
        Per-model dictionaries forwarded to mk_cdd.
    pmiv, tmiv : sequences of int
        Prior and truth model indices; experiment k uses pmiv[k] and tmiv[k].
    Nensp, Nenst : int
        Prior and truth ensemble sizes.
    SNR : float
        Signal-to-noise ratio of the pseudoproxies.
    savename : str
        Output path prefix; '_locrad_<LOCRAD>' is appended.
    LOCRAD : float
        Localization radius.

    Side effects
    ------------
    Writes two files with np.save (which adds the '.npy' extension):
    '<savename>_locrad_<LOCRAD>' holding the list of experiment dictionaries,
    and '<savename>_locrad_<LOCRAD>_ad' holding each experiment's truth state.
    Returns None.
    """

    cdd  = []
    Xbpt_dd = []

    # Every pair uses seed=0, so all experiments share the same index draws
    for pmi, tmi in zip(pmiv, tmiv):
        c   = mk_cdd(ad,pmi,tmi,Nensp,Nenst,SNR,LOCRAD,seed=0)

        cdd.append(c)
        # Keep the truth state of each experiment. This list was previously
        # never filled, so the '_ad' file always contained an empty array;
        # storing 'Xbpt' is presumably what was intended — confirm.
        Xbpt_dd.append(c['Xbpt'])

    fullsn = savename + '_locrad_' + str(LOCRAD)

    np.save(fullsn,cdd)

    # Store the truth states as an object array so that experiments whose
    # states differ in shape can still be saved together.
    truth_states = np.empty(len(Xbpt_dd), dtype=object)
    for kk, xbpt in enumerate(Xbpt_dd):
        truth_states[kk] = xbpt
    np.save(fullsn+'_ad',truth_states)
   

In [88]:
# Loop over LOCRADs and make reconstructions as before, using this new CCSM4 as the prior and old CCSM4 as truth
# Redoing the zero locrad case to save truth because it's different every time...
SNR     = 0.4
Nensp   = 400
Nenst   = 400
# Experiment k uses model pmiv[k] as prior and model tmiv[k] as truth
pmiv    = [0,1,0,1]
tmiv    = [0,0,1,1]
savename = '/Users/dan/Desktop/Nullspace/pt_out/PAGES2k_400_rand_draws_one_truth_AMOC'

LOCRADs = [0.0]

from joblib import Parallel, delayed
import multiprocessing
#num_cores = multiprocessing.cpu_count()
num_cores = 3
nj = len(LOCRADs)
# One job per localization radius, but never more workers than num_cores
# (num_cores used to be set and then ignored, so the worker count grew with
# the length of LOCRADs).
n_workers = max(1, min(num_cores, nj))
Parallel(n_jobs=n_workers)(delayed(process_input)(ad,pmiv,tmiv,Nensp,Nenst,SNR,savename,LOCRADs[i]) for i in np.arange(nj))

Done!
Done!
Done!
Done!


[None]

# Define analysis / plotting routines

In [89]:
# Load relevant files
# The saved array holds Python dictionaries (an object array), which np.load
# refuses to read unless allow_pickle=True on NumPy >= 1.16.3. Only enable
# this for files written by this notebook: unpickling untrusted files can
# execute arbitrary code.
cdd = np.load('/Users/dan/Desktop/Nullspace/pt_out/PAGES2k_400_rand_draws_one_truth_AMOC_locrad_0.0.npy', allow_pickle=True)


In [90]:
def mk_gmp(cdd):
    """
    Organize AMOC results into a table with one row per experiment.

    Parameters
    ----------
    cdd : sequence of dict
        Experiment dictionaries with keys 'Xbpt_a' (truth AMOC),
        'recon_AMOC' (reconstructed AMOC), 'prior_name' and 'truth_name'.
        The reference table below has four entries, so at most four
        experiments are supported, and indices 0 and 3 must exist whenever
        they are referenced.

    Returns
    -------
    pandas.DataFrame
        Columns 'errvar' (variance of reconstruction minus truth),
        'wtd_errvar' and 'n2s' (both errvar divided by the truth variance),
        'FoE' ((errvar - reference errvar)/errvar, NaN when the experiment is
        its own reference), 'prior_name' and 'truth_name'. Each row keeps
        index label 0, as when the rows were appended one at a time.
    """

    # indices of truth-truth experiments corresponding to each prior-truth (to use for FOE)
    tti  = np.array([0, 0, 3, 3])

    rows = []

    for ii in range(len(cdd)):

        # Truth
        m_st     = cdd[ii]['Xbpt_a']

        # Reconstruction
        m_sp     = cdd[ii]['recon_AMOC']

        # Reconstruction in corresponding truth-truth experiment
        m_tt     = cdd[tti[ii]]['recon_AMOC']

        # Compute truth variance for normalizing
        n_st     = np.var(m_st)

        errvar   = np.var(m_sp-m_st)
        tterrvar = np.var(m_tt-m_st)

        if ii==tti[ii]:
            # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
            FoE  = np.nan
        else:
            FoE  = (errvar-tterrvar)/errvar

        rows.append(pd.DataFrame({"errvar":[errvar],
                    "wtd_errvar":[errvar/n_st],
#                    "n2s":[np.sqrt(errvar/n_st)],
                    "n2s":[errvar/n_st],
                    "FoE":[FoE],
                    "prior_name":cdd[ii]['prior_name'],
                    "truth_name":cdd[ii]['truth_name'],
                   }))

    # No experiments: return an empty frame, as the original did
    if not rows:
        return pd.DataFrame()

    # Build the table once with concat; DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every call.
    return pd.concat(rows)

In [91]:
# Build the AMOC skill table (one row per experiment) from the loaded results
gmp = mk_gmp(cdd)