# Running GraphEM with the Command Line Interface (CLI)

## Generating the `configs.yml` file

In [1]:
# Enable the autoreload extension in mode 2 so that edits to imported modules
# are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

import cfr
# Print the version of the imported cfr package for the record.
print(cfr.__version__)

In [2]:
# Name of the proxy database handed to the loader.
proxydb_name = 'PAGES2kv2'

# Start a fresh reconstruction job and attach the proxy database to it.
job = cfr.ReconJob()
job.load_proxydb(proxydb_name)

In [3]:
# Proxies: keep only the records whose ptype is 'coral.SrCa', annualize the
# coral records over months 12, 1, 2 (Dec-Feb), then center each record.
# The steps are order-dependent: each call updates the job's proxy database.
job.filter_proxydb(by='ptype', keys=['coral.SrCa'])
job.annualize_proxydb(months=[12, 1, 2], ptypes=['coral'])
job.center_proxydb()

# obs: fetching & preprocessing
# Loads 'gistemp1200_GHCNv4_ERSSTv5' under the 'obs' tag; the recorded log shows
# an existing local copy under ./data/ is reused instead of downloading.
# NOTE(review): rename_dict presumably maps the 'tas' variable to the file's
# 'tempanomaly' field, and anom_period is presumably the anomaly reference
# period (1951-1980) -- confirm against the cfr documentation.
job.load_clim(tag='obs', path_dict={'tas': 'gistemp1200_GHCNv4_ERSSTv5'}, anom_period=[1951, 1980], rename_dict={'tas': 'tempanomaly'})

# obs: processing
# Use the same Dec-Feb months as for the proxies, regrid with nlat=42 and
# nlon=63, then crop to latitudes between -35 and 35.
job.annualize_clim(tag='obs', months=[12, 1, 2])
job.regrid_clim(tag='obs', nlat=42, nlon=63)
job.crop_clim(tag='obs', lat_min=-35, lat_max=35)

Annualizing ProxyDatabase: 100%|██████████| 29/29 [00:00<00:00, 40.05it/s]
Centering each of the ProxyRecord: 100%|██████████| 29/29 [00:00<00:00, 16457.15it/s]


[90m[1m>>> The target file seems existed at: ./data/gistemp1200_GHCNv4_ERSSTv5.nc.gz . Loading from it instead of downloading ...
[0m

In [4]:
# Settings for the GraphEM preparation step, collected in one place.
graphem_prep_kwargs = dict(
    recon_period=(1801, 2000),  # period to reconstruct
    calib_period=(1901, 2000),  # period for calibration
    verbose=True,
)

# Build the GraphEM inputs on the job from the settings above.
job.prep_graphem(**graphem_prep_kwargs)

[36m[1m>>> job.configs["recon_period"] = [1801, 2000]
[0m[36m[1m>>> job.configs["recon_timescale"] = 1
[0m[36m[1m>>> job.configs["calib_period"] = [1901, 2000]
[0m[32m[1m>>> job.graphem_params["recon_time"] created
[0m[32m[1m>>> job.graphem_params["calib_time"] created
[0m[32m[1m>>> job.graphem_params["field_obs"] created
[0m[32m[1m>>> job.graphem_params["calib_idx"] created
[0m[32m[1m>>> job.graphem_params["field"] created
[0m[32m[1m>>> job.graphem_params["df_proxy"] created
[0m[32m[1m>>> job.graphem_params["proxy"] created
[0m[32m[1m>>> job.graphem_params["lonlat"] created
[0m

In [5]:
%%time

# GraphEM
job.run_graphem(
    save_dirpath='./recons/test-run-graphem-cfg',
    graph_method='neighborhood',
    cutoff_radius=1500,
    verbose=True,
)

[36m[1m>>> job.configs["compress_params"] = {'zlib': True}
[0m[36m[1m>>> job.configs["save_dirpath"] = ./recons/test-run-graphem-cfg
[0m[36m[1m>>> job.configs["save_filename"] = job_r01_recon.nc
[0m[36m[1m>>> job.configs["graph_method"] = neighborhood
[0m[36m[1m>>> job.configs["cutoff_radius"] = 1500
[0mComputing a neighborhood graph with R = 1500.0 km
Estimating graph using neighborhood method
Running GraphEM:



EM | dXmis: 0.0107; rdXmis: nan:   0%|          | 1/200 [00:50<2:47:33, 50.52s/it]

GraphEM.EM(): Tolerance achieved.
[32m[1mjob.graphem_solver created and saved to: None
[0m[32m[1m>>> job.recon_fields created
[0m[32m[1m>>> Reconstructed fields saved to: ./recons/test-run-graphem-cfg/job_r01_recon.nc
[0mCPU times: user 7min 36s, sys: 2min 12s, total: 9min 49s
Wall time: 54.8 s





In [6]:
# Directory that receives the job configuration; the next section reads
# `configs.yml` back from this same location.
cfg_dirpath = './recons/test-run-graphem-cfg'
job.save_cfg(cfg_dirpath)

## Testing a reconstruction run based on the generated `configs.yml` file

In [7]:
# Configuration file to replay.
cfg_path = './recons/test-run-graphem-cfg/configs.yml'

# A brand-new job, driven entirely by the saved configuration.
job_cfg = cfr.ReconJob()
job_cfg.run_graphem_cfg(cfg_path, verbose=True)

[36m[1m>>> job.configs["proxydb_path"] = PAGES2kv2
[0m[32m[1m>>> 692 records loaded
[0m[32m[1m>>> job.proxydb created
[0m[36m[1m>>> job.configs["filter_proxydb_args"] = []
[0m[36m[1m>>> job.configs["filter_proxydb_kwargs"] = {'by': 'ptype', 'keys': ['coral.SrCa']}
[0m[32m[1m>>> 29 records remaining
[0m[32m[1m>>> job.proxydb updated
[0m

Annualizing ProxyDatabase: 100%|██████████| 29/29 [00:00<00:00, 38.38it/s]
Centering each of the ProxyRecord: 100%|██████████| 29/29 [00:00<00:00, 19041.14it/s]


[36m[1m>>> job.configs["obs_path"] = {'tas': 'gistemp1200_GHCNv4_ERSSTv5'}
[0m[36m[1m>>> job.configs["obs_rename_dict"] = {'tas': 'tempanomaly'}
[0m[36m[1m>>> job.configs["obs_anom_period"] = [1951, 1980]
[0m[90m[1m>>> The target file seems existed at: ./data/gistemp1200_GHCNv4_ERSSTv5.nc.gz . Loading from it instead of downloading ...
[0m[32m[1m>>> obs variables ['tas'] loaded
[0m[32m[1m>>> job.obs created
[0m[36m[1m>>> job.configs["obs_annualize_months"] = [12, 1, 2]
[0m[36m[1m>>> Processing tas ...
[0m[32m[1m>>> job.obs updated
[0m[36m[1m>>> job.configs["obs_regrid_nlat"] = 42
[0m[36m[1m>>> job.configs["obs_regrid_nlon"] = 63
[0m[36m[1m>>> Processing tas ...
[0m[36m[1m>>> job.configs["obs_lat_min"] = -35
[0m[36m[1m>>> job.configs["obs_lat_max"] = 35
[0m[36m[1m>>> job.configs["obs_lon_min"] = 0
[0m[36m[1m>>> job.configs["obs_lon_max"] = 360
[0m[36m[1m>>> Processing tas ...
[0m[36m[1m>>> job.configs["recon_period"] = [1801, 2000]
[

EM | dXmis: 0.0107; rdXmis: nan:   0%|          | 1/200 [00:54<3:00:52, 54.54s/it]

GraphEM.EM(): Tolerance achieved.





## Leveraging the CLI

In [9]:
# Show the top-level help of the `cfr` command-line tool.
!cfr -h

usage: cfr [-h] [-v] {da,graphem} ...

 cfr: a scripting system for CFR (Feng Zhu, fengzhu@ucar.edu)
----------------------------------------------------------------------------------------
 Usage example for DA:
    cfr da -c config.yml -vb -s 1 2 -r
    # -c config.yml: run the reconstruction job according to config.yml
    # -vb: output the verbose runtime information
    # -s 1 2: set seeds as integers from 1 to 2
    # -r: run the Monte-Carlo iterations for PDA

 Usage example for GraphEM:
    cfr graphem -c config.yml -vb
    # -c config.yml: run the reconstruction job according to config.yml
    # -vb: output the verbose runtime information
            

positional arguments:
  {da,graphem}   running mode
    da           run a DA-based reconstruction
    graphem      run a GraphEM-based reconstruction

optional arguments:
  -h, --help     show this help message and exit
  -v, --version  show program's version number and exit


In [10]:
# Run the GraphEM reconstruction from the shell: -c points at the saved
# configuration file and -vb turns on verbose runtime output.
!cfr graphem -c ./recons/test-run-graphem-cfg/configs.yml -vb

[36m[1m>>> job.configs["proxydb_path"] = PAGES2kv2
[0m[32m[1m>>> 692 records loaded
[0m[32m[1m>>> job.proxydb created
[0m[36m[1m>>> job.configs["filter_proxydb_args"] = []
[0m[36m[1m>>> job.configs["filter_proxydb_kwargs"] = {'by': 'ptype', 'keys': ['coral.SrCa']}
[0m[32m[1m>>> 29 records remaining
[0m[32m[1m>>> job.proxydb updated
Annualizing ProxyDatabase: 100%|████████████████| 29/29 [00:00<00:00, 47.03it/s]
Centering each of the ProxyRecord: 100%|█████| 29/29 [00:00<00:00, 25172.77it/s]
[36m[1m>>> job.configs["obs_path"] = {'tas': 'gistemp1200_GHCNv4_ERSSTv5'}
[0m[36m[1m>>> job.configs["obs_rename_dict"] = {'tas': 'tempanomaly'}
[0m[36m[1m>>> job.configs["obs_anom_period"] = [1951, 1980]
[0m[90m[1m>>> The target file seems existed at: ./data/gistemp1200_GHCNv4_ERSSTv5.nc.gz . Loading from it instead of downloading ...
[0m[32m[1m>>> obs variables ['tas'] loaded
[0m[32m[1m>>> job.obs created
[0m[36m[1m>>> job.configs["obs_annualize_months"] = [