# budget_cdo_nco_draft2017b.ipynb

## Purpose
Use CDO and NCO to post-process MARC aerosol budget data from project [p17c-marc-comparison](https://github.com/grandey/p17c-marc-comparison).

## Requirements
- Climate Data Operators (CDO)
- NetCDF Operators (NCO)
- CESM output data, post-processed to time-series format, as described in [data_management.org](https://github.com/grandey/p17c-marc-comparison/blob/master/manage_data/data_management.org#syncing-to-local-machine-for-analysis). These data are archived at https://doi.org/10.6084/m9.figshare.5687812.

## Author
Benjamin S. Grandey, 2018

## Acknowledgments
Thanks are due to **Alex Avramov**. The interpretation of the diagnostics is informed by *print_MARC_budget_Benjamin.pro*, an IDL script written by Alex Avramov.

In [1]:
# Print the current date and time (shell `date`) to record when this run started
! date

Fri Aug 17 15:17:50 +08 2018


In [2]:
from glob import glob
import os
import re
import shutil

## CDO and NCO version information

In [3]:
# Print the versions of the CDO and NCO (ncks) command-line tools found on the PATH
! cdo --version
! ncks --version

Climate Data Operators version 1.9.1 (http://mpimet.mpg.de/cdo)
Compiled: by root on squall2.local (x86_64-apple-darwin17.2.0) Nov  2 2017 18:28:19
CXX Compiler: /usr/bin/clang++ -std=gnu++11 -pipe -Os -stdlib=libc++ -arch x86_64  -D_THREAD_SAFE -pthread
CXX version : unknown
C Compiler: /usr/bin/clang -pipe -Os -arch x86_64  -D_THREAD_SAFE -pthread
C version : unknown
Features: DATA PTHREADS HDF5 NC4/HDF5 OPeNDAP SZ UDUNITS2 PROJ.4 CURL FFTW3 SSE4_1
Libraries: HDF5/1.10.1 proj/4.93 curl/7.56.1
Filetypes: srv ext ieg grb1 nc1 nc2 nc4 nc4c nc5 
     CDI library version : 1.9.1 of Nov  2 2017 18:27:49
 CGRIBEX library version : 1.9.0 of Sep 29 2017 10:16:02
  NetCDF library version : 4.4.1.1 of Oct  6 2017 14:14:42 $
    HDF5 library version : 1.10.1
 SERVICE library version : 1.4.0 of Nov  2 2017 18:27:47
   EXTRA library version : 1.4.0 of Nov  2 2017 18:27:46
     IEG library version : 1.4.0 of Nov  2 2017 18:27:46
    FILE library version : 1.8.3 of Nov  2 2017 18:27:46

NCO netCDF O

## Directory locations for input and output NetCDF files
The data in the input directory (*in_dir*) are available via Figshare: https://doi.org/10.6084/m9.figshare.5687812.

In [4]:
# Input data directory: location of the CESM time-series NetCDF files read by this notebook
# ($HOME is expanded from the environment by os.path.expandvars)
in_dir = os.path.expandvars('$HOME/data/figshare/figshare5687812/')
# Alternative input location, kept commented out
#in_dir = os.path.expandvars('$HOME/data/projects/p17c_marc_comparison/output_timeseries/')

# Output data directory: every file written by this notebook goes here.
# NOTE: the clean-up cell below deletes all *.nc, *.nco and *.tmp files in this directory.
out_dir = os.path.expandvars('$HOME/data/projects/p17c_marc_comparison/budget_cdo_nco_draft2017b/')

## Clean output data directory

In [5]:
for filename in glob('{}/*.nc'.format(out_dir)):
    print('Deleting {}'.format(filename.split('/')[-1]))
    os.remove(filename)
for filename in glob('{}/*.nco'.format(out_dir)):
    print('Deleting {}'.format(filename.split('/')[-1]))
    os.remove(filename)
for filename in glob('{}/*.tmp'.format(out_dir)):
    print('Deleting {}'.format(filename.split('/')[-1]))
    os.remove(filename)
! date

Fri Aug 17 15:17:50 +08 2018


## Calculate annual means of standard MARC 2D diagnostic variables

In [6]:
variable_list = ['mOCSF', 'mBCSF',  # surface emissions
                 'DST01SF', 'DST02SF', 'DST03SF', 'DST04SF',
                 'SSLT01SF', 'SSLT02SF', 'SSLT03SF', 'SSLT04SF',
                 'mACCDRY', 'mAITDRY', 'mNUCDRY',  # dry deposition
                 'mOCDRY', 'mMOSDRY', 'mOIMDRY', 'mBCDRY', 'mMBSDRY', 'mBIMDRY',
                 'DST01DD', 'DST02DD', 'DST03DD', 'DST04DD',
                 'SSLT01DD', 'SSLT02DD', 'SSLT03DD', 'SSLT04DD'
                ]
for variable in variable_list:
    for year in ['2000', '1850']:
        # Check if input file exists
        in_filename = '{}/p17c_marc_s2_{}.cam.h0.{}.nc'.format(in_dir, year, variable)
        if os.path.isfile(in_filename):
            print('{}, marc_s2, {}'.format(variable, year))
            # Calculate annual means using NCO (with years starting in January)
            annual_filename = '{}/marc_s2_{}_{}_ANN.nc'.format(out_dir, year, variable)
            ! ncra -O --mro -d time,,,12,12 {in_filename} {annual_filename}
            print('  Written {}'.format(annual_filename.split('/')[-1]))
! date 

mOCSF, marc_s2, 2000
  Written marc_s2_2000_mOCSF_ANN.nc
mOCSF, marc_s2, 1850
  Written marc_s2_1850_mOCSF_ANN.nc
mBCSF, marc_s2, 2000
  Written marc_s2_2000_mBCSF_ANN.nc
mBCSF, marc_s2, 1850
  Written marc_s2_1850_mBCSF_ANN.nc
DST01SF, marc_s2, 2000
  Written marc_s2_2000_DST01SF_ANN.nc
DST01SF, marc_s2, 1850
  Written marc_s2_1850_DST01SF_ANN.nc
DST02SF, marc_s2, 2000
  Written marc_s2_2000_DST02SF_ANN.nc
DST02SF, marc_s2, 1850
  Written marc_s2_1850_DST02SF_ANN.nc
DST03SF, marc_s2, 2000
  Written marc_s2_2000_DST03SF_ANN.nc
DST03SF, marc_s2, 1850
  Written marc_s2_1850_DST03SF_ANN.nc
DST04SF, marc_s2, 2000
  Written marc_s2_2000_DST04SF_ANN.nc
DST04SF, marc_s2, 1850
  Written marc_s2_1850_DST04SF_ANN.nc
SSLT01SF, marc_s2, 2000
  Written marc_s2_2000_SSLT01SF_ANN.nc
SSLT01SF, marc_s2, 1850
  Written marc_s2_1850_SSLT01SF_ANN.nc
SSLT02SF, marc_s2, 2000
  Written marc_s2_2000_SSLT02SF_ANN.nc
SSLT02SF, marc_s2, 1850
  Written marc_s2_1850_SSLT02SF_ANN.nc
SSLT03SF, marc_s2, 2000
  Writte

## Calculate column integrals from 3D rate data
kg/kg/s -> kg/m2/s

In [7]:
# 3D rate variables to be column-integrated, grouped by process.
# The groups are concatenated in the order in which they will be processed.
variable_list = (
    # impaction scavenging
    ['mACCWET', 'mAITWET', 'mNUCWET']
    + ['mOCWET', 'mMOSWET', 'mOIMWET', 'mBCWET', 'mMBSWET', 'mBIMWET']
    + ['DST01PP', 'DST02PP', 'DST03PP', 'DST04PP']
    + ['SSLT01PP', 'SSLT02PP', 'SSLT03PP', 'SSLT04PP']
    # aging
    + ['AGEmOC', 'AGEmMOS', 'AGEmBC', 'AGEmMBS']
    # nucleation scavenging by stratiform clouds
    + ['mACCACT', 'mAITACT', 'mNUCACT']
    + ['mOCACT', 'mMOSACT', 'mOIMACT', 'mBCACT', 'mMBSACT', 'mBIMACT']
    # nucleation scavenging by convection
    + ['mACCSCV', 'mAITSCV', 'mNUCSCV']
    + ['mOCSCV', 'mMOSSCV', 'mOIMSCV', 'mBCSCV', 'mMBSSCV', 'mBIMSCV']
    # SO4 cloud evaporation -> ACC
    + ['mSO4evap']
    # binary nucleation -> NUC
    + ['BNUCmNUC']
    # H2SO4 condensation
    + ['CNDmACC', 'CNDmAIT', 'CNDmNUC', 'CNDmMOS', 'CNDmMBS']
    # mass mode adjustment, NUC->AIT, AIT->ACC
    + ['MVmNUC', 'MVmAIT']
    # coagulation
    + ['CGmNUCAC', 'CGmNUCAI', 'CGmNUCMB', 'CGmNUCMO']
    + ['CGmAITAC', 'CGmAITMB', 'CGmAITMO', 'CGmACCMB', 'CGmACCMO', 'CGmOCMOS']
)

In [8]:
# Calculate column integrals
for year in ['2000', '1850']:  # loop over emission years
    ! date
    print('year = {}'.format(year))
    # Copy the surface pressure file - necessary for decoding of the hybrid coordinates
    print('  Copying surface pressure file')
    in_filename = '{}/p17c_marc_s2_{}.cam.h0.PS.nc'.format(in_dir, year)
    ps_filename = '{}/temp_marc_s2_{}_PS.nc'.format(out_dir, year)
    shutil.copy2(in_filename, ps_filename)
    # Create file containing NCO commands for calculation of air mass in each model level
    nco_filename = '{}/temp_marc_s2_{}.nco'.format(out_dir, year)
    nco_file = open(nco_filename, 'w')
    nco_file.writelines(['*P_bnds[time,ilev,lat,lon]=hyai*P0+hybi*PS;\n',  # pressures at bounds
                         '*P_delta[time,lev,lat,lon]=P_bnds(:,1:30,:,:)-P_bnds(:,0:29,:,:);\n',  # deltas
                         'mass_air=P_delta/9.807;'])  # mass of air
    nco_file.close()
    # Calculate mass of air in each model level
    print('  Calculating mass of air in each model level')
    mass_air_filename = '{}/temp_marc_s2_{}_mass_air.nc'.format(out_dir, year)
    ! ncap2 -O -v -S {nco_filename} {ps_filename} {mass_air_filename}
    # Loop over rate data variables
    for variable in variable_list:  # dust
        ! date
        print('  variable = {}'.format(variable))
        # Copy the rate file
        print('    Copying the file for {}'.format(variable))
        in_filename = '{}/p17c_marc_s2_{}.cam.h0.{}.nc'.format(in_dir, year, variable)
        rate_filename = '{}/temp_marc_s2_{}_{}.nc'.format(out_dir, year, variable)
        shutil.copy2(in_filename, rate_filename)
        # Append the mass of air in each model level
        print('    Appending mass_air')
        ! ncks -A {mass_air_filename} {rate_filename}
        # Calculate the mass rate of the variable
        print('    Calculating the mass rate of {} in each model level'.format(variable))
        mass_variable_filename = '{}/temp_marc_s2_{}_mass_{}.nc'.format(out_dir, year, variable)
        ! ncap2 -O -s 'mass_{variable}=mass_air*{variable}' {rate_filename} {mass_variable_filename}
        # Sum over levels to calculate column integral (and exclude unwanted variables)
        print('    Summing over levels')
        column_filename = '{}/temp_marc_s2_{}_column_{}.nc'.format(out_dir, year, variable)
        ! ncwa -O -x -v mass_air,{variable} -a lev -y sum {mass_variable_filename} {column_filename}
        # Rename variable
        print('    Renaming variable to c{}'.format(variable))
        ! ncrename -v mass_{variable},c{variable} {column_filename} >/dev/null 2>/dev/null
        # Set units and long_name
        print('    Setting units and long_name')
        ! ncatted -a 'units',c{variable},o,c,'kg/m2/s' {column_filename}
        ! ncatted -a 'long_name',c{variable},o,c,'{variable} column integral' {column_filename}
        # Calculate annual means (with years starting in January)
        print('    Calculating annual means')
        annual_filename = '{}/marc_s2_{}_c{}_ANN.nc'.format(out_dir, year, variable)
        ! ncra -O --mro -d time,,,12,12 {column_filename} {annual_filename}
        print('    Written {}'.format(annual_filename.split('/')[-1]))
        # Remove three temporary files
        for filename in [rate_filename, mass_variable_filename, column_filename]:
            print('    Removing {}'.format(filename.split('/')[-1]))
            os.remove(filename)
    # Remove another two temporary files
    for filename in [ps_filename, mass_air_filename, nco_filename]:
            print('  Removing {}'.format(filename.split('/')[-1]))
            os.remove(filename)
! date

Fri Aug 17 15:18:33 +08 2018
year = 2000
  Copying surface pressure file
  Calculating mass of air in each model level
Fri Aug 17 15:19:30 +08 2018
  variable = mACCWET
    Copying the file for mACCWET
    Appending mass_air
    Calculating the mass rate of mACCWET in each model level
    Summing over levels
    Renaming variable to cmACCWET
    Setting units and long_name
    Calculating annual means
    Written marc_s2_2000_cmACCWET_ANN.nc
    Removing temp_marc_s2_2000_mACCWET.nc
    Removing temp_marc_s2_2000_mass_mACCWET.nc
    Removing temp_marc_s2_2000_column_mACCWET.nc
Fri Aug 17 15:20:43 +08 2018
  variable = mAITWET
    Copying the file for mAITWET
    Appending mass_air
    Calculating the mass rate of mAITWET in each model level
    Summing over levels
    Renaming variable to cmAITWET
    Setting units and long_name
    Calculating annual means
    Written marc_s2_2000_cmAITWET_ANN.nc
    Removing temp_marc_s2_2000_mAITWET.nc
    Removing temp_marc_s2_2000_mass_mAITWET.nc


    Summing over levels
    Renaming variable to cAGEmOC
    Setting units and long_name
    Calculating annual means
    Written marc_s2_2000_cAGEmOC_ANN.nc
    Removing temp_marc_s2_2000_AGEmOC.nc
    Removing temp_marc_s2_2000_mass_AGEmOC.nc
    Removing temp_marc_s2_2000_column_AGEmOC.nc
Fri Aug 17 15:39:10 +08 2018
  variable = AGEmMOS
    Copying the file for AGEmMOS
    Appending mass_air
    Calculating the mass rate of AGEmMOS in each model level
    Summing over levels
    Renaming variable to cAGEmMOS
    Setting units and long_name
    Calculating annual means
    Written marc_s2_2000_cAGEmMOS_ANN.nc
    Removing temp_marc_s2_2000_AGEmMOS.nc
    Removing temp_marc_s2_2000_mass_AGEmMOS.nc
    Removing temp_marc_s2_2000_column_AGEmMOS.nc
Fri Aug 17 15:40:18 +08 2018
  variable = AGEmBC
    Copying the file for AGEmBC
    Appending mass_air
    Calculating the mass rate of AGEmBC in each model level
    Summing over levels
    Renaming variable to cAGEmBC
    Setting units and

  variable = mOIMSCV
    Copying the file for mOIMSCV
    Appending mass_air
    Calculating the mass rate of mOIMSCV in each model level
    Summing over levels
    Renaming variable to cmOIMSCV
    Setting units and long_name
    Calculating annual means
    Written marc_s2_2000_cmOIMSCV_ANN.nc
    Removing temp_marc_s2_2000_mOIMSCV.nc
    Removing temp_marc_s2_2000_mass_mOIMSCV.nc
    Removing temp_marc_s2_2000_column_mOIMSCV.nc
Fri Aug 17 16:00:05 +08 2018
  variable = mBCSCV
    Copying the file for mBCSCV
    Appending mass_air
    Calculating the mass rate of mBCSCV in each model level
    Summing over levels
    Renaming variable to cmBCSCV
    Setting units and long_name
    Calculating annual means
    Written marc_s2_2000_cmBCSCV_ANN.nc
    Removing temp_marc_s2_2000_mBCSCV.nc
    Removing temp_marc_s2_2000_mass_mBCSCV.nc
    Removing temp_marc_s2_2000_column_mBCSCV.nc
Fri Aug 17 16:01:04 +08 2018
  variable = mMBSSCV
    Copying the file for mMBSSCV
    Appending mass_air
 

    Removing temp_marc_s2_2000_mass_CGmAITAC.nc
    Removing temp_marc_s2_2000_column_CGmAITAC.nc
Fri Aug 17 16:12:38 +08 2018
  variable = CGmAITMB
    Copying the file for CGmAITMB
    Appending mass_air
    Calculating the mass rate of CGmAITMB in each model level
    Summing over levels
    Renaming variable to cCGmAITMB
    Setting units and long_name
    Calculating annual means
    Written marc_s2_2000_cCGmAITMB_ANN.nc
    Removing temp_marc_s2_2000_CGmAITMB.nc
    Removing temp_marc_s2_2000_mass_CGmAITMB.nc
    Removing temp_marc_s2_2000_column_CGmAITMB.nc
Fri Aug 17 16:13:19 +08 2018
  variable = CGmAITMO
    Copying the file for CGmAITMO
    Appending mass_air
    Calculating the mass rate of CGmAITMO in each model level
    Summing over levels
    Renaming variable to cCGmAITMO
    Setting units and long_name
    Calculating annual means
    Written marc_s2_2000_cCGmAITMO_ANN.nc
    Removing temp_marc_s2_2000_CGmAITMO.nc
    Removing temp_marc_s2_2000_mass_CGmAITMO.nc
    Re

    Removing temp_marc_s2_1850_column_DST03PP.nc
Fri Aug 17 16:26:11 +08 2018
  variable = DST04PP
    Copying the file for DST04PP
    Appending mass_air
    Calculating the mass rate of DST04PP in each model level
    Summing over levels
    Renaming variable to cDST04PP
    Setting units and long_name
    Calculating annual means
    Written marc_s2_1850_cDST04PP_ANN.nc
    Removing temp_marc_s2_1850_DST04PP.nc
    Removing temp_marc_s2_1850_mass_DST04PP.nc
    Removing temp_marc_s2_1850_column_DST04PP.nc
Fri Aug 17 16:26:57 +08 2018
  variable = SSLT01PP
    Copying the file for SSLT01PP
    Appending mass_air
    Calculating the mass rate of SSLT01PP in each model level
    Summing over levels
    Renaming variable to cSSLT01PP
    Setting units and long_name
    Calculating annual means
    Written marc_s2_1850_cSSLT01PP_ANN.nc
    Removing temp_marc_s2_1850_SSLT01PP.nc
    Removing temp_marc_s2_1850_mass_SSLT01PP.nc
    Removing temp_marc_s2_1850_column_SSLT01PP.nc
Fri Aug 17 16

    Calculating annual means
    Written marc_s2_1850_cmBIMACT_ANN.nc
    Removing temp_marc_s2_1850_mBIMACT.nc
    Removing temp_marc_s2_1850_mass_mBIMACT.nc
    Removing temp_marc_s2_1850_column_mBIMACT.nc
Fri Aug 17 16:41:01 +08 2018
  variable = mACCSCV
    Copying the file for mACCSCV
    Appending mass_air
    Calculating the mass rate of mACCSCV in each model level
    Summing over levels
    Renaming variable to cmACCSCV
    Setting units and long_name
    Calculating annual means
    Written marc_s2_1850_cmACCSCV_ANN.nc
    Removing temp_marc_s2_1850_mACCSCV.nc
    Removing temp_marc_s2_1850_mass_mACCSCV.nc
    Removing temp_marc_s2_1850_column_mACCSCV.nc
Fri Aug 17 16:41:55 +08 2018
  variable = mAITSCV
    Copying the file for mAITSCV
    Appending mass_air
    Calculating the mass rate of mAITSCV in each model level
    Summing over levels
    Renaming variable to cmAITSCV
    Setting units and long_name
    Calculating annual means
    Written marc_s2_1850_cmAITSCV_ANN.nc


    Calculating the mass rate of MVmAIT in each model level
    Summing over levels
    Renaming variable to cMVmAIT
    Setting units and long_name
    Calculating annual means
    Written marc_s2_1850_cMVmAIT_ANN.nc
    Removing temp_marc_s2_1850_MVmAIT.nc
    Removing temp_marc_s2_1850_mass_MVmAIT.nc
    Removing temp_marc_s2_1850_column_MVmAIT.nc
Fri Aug 17 16:56:08 +08 2018
  variable = CGmNUCAC
    Copying the file for CGmNUCAC
    Appending mass_air
    Calculating the mass rate of CGmNUCAC in each model level
    Summing over levels
    Renaming variable to cCGmNUCAC
    Setting units and long_name
    Calculating annual means
    Written marc_s2_1850_cCGmNUCAC_ANN.nc
    Removing temp_marc_s2_1850_CGmNUCAC.nc
    Removing temp_marc_s2_1850_mass_CGmNUCAC.nc
    Removing temp_marc_s2_1850_column_CGmNUCAC.nc
Fri Aug 17 16:57:00 +08 2018
  variable = CGmNUCAI
    Copying the file for CGmNUCAI
    Appending mass_air
    Calculating the mass rate of CGmNUCAI in each model level
    

## Derive additional variables of interest
- Total sulfate, total OC, total BC, total dust, total sea-salt.
- Other "derived" variables (including renamed variables for ease of reading into budget_tables_draft2017.ipynb).
- Sum of sources and sinks for each species.

In [9]:
# Definitions of the derived variables, each of the form 'name=expression'.
# The derivation loop passes each entry to `cdo expr` and reads one
# marc_s2_<year>_<input>_ANN.nc file per input named on the right-hand side.
# Order matters: later entries use variables derived by earlier entries.
derived_variable_list = [
    # Total sulfate. Note: mode mass adjustment and coagulation balance to zero.
    'ctSUL_DRY=-mNUCDRY-mAITDRY-mACCDRY-mMOSDRY-mMBSDRY+mOIMDRY+mBIMDRY',  # dry deposition
    'ctSUL_IMP=cmNUCWET+cmAITWET+cmACCWET+cmMOSWET+cmMBSWET-cmOIMWET-cmBIMWET',  # impaction scavenging
    'ctSUL_ACT=cmNUCACT+cmAITACT+cmACCACT+cmMOSACT+cmMBSACT-cmOIMACT-cmBIMACT',  # strat. nuc. scav.
    'ctSUL_SCV=cmNUCSCV+cmAITSCV+cmACCSCV+cmMOSSCV+cmMBSSCV-cmOIMSCV-cmBIMSCV',  # conv. nuc. scav.
    'ctSUL_CND=cCNDmNUC+cCNDmAIT+cCNDmACC+cCNDmMOS+cCNDmMBS',  # H2SO4 condensation
    'ctSUL_BNUC=cBNUCmNUC',  # binary nucleation
    'ctSUL_EVAP=cmSO4evap',  # SO4 cloud evaporation
    'ctSUL_AGEsrc=cAGEmMOS+cAGEmMBS',  # aging source
    'ctSUL_SOURCE=ctSUL_CND+ctSUL_BNUC+ctSUL_EVAP+ctSUL_AGEsrc',  # sum of sources
    'ctSUL_SINK=ctSUL_DRY+ctSUL_IMP+ctSUL_ACT+ctSUL_SCV',  # sum of sinks
    # Total OC. Note: aging and coagulation balance to zero.
    'ctOC_SF=mOCSF',  # emissions (including SOA)
    'ctOC_DRY=-mOCDRY-mOIMDRY',  # dry deposition
    'ctOC_IMP=cmOCWET+cmOIMWET',  # impaction scavenging
    'ctOC_ACT=cmOCACT+cmOIMACT',  # strat. nuc. scav.
    'ctOC_SCV=cmOCSCV+cmOIMSCV',  # conv. nuc. scav.
    'ctOC_SOURCE=ctOC_SF',  # sum of sources
    'ctOC_SINK=ctOC_DRY+ctOC_IMP+ctOC_ACT+ctOC_SCV',  # sum of sinks
    # Total BC. Note: aging balances to zero.
    'ctBC_SF=mBCSF',  # emissions
    'ctBC_DRY=-mBCDRY-mBIMDRY',  # dry deposition
    'ctBC_IMP=cmBCWET+cmBIMWET',  # impaction scavenging
    'ctBC_ACT=cmBCACT+cmBIMACT',  # strat. nuc. scav.
    'ctBC_SCV=cmBCSCV+cmBIMSCV',  # conv. nuc. scav.
    'ctBC_SOURCE=ctBC_SF',  # sum of sources
    'ctBC_SINK=ctBC_DRY+ctBC_IMP+ctBC_ACT+ctBC_SCV',  # sum of sinks
    # Total dust
    'ctDST_SF=DST01SF+DST02SF+DST03SF+DST04SF',  # emissions
    'ctDST_DRY=-DST01DD-DST02DD-DST03DD-DST04DD',  # dry deposition
    'ctDST_IMP=cDST01PP+cDST02PP+cDST03PP+cDST04PP',  # impaction scavenging
    'ctDST_SOURCE=ctDST_SF',  # sum of sources
    'ctDST_SINK=ctDST_DRY+ctDST_IMP',  # sum of sinks
    # Total sea-salt
    'ctSSLT_SF=SSLT01SF+SSLT02SF+SSLT03SF+SSLT04SF',  # emissions
    'ctSSLT_DRY=-SSLT01DD-SSLT02DD-SSLT03DD-SSLT04DD',  # dry deposition
    'ctSSLT_IMP=cSSLT01PP+cSSLT02PP+cSSLT03PP+cSSLT04PP',  # impaction scavenging
    'ctSSLT_SOURCE=ctSSLT_SF',  # sum of sources
    'ctSSLT_SINK=ctSSLT_DRY+ctSSLT_IMP',  # sum of sinks
    # ACC
    'cACC_DRY=-mACCDRY',  # dry deposition -> sink
    'cACC_IMP=cmACCWET',  # impaction scavenging -> sink
    'cACC_ACT=cmACCACT',  # strat. nuc. scav. -> sink
    'cACC_SCV=cmACCSCV',  # conv. nuc. scav. -> sink
    'cACC_CND=cCNDmACC',  # H2SO4 condensation -> source
    'cACC_EVAP=cmSO4evap',  # SO4 cloud evaporation -> source
    'cACC_ADJsrc=-cMVmAIT',  # mode mass adjustment source
    # Counterparts of the cCGmNUCAC term in cNUC_CGsnk and the cCGmAITAC term in cAIT_CGsnk
    # (cCGmNUCAI belongs to cAIT_CGsrc, not here)
    'cACC_CGsrc=-cCGmNUCAC-cCGmAITAC',  # coagulation source
    'cACC_CGsnk=cCGmACCMB+cCGmACCMO',  # coagulation sink
    'cACC_SOURCE=cACC_CND+cACC_EVAP+cACC_ADJsrc+cACC_CGsrc',  # sum of sources
    'cACC_SINK=cACC_DRY+cACC_IMP+cACC_ACT+cACC_SCV+cACC_CGsnk',  # sum of sinks
    # AIT
    'cAIT_DRY=-mAITDRY',  # dry deposition -> sink
    'cAIT_IMP=cmAITWET',  # impaction scavenging -> sink
    'cAIT_ACT=cmAITACT',  # strat. nuc. scav. -> sink
    'cAIT_SCV=cmAITSCV',  # conv. nuc. scav. -> sink
    'cAIT_CND=cCNDmAIT',  # H2SO4 condensation -> source
    'cAIT_ADJsrc=-cMVmNUC',  # mode mass adjustment source
    'cAIT_ADJsnk=cMVmAIT',  # mode mass adjustment sink
    'cAIT_CGsrc=-cCGmNUCAI',  # coagulation source
    'cAIT_CGsnk=cCGmAITAC+cCGmAITMB+cCGmAITMO',  # coagulation sink
    'cAIT_SOURCE=cAIT_CND+cAIT_ADJsrc+cAIT_CGsrc',  # sum of sources
    'cAIT_SINK=cAIT_DRY+cAIT_IMP+cAIT_ACT+cAIT_SCV+cAIT_ADJsnk+cAIT_CGsnk',  # sum of sinks
    # NUC
    'cNUC_DRY=-mNUCDRY',  # dry deposition -> sink
    'cNUC_IMP=cmNUCWET',  # impaction scavenging -> sink
    'cNUC_ACT=cmNUCACT',  # strat. nuc. scav. -> sink
    'cNUC_SCV=cmNUCSCV',  # conv. nuc. scav. -> sink
    'cNUC_CND=cCNDmNUC',  # H2SO4 condensation -> source
    'cNUC_BNUC=cBNUCmNUC',  # binary nucleation -> source
    'cNUC_ADJsnk=cMVmNUC',  # mode mass adjustment sink
    'cNUC_CGsnk=cCGmNUCAI+cCGmNUCAC+cCGmNUCMB+cCGmNUCMO',  # coagulation sink
    'cNUC_SOURCE=cNUC_CND+cNUC_BNUC',  # sum of sources
    'cNUC_SINK=cNUC_DRY+cNUC_IMP+cNUC_ACT+cNUC_SCV+cNUC_ADJsnk+cNUC_CGsnk',  # sum of sinks
    # OC
    'cOC_SF=mOCSF',  # emissions (including SOA)
    'cOC_DRY=-mOCDRY',  # dry deposition -> sink
    'cOC_IMP=cmOCWET',  # impaction scavenging -> sink
    'cOC_ACT=cmOCACT',  # strat. nuc. scav. -> sink
    'cOC_SCV=cmOCSCV',  # conv. nuc. scav. -> sink
    'cOC_AGEsnk=cAGEmOC',  # aging sink
    'cOC_CGsnk=cCGmOCMOS',  # coagulation sink
    'cOC_SOURCE=cOC_SF',  # sum of sources
    'cOC_SINK=cOC_DRY+cOC_IMP+cOC_ACT+cOC_SCV+cOC_AGEsnk+cOC_CGsnk',  # sum of sinks
    # MOS
    'cMOS_DRY=-mMOSDRY',  # dry deposition -> sink
    'cMOS_IMP=cmMOSWET',  # impaction scavenging -> sink
    'cMOS_ACT=cmMOSACT',  # strat. nuc. scav. -> sink
    'cMOS_SCV=cmMOSSCV',  # conv. nuc. scav. -> sink
    'cMOS_CND=cCNDmMOS',  # H2SO4 condensation -> source
    'cMOS_AGEsrc=cAGEmMOS-cAGEmOC',  # aging source
    'cMOS_CGsrc=-cCGmNUCMO-cCGmAITMO-cCGmACCMO-cCGmOCMOS',  # coagulation source
    'cMOS_SOURCE=cMOS_CND+cMOS_AGEsrc+cMOS_CGsrc',  # sum of sources
    'cMOS_SINK=cMOS_DRY+cMOS_IMP+cMOS_ACT+cMOS_SCV',  # sum of sinks
    # OIM
    'cOIM_DRY=-mOIMDRY',  # dry deposition -> sink
    'cOIM_IMP=cmOIMWET',  # impaction scavenging -> sink
    'cOIM_ACT=cmOIMACT',  # strat. nuc. scav. -> sink
    'cOIM_SCV=cmOIMSCV',  # conv. nuc. scav. -> sink
    'cOIM_AGEsrc=-cAGEmOC',  # aging source
    'cOIM_CGsrc=-cCGmOCMOS',  # coagulation source
    'cOIM_SOURCE=cOIM_AGEsrc+cOIM_CGsrc',  # sum of sources
    'cOIM_SINK=cOIM_DRY+cOIM_IMP+cOIM_ACT+cOIM_SCV',  # sum of sinks
    # SIMOS: sulfate in MOS
    'cSIMOS_DRY=cMOS_DRY-cOIM_DRY',  # dry deposition -> sink
    'cSIMOS_IMP=cMOS_IMP-cOIM_IMP',  # impaction scavenging -> sink
    'cSIMOS_ACT=cMOS_ACT-cOIM_ACT',  # strat. nuc. scav. -> sink
    'cSIMOS_SCV=cMOS_SCV-cOIM_SCV',  # conv. nuc. scav. -> sink
    'cSIMOS_CND=cMOS_CND',  # H2SO4 condensation -> source
    'cSIMOS_AGEsrc=cAGEmMOS',  # aging source
    'cSIMOS_CGsrc=-cCGmNUCMO-cCGmAITMO-cCGmACCMO',  # coagulation source
    'cSIMOS_SOURCE=cSIMOS_CND+cSIMOS_AGEsrc+cSIMOS_CGsrc',  # sum of sources
    'cSIMOS_SINK=cSIMOS_DRY+cSIMOS_IMP+cSIMOS_ACT+cSIMOS_SCV',  # sum of sinks
    # BC
    'cBC_SF=mBCSF',  # emissions
    'cBC_DRY=-mBCDRY',  # dry deposition -> sink
    'cBC_IMP=cmBCWET',  # impaction scavenging -> sink
    'cBC_ACT=cmBCACT',  # strat. nuc. scav. -> sink
    'cBC_SCV=cmBCSCV',  # conv. nuc. scav. -> sink
    'cBC_AGEsnk=cAGEmBC',  # aging sink
    'cBC_SOURCE=cBC_SF',  # sum of sources
    'cBC_SINK=cBC_DRY+cBC_IMP+cBC_ACT+cBC_SCV+cBC_AGEsnk',  # sum of sinks
    # MBS
    'cMBS_DRY=-mMBSDRY',  # dry deposition -> sink
    'cMBS_IMP=cmMBSWET',  # impaction scavenging -> sink
    'cMBS_ACT=cmMBSACT',  # strat. nuc. scav. -> sink
    'cMBS_SCV=cmMBSSCV',  # conv. nuc. scav. -> sink
    'cMBS_CND=cCNDmMBS',  # H2SO4 condensation -> source
    'cMBS_AGEsrc=cAGEmMBS-cAGEmBC',  # aging source
    'cMBS_CGsrc=-cCGmNUCMB-cCGmAITMB-cCGmACCMB',  # coagulation source
    'cMBS_SOURCE=cMBS_CND+cMBS_AGEsrc+cMBS_CGsrc',  # sum of sources
    'cMBS_SINK=cMBS_DRY+cMBS_IMP+cMBS_ACT+cMBS_SCV',  # sum of sinks
    # BIM
    'cBIM_DRY=-mBIMDRY',  # dry deposition -> sink
    'cBIM_IMP=cmBIMWET',  # impaction scavenging -> sink
    'cBIM_ACT=cmBIMACT',  # strat. nuc. scav. -> sink
    'cBIM_SCV=cmBIMSCV',  # conv. nuc. scav. -> sink
    'cBIM_AGEsrc=-cAGEmBC',  # aging source
    'cBIM_SOURCE=cBIM_AGEsrc',  # sum of sources
    'cBIM_SINK=cBIM_DRY+cBIM_IMP+cBIM_ACT+cBIM_SCV',  # sum of sinks
    # SIMBS: sulfate in MBS
    'cSIMBS_DRY=cMBS_DRY-cBIM_DRY',  # dry deposition -> sink
    'cSIMBS_IMP=cMBS_IMP-cBIM_IMP',  # impaction scavenging -> sink
    'cSIMBS_ACT=cMBS_ACT-cBIM_ACT',  # strat. nuc. scav. -> sink
    'cSIMBS_SCV=cMBS_SCV-cBIM_SCV',  # conv. nuc. scav. -> sink
    'cSIMBS_CND=cMBS_CND',  # H2SO4 condensation -> source
    'cSIMBS_AGEsrc=cAGEmMBS',  # aging source
    'cSIMBS_CGsrc=cMBS_CGsrc',  # coagulation source
    'cSIMBS_SOURCE=cSIMBS_CND+cSIMBS_AGEsrc+cSIMBS_CGsrc',  # sum of sources
    'cSIMBS_SINK=cSIMBS_DRY+cSIMBS_IMP+cSIMBS_ACT+cSIMBS_SCV',  # sum of sinks
    ]

In [10]:
for year in ['2000', '1850']:  # loop over year-2000 and year-1850
    for derived_variable in derived_variable_list:  # loop over variables to derive
        print('{}, {}'.format(derived_variable, year))
        variable_list = re.split('\=\-|\=|\+|\-', derived_variable)  # list of variables in derived variable
        # Merge input files
        in_filename_list = []
        for variable in variable_list[1:]:
            in_filename_list.append('{}/marc_s2_{}_{}_ANN.nc'.format(out_dir, year, variable))
        merge_filename = '{}/temp_{}_merge_ANN.nc'.format(out_dir, year)
        if len(in_filename_list) == 1:  # merge case 1: 1 input file
            ! cp {in_filename_list[0]} {merge_filename}
        elif len(in_filename_list) == 2:  # merge case 2: 2 input files
            ! cdo -s merge {in_filename_list[0]} {in_filename_list[1]} \
                {merge_filename} >/dev/null 2>/dev/null
        elif len(in_filename_list) == 3:  # merge case 3: 3 input files
            ! cdo -s merge {in_filename_list[0]} {in_filename_list[1]} {in_filename_list[2]} \
                {merge_filename} >/dev/null 2>/dev/null
        elif len(in_filename_list) == 4:  # merge case 4: 4 input files
            ! cdo -s merge {in_filename_list[0]} {in_filename_list[1]} {in_filename_list[2]} \
                {in_filename_list[3]} \
                {merge_filename} >/dev/null 2>/dev/null
        elif len(in_filename_list) == 5:  # merge case 5: 5 input files
            ! cdo -s merge {in_filename_list[0]} {in_filename_list[1]} {in_filename_list[2]} \
                {in_filename_list[3]} {in_filename_list[4]} \
                {merge_filename} >/dev/null 2>/dev/null
        elif len(in_filename_list) == 6:  # merge case 6: 6 input files
            ! cdo -s merge {in_filename_list[0]} {in_filename_list[1]} {in_filename_list[2]} \
                {in_filename_list[3]} {in_filename_list[4]} {in_filename_list[5]} \
                {merge_filename} >/dev/null 2>/dev/null
        elif len(in_filename_list) == 7:  # merge case 7: 7 input files
            ! cdo -s merge {in_filename_list[0]} {in_filename_list[1]} {in_filename_list[2]} \
                {in_filename_list[3]} {in_filename_list[4]} {in_filename_list[5]} \
                {in_filename_list[6]} {merge_filename} >/dev/null 2>/dev/null
        # Calculate derived variable
        out_filename = '{}/marc_s2_{}_{}_ANN.nc'.format(out_dir, year, variable_list[0])
        try:
            os.remove(out_filename)
        except FileNotFoundError:
            pass
        ! cdo -s expr,'{derived_variable}' {merge_filename} {out_filename}
        if os.path.isfile(out_filename):
            print('  Written {}'.format(out_filename.split('/')[-1]))
        # Remove temporary file
        for filename in [merge_filename, ]:
            print('  Removing {}'.format(filename.split('/')[-1]))
            os.remove(filename)
! date

ctSUL_DRY=-mNUCDRY-mAITDRY-mACCDRY-mMOSDRY-mMBSDRY+mOIMDRY+mBIMDRY, 2000
  Written marc_s2_2000_ctSUL_DRY_ANN.nc
  Removing temp_2000_merge_ANN.nc
ctSUL_IMP=cmNUCWET+cmAITWET+cmACCWET+cmMOSWET+cmMBSWET-cmOIMWET-cmBIMWET, 2000
  Written marc_s2_2000_ctSUL_IMP_ANN.nc
  Removing temp_2000_merge_ANN.nc
ctSUL_ACT=cmNUCACT+cmAITACT+cmACCACT+cmMOSACT+cmMBSACT-cmOIMACT-cmBIMACT, 2000
  Written marc_s2_2000_ctSUL_ACT_ANN.nc
  Removing temp_2000_merge_ANN.nc
ctSUL_SCV=cmNUCSCV+cmAITSCV+cmACCSCV+cmMOSSCV+cmMBSSCV-cmOIMSCV-cmBIMSCV, 2000
  Written marc_s2_2000_ctSUL_SCV_ANN.nc
  Removing temp_2000_merge_ANN.nc
ctSUL_CND=cCNDmNUC+cCNDmAIT+cCNDmACC+cCNDmMOS+cCNDmMBS, 2000
  Written marc_s2_2000_ctSUL_CND_ANN.nc
  Removing temp_2000_merge_ANN.nc
ctSUL_BNUC=cBNUCmNUC, 2000
  Written marc_s2_2000_ctSUL_BNUC_ANN.nc
  Removing temp_2000_merge_ANN.nc
ctSUL_EVAP=cmSO4evap, 2000
  Written marc_s2_2000_ctSUL_EVAP_ANN.nc
  Removing temp_2000_merge_ANN.nc
ctSUL_AGEsrc=cAGEmMOS+cAGEmMBS, 2000
  Written marc_s2_

  Written marc_s2_2000_cOC_SINK_ANN.nc
  Removing temp_2000_merge_ANN.nc
cMOS_DRY=-mMOSDRY, 2000
  Written marc_s2_2000_cMOS_DRY_ANN.nc
  Removing temp_2000_merge_ANN.nc
cMOS_IMP=cmMOSWET, 2000
  Written marc_s2_2000_cMOS_IMP_ANN.nc
  Removing temp_2000_merge_ANN.nc
cMOS_ACT=cmMOSACT, 2000
  Written marc_s2_2000_cMOS_ACT_ANN.nc
  Removing temp_2000_merge_ANN.nc
cMOS_SCV=cmMOSSCV, 2000
  Written marc_s2_2000_cMOS_SCV_ANN.nc
  Removing temp_2000_merge_ANN.nc
cMOS_CND=cCNDmMOS, 2000
  Written marc_s2_2000_cMOS_CND_ANN.nc
  Removing temp_2000_merge_ANN.nc
cMOS_AGEsrc=cAGEmMOS-cAGEmOC, 2000
  Written marc_s2_2000_cMOS_AGEsrc_ANN.nc
  Removing temp_2000_merge_ANN.nc
cMOS_CGsrc=-cCGmNUCMO-cCGmAITMO-cCGmACCMO-cCGmOCMOS, 2000
  Written marc_s2_2000_cMOS_CGsrc_ANN.nc
  Removing temp_2000_merge_ANN.nc
cMOS_SOURCE=cMOS_CND+cMOS_AGEsrc+cMOS_CGsrc, 2000
  Written marc_s2_2000_cMOS_SOURCE_ANN.nc
  Removing temp_2000_merge_ANN.nc
cMOS_SINK=cMOS_DRY+cMOS_IMP+cMOS_ACT+cMOS_SCV, 2000
  Written marc_s2_20

  Written marc_s2_1850_ctOC_SCV_ANN.nc
  Removing temp_1850_merge_ANN.nc
ctOC_SOURCE=ctOC_SF, 1850
  Written marc_s2_1850_ctOC_SOURCE_ANN.nc
  Removing temp_1850_merge_ANN.nc
ctOC_SINK=ctOC_DRY+ctOC_IMP+ctOC_ACT+ctOC_SCV, 1850
  Written marc_s2_1850_ctOC_SINK_ANN.nc
  Removing temp_1850_merge_ANN.nc
ctBC_SF=mBCSF, 1850
  Written marc_s2_1850_ctBC_SF_ANN.nc
  Removing temp_1850_merge_ANN.nc
ctBC_DRY=-mBCDRY-mBIMDRY, 1850
  Written marc_s2_1850_ctBC_DRY_ANN.nc
  Removing temp_1850_merge_ANN.nc
ctBC_IMP=cmBCWET+cmBIMWET, 1850
  Written marc_s2_1850_ctBC_IMP_ANN.nc
  Removing temp_1850_merge_ANN.nc
ctBC_ACT=cmBCACT+cmBIMACT, 1850
  Written marc_s2_1850_ctBC_ACT_ANN.nc
  Removing temp_1850_merge_ANN.nc
ctBC_SCV=cmBCSCV+cmBIMSCV, 1850
  Written marc_s2_1850_ctBC_SCV_ANN.nc
  Removing temp_1850_merge_ANN.nc
ctBC_SOURCE=ctBC_SF, 1850
  Written marc_s2_1850_ctBC_SOURCE_ANN.nc
  Removing temp_1850_merge_ANN.nc
ctBC_SINK=ctBC_DRY+ctBC_IMP+ctBC_ACT+ctBC_SCV, 1850
  Written marc_s2_1850_ctBC_SINK_A

  Written marc_s2_1850_cOIM_SOURCE_ANN.nc
  Removing temp_1850_merge_ANN.nc
cOIM_SINK=cOIM_DRY+cOIM_IMP+cOIM_ACT+cOIM_SCV, 1850
  Written marc_s2_1850_cOIM_SINK_ANN.nc
  Removing temp_1850_merge_ANN.nc
cSIMOS_DRY=cMOS_DRY-cOIM_DRY, 1850
  Written marc_s2_1850_cSIMOS_DRY_ANN.nc
  Removing temp_1850_merge_ANN.nc
cSIMOS_IMP=cMOS_IMP-cOIM_IMP, 1850
  Written marc_s2_1850_cSIMOS_IMP_ANN.nc
  Removing temp_1850_merge_ANN.nc
cSIMOS_ACT=cMOS_ACT-cOIM_ACT, 1850
  Written marc_s2_1850_cSIMOS_ACT_ANN.nc
  Removing temp_1850_merge_ANN.nc
cSIMOS_SCV=cMOS_SCV-cOIM_SCV, 1850
  Written marc_s2_1850_cSIMOS_SCV_ANN.nc
  Removing temp_1850_merge_ANN.nc
cSIMOS_CND=cMOS_CND, 1850
  Written marc_s2_1850_cSIMOS_CND_ANN.nc
  Removing temp_1850_merge_ANN.nc
cSIMOS_AGEsrc=cAGEmMOS, 1850
  Written marc_s2_1850_cSIMOS_AGEsrc_ANN.nc
  Removing temp_1850_merge_ANN.nc
cSIMOS_CGsrc=-cCGmNUCMO-cCGmAITMO-cCGmACCMO, 1850
  Written marc_s2_1850_cSIMOS_CGsrc_ANN.nc
  Removing temp_1850_merge_ANN.nc
cSIMOS_SOURCE=cSIMOS_CN