In [69]:
from cdo import Cdo
import xarray as xr
import os
import sys
from pathlib import Path
import glob
import difflib
from collections import OrderedDict
import re

In [73]:
# Shared Cdo wrapper instance; the processing function defined further down
# issues its mergetime/mulc/addc operator calls through this object.
cdo = Cdo()

In [133]:
# Root of the input files; the cells below expect a <model>/<variable>/*.nc
# layout underneath it.
DATA_DIR = Path('/media/abhi/My_Passport/NEX-GDDP-NASA')
# Root under which all generated files are written.
OUT_DIR = Path('/media/abhi/My_Passport/NEX-GDDP-NASA-OUTPUT')
# Create the output root with pathlib instead of shelling out to `mkdir -p`:
# no shell is spawned, no quoting is needed, and a failure (e.g. the drive is
# not mounted) raises immediately instead of being hidden in an exit status.
OUT_DIR.mkdir(parents=True, exist_ok=True)

In [134]:
# Every sub-directory found directly under DATA_DIR is treated as a model name.
models = [
    name
    for name in os.listdir(DATA_DIR)
    if os.path.isdir(os.path.join(DATA_DIR, name))
]

# Variable and scenario identifiers accepted by the helper functions.
variables = ['pr', 'tasmax', 'tasmin']
scen = ['historical', 'rcp45', 'rcp85']

In [135]:
# Valid values for each argument, stored in the same order in which
# get_apt_names receives its arguments (model, variable, scen).
possibilities = OrderedDict([
    ('model', models),
    ('variable', variables),
    ('scen', scen),
])

In [136]:
def get_apt_names(model, variable, scen):
    """Resolve loosely spelled arguments to the closest known names.

    Each argument is compared against the matching entry of the module-level
    ``possibilities`` mapping (taken in its stored order) and replaced by the
    single most similar candidate.

    Parameters
    ----------
    model, variable, scen : str
        Full or abbreviated names, e.g. ``'hist'`` for ``'historical'``.

    Returns
    -------
    list of str
        ``[model, variable, scen]`` with every entry replaced by its best match.

    Raises
    ------
    IndexError
        If one of the candidate lists is empty.
    """
    requested = (model, variable, scen)
    resolved = []
    for word, candidates in zip(requested, possibilities.values()):
        # n=1 keeps only the top-ranked candidate; cutoff=0 disables the
        # similarity threshold, so some candidate is always returned.
        best = difflib.get_close_matches(word, candidates, n=1, cutoff=0)
        resolved.append(best[0])

    # Unpacking enforces that exactly three names were resolved.
    model, variable, scen = resolved
    return [model, variable, scen]

In [137]:
def get_model_files(model, variable, scen):
    """List the ``.nc`` files stored for one model/variable/scenario.

    The arguments are first resolved with ``get_apt_names``; the search then
    looks in ``DATA_DIR/<model>/<variable>`` for file names that contain the
    scenario name and end in ``.nc``.

    Returns
    -------
    list of str
        Matching paths as returned by ``glob.glob`` (empty when nothing matches).
    """
    # Canonicalise the names first so abbreviated inputs still hit the right
    # directory and scenario.
    model, variable, scen = get_apt_names(model, variable, scen)

    pattern = f'{DATA_DIR}/{model}/{variable}/*{scen}*.nc'
    matches = glob.glob(pattern)
    return matches

In [147]:
def _year_span_label(files, default='1950-2005'):
    """Build a ``<first>-<last>`` year label from file names ending in ``_YYYY.nc``.

    Falls back to ``default`` (the label that used to be hard-coded) as soon as
    one file name does not follow that pattern, so unexpected naming schemes
    keep producing the previous output name.
    """
    years = []
    for name in files:
        found = re.search(r'_(\d{4})\.nc$', Path(name).name)
        if found is None:
            return default
        years.append(int(found.group(1)))
    if not years:
        return default
    return f'{min(years)}-{max(years)}'


def make_year_combined_file(model, variable, scen, outdir=None):
    """Merge all files of one model/variable/scenario into one converted file.

    The files returned by ``get_model_files`` are combined with the CDO
    ``mergetime`` operator, then a constant is applied with a second CDO
    call: ``pr`` is multiplied by 86400 and ``tasmax``/``tasmin`` have 273.15
    subtracted (presumably kg m-2 s-1 -> mm/day and K -> degC; confirm against
    the units of the input files).

    Parameters
    ----------
    model, variable, scen : str
        Full or abbreviated names; resolved with ``get_apt_names``.
    outdir : str or path-like, optional
        Directory that receives the result. When omitted,
        ``OUT_DIR/model_year_combined/<model>/<variable>`` is used.

    Returns
    -------
    None
        The result is written to
        ``<outdir>/<variable>_day_<model>_<scen>_<years>.nc``.

    Raises
    ------
    FileNotFoundError
        If no input file matches the requested combination.
    """
    model, variable, scen = get_apt_names(model, variable, scen)

    model_files = get_model_files(model, variable, scen)
    if not model_files:
        raise FileNotFoundError(
            f'no input files found for model={model!r}, '
            f'variable={variable!r}, scen={scen!r}'
        )

    # Honour a caller-supplied directory; only fall back to the default layout.
    if outdir is None:
        outdir = f'{OUT_DIR}/model_year_combined/{model}/{variable}'
    os.makedirs(outdir, exist_ok=True)

    # Label the output with the years actually present in the inputs instead
    # of a fixed historical range, which mislabelled the rcp scenarios.
    years = _year_span_label(model_files)
    outname = '_'.join([variable, 'day', model, scen, f'{years}.nc'])
    out_path = f"{outdir}/{outname}"

    # Scenario-specific scratch name so that parallel runs for the same
    # model/variable cannot overwrite each other's intermediate file.
    tmp_path = f"{outdir}/tmp_{outname}"

    try:
        # SKIP_SAME_TIME is handed to CDO through the environment
        # (presumably to drop duplicated time steps while merging; see CDO docs).
        cdo.mergetime(input=' '.join(model_files),
                      output=tmp_path,
                      env={"SKIP_SAME_TIME": "1"})

        if variable == "pr":
            # 86400 = number of seconds in a day.
            cdo.mulc('86400', input=tmp_path, output=out_path)
        elif variable in ['tasmax', 'tasmin']:
            cdo.addc('-273.15', input=tmp_path, output=out_path)
    finally:
        # Always clean up the intermediate file, even when a CDO call failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    
  
    

In [148]:
%%time
# Process a single combination serially; the abbreviated 'hist' is resolved to
# 'historical' by the closest-match lookup in get_apt_names.
make_year_combined_file('ACCESS1-0', 'tasmax', 'hist')

CPU times: user 20 ms, sys: 32 ms, total: 52 ms
Wall time: 2min 48s


In [151]:
from multiprocessing import Pool

In [None]:
%%time
# One (model, variable, scen) tuple per call to make_year_combined_file.
jobs = [
    ('ACCESS1-0', 'tasmin', 'hist'),
    ('ACCESS1-0', 'tasmax', 'hist'),
]

# starmap blocks until every job has finished and re-raises a worker's
# exception in this process; fire-and-forget apply_async calls whose results
# are never collected would hide such failures. The context manager shuts the
# pool down afterwards, and no more workers are started than there are jobs
# (capped at 14).
with Pool(processes=min(14, len(jobs))) as pool:
    pool.starmap(make_year_combined_file, jobs)