In [1]:
## import libraries
import os, sys
import yaml
import xarray as xr
import numpy as np
import pandas as pd

%matplotlib inline
## absolute path to the local checkout of the SEAK_AR_impacts repository
## NOTE(review): machine-specific path -- must be edited to run elsewhere
path_to_repo = '/cw3e/mead/projects/cwp140/scratch/dnash/repos/SEAK_AR_impacts/'
## put the repo's modules/ directory on the import path; this has to run
## before the import below, which is expected to be resolved from there
sys.path.append(path_to_repo+'modules')
import GEFSv12_funcs as gefs

In [8]:
%%time

config_file = 'config_2.yaml' # this is the config file name
job_info = 'job_1' # this is the job name

path_to_data = '/cw3e/mead/projects/cwp140/scratch/dnash/data/'

config = yaml.load(open(config_file), Loader=yaml.SafeLoader) # read the file
ddict = config[job_info] # pull the job info from the dict

year = ddict['year']
date = ddict['date']
ens = ddict['ens']
varname = 'ivt' ## can be 'ivt', 'freezing_level', or 'prec'

print('Loading u, v, and q data ....')
varname_lst = ['ugrd', 'vgrd', 'spfh']
ds_lst = []
for i, varname in enumerate(varname_lst):
    ds = gefs.read_and_regrid_prs_var(varname, date, year)
    ds_lst.append(ds)

## load in surface pressure
print('Loading surface pressure data ....')
ds_pres = gefs.read_sfc_var('pres', date, year)
ds_lst.append(ds_pres)

ds = xr.merge(ds_lst) # merge u, v, and q into single ds
ds = ds.sel(isobaricInhPa=slice(300, 1000))
ds = ds.reindex(isobaricInhPa=ds.isobaricInhPa[::-1])

## mask values below surface pressure
print('Masking values below surface ....')
varlst = ['q', 'u', 'v']
for i, varname in enumerate(varlst):
    ds[varname] = ds[varname].where(ds[varname].isobaricInhPa < ds.sp/100., drop=False)

## integrate to calculate IVT
print('Calculating IVT ....')
ds_IVT = gefs.calc_IVT_manual(ds) # calculate IVT
ds_IVT

Loading u, v, and q data ....
Loading surface pressure data ....
CPU times: user 1min 52s, sys: 1min 13s, total: 3min 5s
Wall time: 3min 7s


In [12]:
## save IVT data to netCDF file
## Writes `ds_IVT`, the dataset produced by gefs.calc_IVT_manual. The name
## `new_ds` used here previously is not defined anywhere in this notebook, so
## the cell raised NameError on a fresh kernel (Restart & Run All).
## NOTE(review): if `new_ds` was a post-processed version of ds_IVT created in
## a since-deleted cell, restore that step before this write.
print('Writing {0} to netCDF ....'.format(date))
out_fname = path_to_data + 'preprocessed/GEFSv12_reforecast/ivt/{0}_ivt.nc'.format(date)
ds_IVT.to_netcdf(path=out_fname, mode='w', format='NETCDF4')

Writing 20091226 to netCDF ....


In [29]:
import pandas as pd
from datetime import timedelta
import numpy as np
import yaml
from itertools import chain

## ---------------------------------------------------------------------
## Generate the batch-job inputs:
##   config_<n>.yaml : up to MAX_JOBS_PER_CONFIG jobs each, keyed 'job_<k>'
##   calls_<n>.txt   : one command line per job in config_<n>.yaml
## ---------------------------------------------------------------------

## maximum number of jobs stored in a single config file
MAX_JOBS_PER_CONFIG = 999


def write_config(job_dicts, filenumber):
    """Merge single-job dictionaries and write them to config_<filenumber>.yaml.

    Parameters
    ----------
    job_dicts : list of dict
        One dictionary per job, each of the form {'job_<k>': {...}}.
    filenumber : int
        Number used in the output file name.

    Returns
    -------
    int
        Number of jobs written to the file.
    """
    merged = {}
    for job in job_dicts:
        merged.update(job)
    ## the context manager closes the file even if yaml.dump raises
    with open("config_{0}.yaml".format(filenumber), "w") as f:
        yaml.dump(merged, f, allow_unicode=True, default_flow_style=None)
    return len(job_dicts)


## for each year between 2000 and 2019, collect the daily dates from
## 55 days before to 45 days after November 21 (both ends included)
date_lst = []
for yr in range(2000, 2020):
    center_date = pd.to_datetime('{0}-11-21'.format(yr))
    start_date = center_date - timedelta(days=55)
    end_date = center_date + timedelta(days=45)
    date_lst.append(pd.date_range(start_date, end_date, freq='1D'))

final_lst = np.concatenate(date_lst)

## build one dictionary per date and flush to a config file every
## MAX_JOBS_PER_CONFIG jobs; job numbering restarts at 1 in each file
## (the loop variable is named `day` so it does not overwrite the job `date`
## string that the netCDF-writing cell relies on)
filecounter = 0
d_lst = []
njob_lst = []
for day in final_lst:
    t = pd.to_datetime(day)
    jobnumber = len(d_lst) + 1
    d_lst.append({'job_{0}'.format(jobnumber):
                  {'year': t.strftime("%Y"),
                   'date': t.strftime("%Y%m%d"),
                   'ens': 'c00'
                   }})

    if len(d_lst) == MAX_JOBS_PER_CONFIG:
        filecounter += 1
        njob_lst.append(write_config(d_lst, filecounter))
        d_lst = []

## save whatever is left over; skipped when the number of dates is an exact
## multiple of MAX_JOBS_PER_CONFIG, so no empty config/calls file is produced
if d_lst:
    filecounter += 1
    njob_lst.append(write_config(d_lst, filecounter))

## create calls_<n>.txt for every config_<n>.yaml written above
for filenumber, njobs in enumerate(njob_lst, start=1):
    with open('calls_{0}.txt'.format(filenumber), 'w', encoding='utf-8') as f:
        for jobnumber in range(1, njobs + 1):
            f.write("python getGEFSv12_batch.py config_{0}.yaml 'job_{1}'".format(filenumber, jobnumber))
            f.write('\n')

In [None]:
"""
Filename:    getGEFSv12_batch.py
Author:      Deanna Nash, dnash@ucsd.edu
Description: Download GEFSv12 Reforecast data based on input configuration dictionary.

Usage:       python getGEFSv12_batch.py <config.yaml> <job_name>
             The process exits with the return code of the download script.
"""
import sys
import yaml
import subprocess

### Imports config name from argument when submit
if len(sys.argv) < 3:
    ## fail with a readable message instead of a bare IndexError
    sys.exit("usage: python getGEFSv12_batch.py <config.yaml> <job_name>")
yaml_doc = sys.argv[1]
config_name = sys.argv[2]

# import configuration file for season dictionary choice
# (the context manager closes the file handle after parsing)
with open(yaml_doc) as f:
    config = yaml.load(f, Loader=yaml.SafeLoader)
ddict = config[config_name]

year = ddict['year']
date = ddict['date']
ens = ddict['ens']
varname = 'ivt' ## can be 'ivt', 'freezing_level', or 'prec'

## run download_GEFSv12_reforecast.sh to download data
## NOTE(review): a script name without a directory component is looked up on
## PATH by subprocess, not in the current directory -- confirm the launch
## environment provides it.
bash_script = "download_GEFSv12_reforecast.sh"
## subprocess arguments must be strings; YAML may load unquoted values as ints
cmd = [bash_script] + [str(arg) for arg in (year, date, ens, varname)]
result = subprocess.run(cmd)
print(result)
## propagate the script's status so the batch launcher can detect failures
sys.exit(result.returncode)