# Flash drought dataset extraction

In [3]:
import os
import glob as glob
import xarray as xr
import sys
import dask
import tempfile
from dask.diagnostics import ProgressBar
from dask.distributed import Client, LocalCluster
import warnings
import logging
# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings('ignore') 
# Reduce log noise: only errors from "distributed", warnings and above from "flox".
logging.getLogger("distributed").setLevel(logging.ERROR)
logging.getLogger('flox').setLevel(logging.WARNING)

# Put the project code directory on the import path, then pull in everything
# `extract` exports (presumably the *_daily_extract helpers used below — confirm).
sys.path.append('/g/data/mn51/users/jb6465/code/flash-drought/attribution-python')
from extract import *

## Extract reanalysis

#### Extract BARRA (faster without dask)

In [2]:
# Output directory and domain key handed to every BARRA extraction call.
extracted_data_save_dir = '/g/data/mn51/users/jb6465/data/flash_drought/reanalysis/BARRA-R2/'
extract_domain = 'barra_domain'

# Variables to extract and the years to cover (1979 through 2024 inclusive).
barra_variables = ('tasmax', 'tasmin', 'sfcWind', 'tas', 'rsds', 'huss', 'hurs', 'ps', 'pr')
barra_years = range(1979, 2025)

for target_var in barra_variables:
    # One progress line per variable, printed before its years are processed.
    print(target_var)
    for year in barra_years:
        barra_daily_extract(target_var, extracted_data_save_dir, extract_domain, year)

tasmax
tasmin
sfcWind
tas
rsds
huss
hurs
ps
pr


#### Extract ERA5 and ERA5-Land (faster with dask)

In [4]:
# Dask settings: chunk sizing/splitting plus generous communication timeouts,
# retries and allowed failures for the distributed scheduler.
dask_settings = {
    'array.chunk-size': "256 MiB",
    'array.slicing.split_large_chunks': True,
    'distributed.comm.timeouts.connect': '120s',
    'distributed.comm.timeouts.tcp': '120s',
    'distributed.comm.retry.count': 10,
    'distributed.scheduler.allowed-failures': 20,
    "distributed.scheduler.worker-saturation": 1.1,
}
dask.config.set(dask_settings)

# Start a local cluster of 12 single-threaded workers that use a freshly
# created temporary directory as their local_directory.
# NOTE(review): memory_limit is documented as a per-worker limit in
# dask.distributed, so 12 workers may request ~756 GB in total — confirm this
# matches the node the notebook runs on.
client = Client(
    n_workers=12,
    threads_per_worker=1,
    local_directory=tempfile.mkdtemp(),
    memory_limit="63000mb",
)

In [5]:
# Output directory and domain key handed to every ERA5 extraction call.
# To extract ERA5-Land instead: point this directory at the ERA5-Land folder,
# pass 'ERA5-Land' as the final argument below, and use the keys 'u10'/'v10'
# in place of '10u'/'10v' (the remaining keys are the same).
extracted_data_save_dir = '/g/data/mn51/users/jb6465/data/flash_drought/reanalysis/ERA5/'
extract_domain = 'barra_domain'

# Variable keys to extract and the years to cover (1979 through 2024 inclusive).
era5_variables = ('10u', '10v', '10w', '2t', '2tMax', '2tMin', 'ssrd', 'sp', '2d', 'tp')
era5_years = range(1979, 2025)

for target_var in era5_variables:
    # One progress line per variable, printed before its years are processed.
    print(target_var)
    for year in era5_years:
        era5_daily_extract(target_var, extracted_data_save_dir, extract_domain, year, 'ERA5')


10u
10v
10w
2t
2tMax
2tMin
ssrd
sp
2d
tp


#### Extract MERRA2 (faster with dask)

In [5]:
# Output directory and domain key handed to every MERRA2 extraction call.
extracted_data_save_dir = '/g/data/mn51/users/jb6465/data/flash_drought/reanalysis/MERRA2/'
extract_domain = 'barra_domain'

# Each entry is a list of variable names passed to the extractor together in
# a single call; the years cover 1980 through 2022 inclusive.
merra2_variable_groups = (
    ['SWGDN'],
    ['T2M', 'T2MDEW', 'QV2M', 'PS'],
    ['U2M', 'V2M'],
    ['T2M_MAX'],
    ['T2M_MIN'],
)
merra2_years = range(1980, 2023)

for target_var in merra2_variable_groups:
    # One progress line per variable group, printed before its years are processed.
    print(target_var)
    for year in merra2_years:
        merra2_daily_extract(target_var, extracted_data_save_dir, extract_domain, year)

['SWGDN']
['T2M', 'T2MDEW', 'QV2M', 'PS']
['U2M', 'V2M']
['T2M_MAX']
['T2M_MIN']


In [6]:
# Close the dask distributed client now that the dask-backed extractions are done.
client.close()