In [3]:
import logging
logging.captureWarnings(True)
logging.getLogger('py.warnings').setLevel(logging.ERROR)

import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import Image, display
from dask.distributed import Client, progress
from datetime import datetime, timedelta
import cartopy.crs as ccrs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import matplotlib.ticker as mticker
import matplotlib.path as mpath
import matplotlib.patheffects as PathEffects
from scipy import stats
import scipy
import numpy.ma as ma
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1 import ImageGrid

### Get list of days that AR made landfall in SB (lat=34.5, lon=-119.375)

In [5]:
latitude=34.5
longitude=359.375-119.375

filename =  '/home/sbarc/students/nash/data/ar_catalog/globalARcatalog_MERRA2_1980-2017_v2.0.nc'
ds = xr.open_dataset(filename, engine='netcdf4')
ds = ds.sel(lat=latitude, lon=longitude)
ds = ds.squeeze()
print('ds size in GB {:0.2f}\n'.format(ds.nbytes / 1e9))
ds.info()

ds size in GB 0.02

xarray.Dataset {
dimensions:
	limitID = 5 ;
	stageID = 9 ;
	time = 55520 ;

variables:
	float32 shape(time) ;
		shape:long_name = Shape ;
		shape:units = none ;
	float64 axis(time) ;
		axis:long_name = Axis ;
		axis:units = none ;
	float64 tnsct(time) ;
		tnsct:long_name = Transect ;
		tnsct:units = none ;
	float32 lfloc(time) ;
		lfloc:long_name = Landfall Location ;
		lfloc:units = none ;
	float32 islnd() ;
		islnd:long_name = Is Land (Major Landmasses Only) ;
	float32 iscst() ;
		iscst:long_name = Is Coast (Major Landmasses Only; Inland Water Bodies Not Considered) ;
	float64 lon() ;
		lon:units = degrees_east ;
	float64 lat() ;
		lat:units = degrees_north ;
	float64 lev() ;
		lev:units = m ;
	datetime64[ns] time(time) ;
		time:ntime_written = 55520.0 ;
	float64 ens() ;
		ens:axis = e ;
	float32 year(time) ;
		year:long_name = Year ;
		year:units = none ;
	float32 month(time) ;
		month:long_name = Month ;
		month:units = none ;
	float32 day(time) ;
		day:long_nam

In [9]:
da = ds.shape.dropna(dim='time') ## drop the times where there is no AR present
da['time'] = da.indexes['time'].normalize() ## normalize the time index to get list of days
index = np.unique(da['time']) ## list of days where AR is present in loc
times_list = pd.DatetimeIndex(index) ## create a pandas datetime index for easy time use

In [16]:
## Get only March AR days
march_ar = times_list.month ==3
march_ar = times_list[march_ar]
march_ar

DatetimeIndex(['1980-03-02', '1980-03-03', '1981-03-01', '1981-03-19',
               '1981-03-20', '1981-03-21', '1981-03-26', '1982-03-01',
               '1982-03-07', '1982-03-08',
               ...
               '2014-03-30', '2015-03-11', '2016-03-05', '2016-03-06',
               '2016-03-08', '2016-03-09', '2016-03-11', '2016-03-13',
               '2017-03-21', '2017-03-25'],
              dtype='datetime64[ns]', length=170, freq=None)

### Load only AR days of MERRA2 U, V, and Q between 1000 hPa and 300 hPa

In [18]:
client = Client(processes=True, n_workers=3)
client

0,1
Client  Scheduler: tcp://127.0.0.1:34481  Dashboard: http://127.0.0.1:33336/status,Cluster  Workers: 3  Cores: 48  Memory: 135.02 GB


In [19]:
times_list = march_ar
## create explicit list of files because they are saved in multiple folders :/
year_path = []
version = []
yearmonthday = []
file_list = []

for t in np.arange(len(times_list)):
## Check version based on year in times_list    
    if times_list[t].year >= 2011:
        version.append('4')
    else:   # version must be 1, 2, or 3
        if times_list[t].year >= 2001:
            version.append('3')
        else:  # version must be 1 or 2
            if times_list[t].year >= 1992:
                version.append('2')
            else:    # version must be 1
                version.append('1')
            
    y = str(times_list[t].year)
    if times_list[t].month < 10:
        month = '0' + str(times_list[t].month)
    else:
        month = str(times_list[t].month)

    if times_list[t].day < 10:
        day =  '0' + str(times_list[t].day)
    else:
        day = str(times_list[t].day)

    yearmonthday.append(y + month + day)
    
    if times_list[t].year >= 2016:
        year_path.append('2016-2017')
    else:
        if times_list[t].year >= 2011:
            year_path.append('2011-2015')
        else:
            if times_list[t].year >= 2006:
                year_path.append('2006-2010')
            else:
                if times_list[t].year >= 2000:
                    year_path.append('2000-2005')
                else:
                    if times_list[t].year >= 1993:
                        year_path.append('1993-1999')
                    else:
                        year_path.append('1980-1992')
     
    file_list.append('/home/sbarc/students/nash/data/M2I6NPANA.5.12.4/global_daily_' + year_path[t] + '/MERRA2_' + version[t] + '00.inst6_3d_ana_Np.' + yearmonthday[t] +'.SUB.nc')

In [20]:
## import MERRA2 vertical level data (u, v, q) only for AR days
def preprocess(ds):
    '''keep only the selected location and variables'''
    unused_variables = ['T', 'H', 'SLP', 'O3', 'PS']
    ds = ds.drop(unused_variables)
    return ds.sel(lat=34.5, lon=-119.375, bnds=1, lev=slice(1000.0, 300.0))

ds = xr.open_mfdataset(file_list,
                       engine='netcdf4', concat_dim='time', autoclose=True, preprocess=preprocess)

print('ds size in GB {:0.2f}\n'.format(ds.nbytes / 1e9))
ds.info()

ds size in GB 0.00

xarray.Dataset {
dimensions:
	lev = 21 ;
	time = 170 ;

variables:
	float64 lat() ;
		lat:standard_name = latitude ;
		lat:long_name = latitude ;
		lat:units = degrees_north ;
		lat:axis = Y ;
	float64 lev(lev) ;
		lev:standard_name = air_pressure ;
		lev:long_name = vertical level ;
		lev:units = hPa ;
		lev:positive = down ;
		lev:axis = Z ;
	float64 lon() ;
		lon:standard_name = longitude ;
		lon:long_name = longitude ;
		lon:units = degrees_east ;
		lon:axis = X ;
	float32 QV(time, lev) ;
		QV:standard_name = specific_humidity ;
		QV:long_name = Specific humidity ;
		QV:units = kg/kg ;
		QV:fmissing_value = 999999986991104.0 ;
		QV:vmax = 999999986991104.0 ;
		QV:vmin = -999999986991104.0 ;
	float32 U(time, lev) ;
		U:standard_name = eastward_wind ;
		U:long_name = Eastward wind component ;
		U:units = m/s ;
		U:fmissing_value = 999999986991104.0 ;
		U:vmax = 999999986991104.0 ;
		U:vmin = -999999986991104.0 ;
	float32 V(time, lev) ;
		V:standard_name = northwar

In [21]:
save_path = '/home/sbarc/students/nash/data/MERRA-2/SB_march_AR_vertical_vapor_flux.nc'
ds.to_netcdf(path=save_path, mode = 'w', format='NETCDF4')