In [8]:
# Standard Python modules
import os, sys
import numpy as np
import pandas as pd
import xarray as xr
from functools import reduce
import xoak
import metpy.calc as mpcalc
from metpy.units import units

In [2]:
# Set up paths

path_to_data = '/cw3e/mead/projects/cwp140/scratch/dnash/data/'      # project data -- read only
path_to_work = '/cw3e/mead/projects/cwp140/scratch/dnash/data/preprocessed/'
path_to_out  = '../out/'       # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '../figs/'      # figures

In [3]:
### to pull precip time series from data
## choose which option
# (a) select the grid cell closest to each of the communities
# (b) select the 9 closest grid cells to each of the communities - take maximum value
# (c) select the 25 closest grid cells to each of the communities- take maximum value
option = 'a'

### choose which temporal resolution for the precipitation data (hourly or daily)
temporal_res = 'daily'

### variable name (PCPT, T2, UV)
varname = 'UV'


In [4]:
### TODO: make a yaml dict
ext1 = [-141., -130., 54., 61.] # extent of SEAK 

## six communities
# -135.4519	58.1122, Hoonah (PAOH) 
# -135.3277	59.4538, Skagway (PAGY)
# -135.8894, 59.3988, Klukwan
# -139.671	59.5121, Yakutat (PAYA)
# -133.1358, 55.4769, Craig
# -132.4009, 55.5400, Kasaan

xs = [-135.4519, -135.3277, -135.8894, -139.671, -133.1358, -132.4009]
ys = [58.1122, 59.4538, 59.3988, 59.5121, 55.4769, 55.5400]
lbl1 = ['Hoonah', 'Skagway', 'Klukwan', 'Yakutat', 'Craig', 'Kasaan']
lbl_align = ['center', 'left', 'right', 'center', 'right', 'center'] # where the labels go


### Import preprocessed SEAK-WRF precip data

In [5]:
fname_pattern = path_to_work + 'SEAK-WRF-{0}/WRFDS_{0}_*.nc'.format(varname)
wrf = xr.open_mfdataset(fname_pattern, combine='by_coords')
        
if varname == 'UV':
    wrf = wrf.sel(lev='1000')
      
if (temporal_res == 'daily') & (varname == 'PCPT'):
    wrf = wrf.resample(time="1D").sum('time')

elif (temporal_res == 'daily') & (varname != 'PCPT'):
    wrf = wrf.resample(time="1D").mean('time')

elif (temporal_res == 'hourly'):
    wrf = wrf

ERROR 1: PROJ: proj_create_from_database: Open of /cw3e/mead/projects/cwp140/scratch/dnash/miniconda3/envs/SEAK-clim/share/proj failed


## Generate precipitation time series for each community

In [9]:
%%time

# diff_lat = wrf.lat.values[2] - wrf.lat.values[1]
# diff_lon = wrf.lon.values[2] - wrf.lon.values[1]
df_lst2 = []
row_lbl2 = []
for i, (slon, slat) in enumerate(zip(xs, ys)):
    
    if option == 'a':
        ## select nearest grid cell to station
        points = xr.Dataset({"lat": slat, "lon": slon})
        wrf.xoak.set_index(["lat", "lon"], "sklearn_geo_balltree")
        ds = wrf.xoak.sel(lat=points.lat, lon=points.lon)
        
        if varname == 'UV':
            ## calculate UV direction
            uvec = units.Quantity(ds['U'].values, "m/s")
            vvec = units.Quantity(ds['V'].values, "m/s")
            uvdir = mpcalc.wind_direction(uvec, vvec)
            ds = ds.assign(UV=lambda ds: uvdir)

    
    df = ds[varname].to_dataframe()
    df['time'] = ds.time.values
    df = df.rename(columns={varname: lbl1[i]}) # rename from varname to the name of the community
    df_lst2.append(df)

CPU times: user 31min 38s, sys: 40min 54s, total: 1h 12min 33s
Wall time: 2h 6min 32s


In [16]:
## merge all dfs to one
df_merged = reduce(lambda x, y: pd.merge(x, y, on = 'time'), df_lst2)
## hack for weird behavior for daily df option a
if (option == 'a'):
    df_merged = df_merged.drop(['lat_x', 'lat_y', 'lon_x', 'lon_y', 'lev_x', 'lev_y'], axis=1)
    
df_merged

  df_merged = reduce(lambda x, y: pd.merge(x, y, on = 'time'), df_lst2)
  df_merged = reduce(lambda x, y: pd.merge(x, y, on = 'time'), df_lst2)


Unnamed: 0,Hoonah,time,Skagway,Klukwan,Yakutat,Craig,Kasaan
0,16.499893,1980-01-01,327.242584,228.140335,59.335022,115.466476,100.156219
1,65.698051,1980-01-02,337.506927,234.494370,59.823841,114.195038,99.809120
2,53.430416,1980-01-03,306.871979,231.602646,71.329239,175.440323,42.634338
3,22.557716,1980-01-04,280.968018,247.927551,62.434628,332.557251,334.234741
4,85.936264,1980-01-05,299.025696,185.715652,85.019539,18.865433,355.335297
...,...,...,...,...,...,...,...
14605,168.980515,2019-12-27,353.132355,43.484665,180.245224,180.679565,139.058334
14606,127.320236,2019-12-28,336.062927,8.320633,121.337975,155.216583,135.899094
14607,112.846909,2019-12-29,344.921143,30.850540,127.953171,135.850113,121.143997
14608,79.639374,2019-12-30,342.101349,12.699074,107.265442,153.996002,131.886124


In [17]:
## save to csv file
df_merged.to_csv(path_to_out + 'SEAK_{0}_{1}_{2}.csv'.format(varname, option, temporal_res))