In [1]:
# Standard Python modules
import os, sys
import numpy as np
import pandas as pd
import xarray as xr
from functools import reduce

In [2]:
# Set up paths

path_to_data = '/cw3e/mead/projects/cwp140/scratch/dnash/data/'      # project data -- read only
path_to_work = '/cw3e/mead/projects/cwp140/scratch/dnash/data/preprocessed/SEAK-WRF-precip/'
path_to_out  = '../out/'       # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '../figs/'      # figures

In [10]:
### to pull precip time series from data
## choose which option
# (a) select the grid cell closest to each of the communities
# (b) select the 9 closest grid cells to each of the communities - take maximum value
# (c) select the 25 closest grid cells to each of the communities- take maximum value
option = 'a'

### choose which temporal resolution for the precipitation data (hourly or daily)
temporal_res = 'daily'

In [11]:
### TODO: make a yaml dict
ext1 = [-141., -130., 54., 61.] # extent of SEAK 

## six communities
# -135.4519	58.1122, Hoonah (PAOH) 
# -135.3277	59.4538, Skagway (PAGY)
# -135.8894, 59.3988, Klukwan
# -139.671	59.5121, Yakutat (PAYA)
# -133.1358, 55.4769, Craig
# -132.4009, 55.5400, Kasaan

xs = [-135.4519, -135.3277, -135.8894, -139.671, -133.1358, -132.4009]
ys = [58.1122, 59.4538, 59.3988, 59.5121, 55.4769, 55.5400]
lbl1 = ['Hoonah', 'Skagway', 'Klukwan', 'Yakutat', 'Craig', 'Kasaan']
lbl_align = ['center', 'left', 'right', 'center', 'right', 'center'] # where the labels go

## closest station
# -135.4519	58.1122, Hoonah (PAOH) 
# -135.3277	59.4538, Skagway (PAGY)
# -135.5117	59.2429, Haines (PAHN)
# -139.671	59.5121, Yakutat (PAYA)
# -133.076	55.5792, Klawock (PAKW)
# -131.7117	55.3567, Ketchikan (PAKT)
xs2 = [-135.4519, -135.3277, -135.5117, -139.671, -133.076, -131.7117]
ys2 = [58.1122, 59.4538, 59.2429, 59.5121, 55.5792, 55.3567]
lbl2 = ['PAOH', 'PAGY', 'PAHN', 'PAYA', 'PAKW', 'PAKT']

### Import preprocessed SEAK-WRF precip data

In [12]:
fname_pattern = path_to_work + 'WRFDS_PCPT_*.nc'
wrf = xr.open_mfdataset(fname_pattern, combine='by_coords')

if temporal_res == 'hourly':
    wrf = wrf
elif temporal_res == 'daily':
    wrf = wrf.resample(time="1D").sum('time')

## Generate precipitation time series for each community

In [13]:
%%time

diff_lat = wrf.lat.values[2] - wrf.lat.values[1]
diff_lon = wrf.lon.values[2] - wrf.lon.values[1]
df_lst2 = []
row_lbl2 = []
for i, (slon, slat) in enumerate(zip(xs, ys)):
    
    if option == 'a':
        ## select nearest grid cell to station
        ds = wrf.sel(lat=slat, lon=slon, method="nearest")
    elif option == 'b':
        scale = 1.5 ## "nearest neighbor" grid cells
        ds = wrf.sel(lat=slice(slat-diff_lat*scale, slat+diff_lat*scale), lon=slice(slon-diff_lon*scale, slon+diff_lon*scale))
        ds = ds.max(['lat', 'lon'])
    elif option == 'c': 
        scale = 2.5 ## "nearest neighbor" grid cells plus buffer
        ds = wrf.sel(lat=slice(slat-diff_lat*scale, slat+diff_lat*scale), lon=slice(slon-diff_lon*scale, slon+diff_lon*scale))
        ds = ds.max(['lat', 'lon'])
    
    df = ds.prec.to_dataframe()
    df = df.rename(columns={"prec": lbl1[i]}) # rename precip column to the name of the community
    df_lst2.append(df)
    
    # make nice labels for plot
    lbl = u"{:.2f}\N{DEGREE SIGN}N, {:.2f}\N{DEGREE SIGN}W".format(slat, slon*-1)
    row_lbl2.append(lbl)

CPU times: user 14min 14s, sys: 42min 3s, total: 56min 18s
Wall time: 31min 32s


In [14]:
# df_new = []
# for i, df in enumerate(df_lst2):
#     df = df.drop(['lat', 'lon'], axis=1)
#     df_new.append(df)

In [15]:
## merge all dfs to one
df_merged = reduce(lambda x, y: pd.merge(x, y, on = 'time'), df_lst2)
## hack for weird behavior for daily df option a
if (temporal_res == 'daily') & (option == 'a'):
    df_merged = df_merged.drop(['lat_x', 'lat_y', 'lon_x', 'lon_y'], axis=1)
    
df_merged

  df_merged = reduce(lambda x, y: pd.merge(x, y, on = 'time'), df_lst2)
  df_merged = reduce(lambda x, y: pd.merge(x, y, on = 'time'), df_lst2)


Unnamed: 0_level_0,Hoonah,Skagway,Klukwan,Yakutat,Craig,Kasaan
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-01-01,7.655640,0.076904,0.204346,0.000000,6.781860,1.809143
1980-01-02,29.412842,1.299072,0.494904,5.281372,5.020081,4.159241
1980-01-03,7.745972,0.138596,0.077911,0.000000,0.870911,1.140930
1980-01-04,0.078857,0.313400,0.000000,0.000000,0.202881,0.033142
1980-01-05,0.000000,0.000000,0.000000,0.658325,0.000000,0.000000
...,...,...,...,...,...,...
2019-12-27,2.875000,3.273438,14.242188,24.281250,2.414062,3.140625
2019-12-28,40.273438,10.777344,23.679688,47.187500,42.125000,65.742188
2019-12-29,7.265625,2.371094,11.242188,33.281250,3.562500,9.632812
2019-12-30,44.453125,5.171875,19.691406,45.390625,17.234375,47.273438


In [16]:
## save to csv file
df_merged.to_csv(path_to_out + 'SEAK_precip_max_{0}_{1}.csv'.format(option, temporal_res))