In [1]:
# Standard Python modules
import os, sys
import numpy as np
import pandas as pd
import xarray as xr
from functools import reduce
import xoak

In [2]:
# Set up paths

path_to_data = '/cw3e/mead/projects/cwp140/scratch/dnash/data/'      # project data -- read only
path_to_work = '/cw3e/mead/projects/cwp140/scratch/dnash/data/preprocessed/'
path_to_out  = '../out/'       # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '../figs/'      # figures

In [3]:
### to pull precip time series from data
## choose which option
# (a) select the grid cell closest to each of the communities
# (b) select the 9 closest grid cells to each of the communities - take maximum value
# (c) select the 25 closest grid cells to each of the communities- take maximum value
option = 'a'

### choose which temporal resolution for the precipitation data (hourly or daily)
temporal_res = 'daily'

In [4]:
### TODO: make a yaml dict
ext1 = [-141., -130., 54., 61.] # extent of SEAK 

## six communities
# -135.4519	58.1122, Hoonah (PAOH) 
# -135.3277	59.4538, Skagway (PAGY)
# -135.8894, 59.3988, Klukwan
# -139.671	59.5121, Yakutat (PAYA)
# -133.1358, 55.4769, Craig
# -132.4009, 55.5400, Kasaan

xs = [-135.4519, -135.3277, -135.8894, -139.671, -133.1358, -132.4009]
ys = [58.1122, 59.4538, 59.3988, 59.5121, 55.4769, 55.5400]
lbl1 = ['Hoonah', 'Skagway', 'Klukwan', 'Yakutat', 'Craig', 'Kasaan']
lbl_align = ['center', 'left', 'right', 'center', 'right', 'center'] # where the labels go

## closest station
# -135.4519	58.1122, Hoonah (PAOH) 
# -135.3277	59.4538, Skagway (PAGY)
# -135.5117	59.2429, Haines (PAHN)
# -139.671	59.5121, Yakutat (PAYA)
# -133.076	55.5792, Klawock (PAKW)
# -131.7117	55.3567, Ketchikan (PAKT)
xs2 = [-135.4519, -135.3277, -135.5117, -139.671, -133.076, -131.7117]
ys2 = [58.1122, 59.4538, 59.2429, 59.5121, 55.5792, 55.3567]
lbl2 = ['PAOH', 'PAGY', 'PAHN', 'PAYA', 'PAKW', 'PAKT']

### Import preprocessed SEAK-WRF precip data

In [5]:
# fname_pattern = path_to_work + 'SEAK-WRF-T2/WRFDS_T2_*.nc'
fname_pattern = path_to_work + 'SEAK-WRF-PCPT/WRFDS_PCPT_*.nc'
wrf = xr.open_mfdataset(fname_pattern, combine='by_coords')

if temporal_res == 'hourly':
    wrf = wrf
elif temporal_res == 'daily':
    wrf = wrf.resample(time="1D").sum('time')

ERROR 1: PROJ: proj_create_from_database: Open of /cw3e/mead/projects/cwp140/scratch/dnash/miniconda3/envs/SEAK-clim/share/proj failed


## Generate precipitation time series for each community

In [6]:
%%time

# diff_lat = wrf.lat.values[2] - wrf.lat.values[1]
# diff_lon = wrf.lon.values[2] - wrf.lon.values[1]
df_lst2 = []
row_lbl2 = []
for i, (slon, slat) in enumerate(zip(xs, ys)):
    
    if option == 'a':
        ## select nearest grid cell to station
        points = xr.Dataset({"lat": slat, "lon": slon})
        wrf.xoak.set_index(["lat", "lon"], "sklearn_geo_balltree")
        ds = wrf.xoak.sel(lat=points.lat, lon=points.lon)
    # elif option == 'b':
    #     scale = 1.5 ## "nearest neighbor" grid cells
    #     ds = wrf.sel(lat=slice(slat-diff_lat*scale, slat+diff_lat*scale), lon=slice(slon-diff_lon*scale, slon+diff_lon*scale))
    #     ds = ds.max(['lat', 'lon'])
    # elif option == 'c': 
    #     scale = 2.5 ## "nearest neighbor" grid cells plus buffer
    #     ds = wrf.sel(lat=slice(slat-diff_lat*scale, slat+diff_lat*scale), lon=slice(slon-diff_lon*scale, slon+diff_lon*scale))
    #     ds = ds.max(['lat', 'lon'])
    
    df = ds.PCPT.to_dataframe()
    df = df.rename(columns={"PCPT": lbl1[i]}) # rename precip column to the name of the community
    df_lst2.append(df)
    
    # make nice labels for plot
    lbl = u"{:.2f}\N{DEGREE SIGN}N, {:.2f}\N{DEGREE SIGN}W".format(slat, slon*-1)
    row_lbl2.append(lbl)

CPU times: user 13min 22s, sys: 23min 11s, total: 36min 34s
Wall time: 22min 16s


In [7]:
# df_new = []
# for i, df in enumerate(df_lst2):
#     df = df.drop(['lat', 'lon'], axis=1)
#     df_new.append(df)

In [8]:
## merge all dfs to one
df_merged = reduce(lambda x, y: pd.merge(x, y, on = 'time'), df_lst2)
## hack for weird behavior for daily df option a
if (option == 'a'):
    df_merged = df_merged.drop(['lat_x', 'lat_y', 'lon_x', 'lon_y'], axis=1)
    
df_merged

  df_merged = reduce(lambda x, y: pd.merge(x, y, on = 'time'), df_lst2)
  df_merged = reduce(lambda x, y: pd.merge(x, y, on = 'time'), df_lst2)


Unnamed: 0_level_0,Hoonah,Skagway,Klukwan,Yakutat,Craig,Kasaan
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-01-01,6.348206,0.024933,1.078705,0.031616,2.391998,3.185608
1980-01-02,10.577393,0.257446,1.675598,4.399902,0.661163,5.981506
1980-01-03,20.424011,0.000000,0.099213,0.848511,0.114136,0.127930
1980-01-04,0.193726,0.000000,0.000000,0.000000,0.106445,0.052307
1980-01-05,0.000000,0.000000,0.000000,2.024292,0.000000,0.000000
...,...,...,...,...,...,...
2019-12-27,1.046875,7.019531,0.000000,10.218750,1.460938,5.585938
2019-12-28,15.382812,23.589844,18.589844,19.695312,45.390625,32.343750
2019-12-29,0.000000,6.445312,6.800781,10.859375,0.062500,1.796875
2019-12-30,12.171875,22.492188,16.894531,24.093750,18.484375,8.210938


In [9]:
## save to csv file
df_merged.to_csv(path_to_out + 'SEAK_precip_max_{0}_{1}.csv'.format(option, temporal_res))