In [7]:
# Standard Python modules
import os, sys
import numpy as np
import pandas as pd
import xarray as xr
from functools import reduce
import glob

In [2]:
# Directory layout used by this notebook

# inputs
path_to_data = '/home/dnash/SEAK_clim_data/'   # project data -- read only
path_to_work = '/work/dnash/SEAK_clim_data/'   # NOTE(review): purpose not shown in this notebook section -- confirm

# outputs (read & write)
path_to_out, path_to_figs = '../out/', '../figs/'   # numerical results / intermediate datafiles, and figures

In [3]:
### How the IVT time series is extracted around each community
#   'a' -> the single grid cell closest to the community
#   'b' -> the 9 closest grid cells, keeping the maximum value
#   'c' -> the 25 closest grid cells, keeping the maximum value
option = 'a'

### Temporal resolution of the extracted series: 'hourly' or 'daily'
### ('daily' makes the loading cells resample to 1-day means)
temporal_res = 'hourly'

In [4]:
### TODO: make a yaml dict
# Extent of SEAK as [lonmin, lonmax, latmin, latmax]
ext1 = [-141., -130., 54., 61.]
lonmin, lonmax, latmin, latmax = ext1

# Communities: (label, lon, lat, where the label goes)
_communities = [
    ('Hoonah',  -135.4519, 58.1122, 'center'),
    ('Skagway', -135.3277, 59.4538, 'left'),
    ('Klukwan', -135.8894, 59.3988, 'right'),
    ('Yakutat', -139.671,  59.5121, 'center'),
    ('Craig',   -133.1358, 55.4769, 'right'),
    ('Kasaan',  -132.4009, 55.5400, 'center'),
]
lbl1 = [name for name, _, _, _ in _communities]
xs = [lon for _, lon, _, _ in _communities]
ys = [lat for _, _, lat, _ in _communities]
lbl_align = [align for _, _, _, align in _communities]

# Closest station to each community, same order: (station id, lon, lat, station name)
_stations = [
    ('PAOH', -135.4519, 58.1122, 'Hoonah'),
    ('PAGY', -135.3277, 59.4538, 'Skagway'),
    ('PAHN', -135.5117, 59.2429, 'Haines'),
    ('PAYA', -139.671,  59.5121, 'Yakutat'),
    ('PAKW', -133.076,  55.5792, 'Klawock'),
    ('PAKT', -131.7117, 55.3567, 'Ketchikan'),
]
lbl2 = [station_id for station_id, _, _, _ in _stations]
xs2 = [lon for _, lon, _, _ in _stations]
ys2 = [lat for _, _, lat, _ in _stations]

In [8]:
def generate_IVT_tseries(era, xs, ys, community_lst, option, year):
    '''Extract a time series per community from `era` and write one csv per community.

    Parameters
    ----------
    era : xarray.Dataset
        Dataset with `lat` and `lon` coordinates. Options 'b' and 'c' also read
        the `time` coordinate and the `IVT`, `uIVT`, `vIVT` and `IWV` variables.
    xs, ys : sequence of float
        Longitude / latitude of each community, in the same order as `community_lst`.
    community_lst : sequence of str
        Community names; used in the progress messages and the csv file names.
    option : {'a', 'b', 'c'}
        'a' : nearest grid cell to the community.
        'b' : box of +/- 1.5 grid spacings around the community (the "9 closest cells").
        'c' : box of +/- 2.5 grid spacings around the community (the "25 closest cells").
        For 'b' and 'c' each row holds the values at the cell where IVT is largest
        within the box at that time step.
    year
        Only used to build the csv file name.

    Raises
    ------
    ValueError
        If `option` is not one of 'a', 'b', 'c'.

    Side effects
    ------------
    Writes `<path_to_out>IVT/<community>_IVT_<year>.csv` (directory is created if missing).
    '''
    # half-width of the search box, expressed in grid spacings
    neighborhood_scale = {'b': 1.5, 'c': 2.5}
    if option != 'a' and option not in neighborhood_scale:
        # fail before any work is done instead of hitting an unbound `df` at write time
        raise ValueError("option must be 'a', 'b' or 'c', got {0!r}".format(option))

    os.makedirs(path_to_out + 'IVT/', exist_ok=True)

    for i, (slon, slat) in enumerate(zip(xs, ys)):
        community = community_lst[i]
        print('Processing ', community)

        if option == 'a':
            ## select nearest grid cell to station
            ds = era.sel(lat=slat, lon=slon, method="nearest").load()
            df = ds.to_dataframe()

        else:
            scale = neighborhood_scale[option]

            # grid spacing; the sign follows the coordinate order, so the slices
            # below are valid for both ascending and descending coordinates
            diff_lat = era.lat.values[1] - era.lat.values[0]
            diff_lon = era.lon.values[1] - era.lon.values[0]

            # select the neighboring grid cells
            box = era.sel(lat=slice(slat - diff_lat * scale, slat + diff_lat * scale),
                          lon=slice(slon - diff_lon * scale, slon + diff_lon * scale))

            ### localized IVT maximum at each time step
            # Mask every cell that is not the IVT maximum of its time step, then
            # collapse lat/lon. Reducing (rather than drop + squeeze) keeps the
            # result one value per time step even when the maximum moves between
            # cells. If several cells tie, the largest of their values is kept.
            is_max = box.IVT == box.IVT.max(['lat', 'lon'])
            event_max = box.where(is_max).max(['lat', 'lon']).load()

            ## make a pandas df indexed by time and put the info in
            # (IVT direction can be derived later from the uIVT / vIVT columns)
            columns = ('IVT', 'uIVT', 'vIVT', 'IWV')
            df = pd.DataFrame(
                data={name: np.atleast_1d(event_max[name].values) for name in columns},
                index=pd.Index(np.atleast_1d(event_max.time.values), name='time'))

        print('Writing ', community, year, 'to csv')
        ## save to csv file
        df.to_csv(path_to_out + 'IVT/{0}_IVT_{1}.csv'.format(community, year))
        

### Import ERA5 IVT data

In [10]:
%%time
## This is a lot of data so trying to process one file at a time
def preprocess(ds):
    '''Subset `ds` to the lat/lon window given by latmin/latmax and lonmin/lonmax.'''
    # NOTE(review): label-based slices assume the coordinates are ordered low -> high -- confirm
    lat_window = slice(latmin, latmax)
    lon_window = slice(lonmin, lonmax)
    return ds.sel(lat=lat_window, lon=lon_window)

ds_lst = []  # not used by this loop; kept in case another cell references it
for yr in range(1980, 1981):
    print('Reading ', yr)

    # files for the current year; the date is embedded in the file name
    # (ERA5_IVT_YYYYMMDD.nc), so a lexical sort is chronological
    filenames = sorted(glob.glob('/data/downloaded/Reanalysis/ERA5/IVT/{0}/ERA5_IVT_*.nc'.format(yr)))

    # one list of per-file DataFrames per community, in the same order as xs / ys / lbl1
    # (these names are read by the cell that writes the csv files)
    df_Hoonah, df_Skagway, df_Klukwan, df_Yakutat, df_Craig, df_Kasaan = ([] for _ in range(6))
    frames_by_site = [df_Hoonah, df_Skagway, df_Klukwan, df_Yakutat, df_Craig, df_Kasaan]

    # process one file at a time to keep memory use small
    for fname in filenames:
        print('Processing', fname)
        # the context manager closes the file once its points are loaded, so
        # handles do not pile up over the hundreds of files in a year
        with xr.open_mfdataset(fname, combine='by_coords', preprocess=preprocess) as era_file:
            if temporal_res == 'daily':
                era = era_file.resample(time="1D").mean('time')
            else:
                era = era_file

            for site_frames, (slon, slat) in zip(frames_by_site, zip(xs, ys)):
                ## select nearest grid cell to station and pull it into memory
                ds = era.sel(lat=slat, lon=slon, method="nearest").load()
                site_frames.append(ds.to_dataframe())
        
    
    

Reading  1980
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800101.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800102.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800103.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800104.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800105.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800106.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800107.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800108.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800109.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800110.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800111.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800112.nc
Processing /data/downloaded/Reanalysis/ERA5/IVT/1980/ERA5_IVT_19800113.nc
Processing /data/downloa

NameError: name 'year' is not defined

In [11]:
# One list of per-file DataFrames per community, in the same order as lbl1
df_final = [df_Hoonah, df_Skagway, df_Klukwan, df_Yakutat, df_Craig, df_Kasaan]
for community, df_community in zip(lbl1, df_final):
    # Keep the index the per-file frames came with (presumably time, from
    # Dataset.to_dataframe()). ignore_index=True would replace it with 0..N-1
    # and leave the csv without that column.
    df = pd.concat(df_community)
    print('Writing ', community, yr, 'to csv')
    ## save to csv file
    df.to_csv(path_to_out + 'IVT/{0}_IVT_{1}.csv'.format(community, yr))

Writing  Hoonah 1980 to csv
Writing  Skagway 1980 to csv
Writing  Klukwan 1980 to csv
Writing  Yakutat 1980 to csv
Writing  Craig 1980 to csv
Writing  Kasaan 1980 to csv


In [None]:
%%time
## This is a lot of data so pulling from each year and processing one year at a time
def preprocess(ds):
    '''Subset `ds` to the lat/lon window given by latmin/latmax and lonmin/lonmax.'''
    # NOTE(review): label-based slices assume the coordinates are ordered low -> high -- confirm
    lat_window = slice(latmin, latmax)
    lon_window = slice(lonmin, lonmax)
    return ds.sel(lat=lat_window, lon=lon_window)

ds_lst = []  # not used by this loop; kept in case another cell references it
for yr in range(1980, 1981):
    print('Reading ', yr)
    # glob pattern matching every file of the current year
    filenames = '/data/downloaded/Reanalysis/ERA5/IVT/{0}/ERA5_IVT_*.nc'.format(yr)

    # open lazily; the context manager closes the year's files once the
    # time series have been extracted and written
    with xr.open_mfdataset(filenames, combine='by_coords', preprocess=preprocess) as era_year:
        if temporal_res == 'daily':
            era = era_year.resample(time="1D").mean('time')
        else:
            era = era_year

        # calculate time series for each community based on grid option
        generate_IVT_tseries(era, xs, ys, lbl1, option, yr)


Reading  1980
Processing  Hoonah
Writing  Hoonah 1980 to csv
Processing  Skagway
Writing  Skagway 1980 to csv
Processing  Klukwan
Writing  Klukwan 1980 to csv
Processing  Yakutat
Writing  Yakutat 1980 to csv
Processing  Craig
Writing  Craig 1980 to csv
Processing  Kasaan
