## Jupyter (Python) notebook to generate the study sites shapefile, and to extract precipitation, evapotranspiration, and EVI data from geospatial datasets. 


In [33]:
import os
import sys
import numpy as np
import glob
import geopandas as gp
import georasters as gr
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# Gauge identifiers for the study catchments, stored as strings.
# They are used further down to select rows by the SITE_NO index
# (presumably SITE_NO is read as text from the shapefiles — verify).
sitelist = [
    '11154700', '11200800', '11299600', '11046360', '11180825',
    '11046300', '11180960', '11182500', '11449500', '11379500',
    '11284400', '11224500', '11253310', '11141280', '11151300',
    '11469000', '11111500', '11176400', '11172945', '11132500',
    '11475800', '11134800', '11180900', '11475560', '11476600',
]

## Get dataframe of site properties and polygons

In [34]:
# Join the basin polygons to the USGS station names and keep only the study gauges.
basins_df = gp.read_file('../data/basins/basins18_utm.shp')[['SITE_NO', 'geometry']]
usgs_df = gp.read_file('../data/USGS_gages/USGS_Streamgages-NHD_Locations.shp')[['SITE_NO', 'STATION_NM']]
# NOTE(review): on recent pandas, .loc with a label that is missing from the index raises
# KeyError; older releases returned an all-NaN row instead (removed by the dropna below).
sites = basins_df.merge(usgs_df, on='SITE_NO').set_index('SITE_NO').loc[sitelist]
sites['gauge_id'] = sites.index

# Add Dry Creek catchment, which is not USGS
dry = gp.read_file('../data/dry_creek_polygon/dry.shp')
# DataFrame.append was removed in pandas 2.0, so the extra row is built as a one-row
# GeoDataFrame and concatenated instead. The polygon is inserted as-is (no reprojection),
# exactly as before — assumes dry.shp uses the same CRS as the basins layer; TODO confirm.
dry_row = gp.GeoDataFrame(
    {'gauge_id': ['00000000'], 'geometry': [dry.geometry.values[0]], 'STATION_NM': ['Dry Creek']},
    crs=sites.crs,
)[sites.columns]  # same column order as `sites`, so concat does not reorder columns
sites = pd.concat([sites, dry_row], ignore_index=True)
sites.index = sites.gauge_id
# Drop any row with a missing value (e.g. a gauge without a polygon or station name).
sites = sites.dropna(axis=0)

# Save sites to shapefile
sites.to_file('../data/sites.shp')

In [35]:
# Load daily USGS discharge for one gauge, using a local cache when available.
# Data are only downloaded when no readable cache file exists in ../data/flow_data/.
# NOTE(review): the request URL below asks for 2018-01-01 through 2018-10-1, which does not
# match the 1980-2017 period this notebook previously advertised — confirm the intended dates.
def getFlow(site):
    """Return a DataFrame of discharge for gauge `site`, indexed by datetime.

    Parameters
    ----------
    site : str
        Gauge number; used both in the cache file name and in the request URL.

    Returns
    -------
    pandas.DataFrame
        The cached CSV contents when '../data/flow_data/<site>.csv' can be read
        (indexed by its 'datetime' column); otherwise a freshly downloaded frame
        with a single column 'q', which is also written to that cache file.

    Raises
    ------
    Whatever the download/parsing raises when there is no usable cache
    (e.g. network errors, or a response with an unexpected number of columns).
    """
    cache_path = '../data/flow_data/' + site + '.csv'
    try:
        return pd.read_csv(cache_path, parse_dates=True, index_col='datetime')
    except (OSError, ValueError):
        # Missing, empty or malformed cache file: fall through and download.
        # (Deliberately not a bare except, so Ctrl-C is not mistaken for a cache miss.)
        pass

    url = 'https://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb&site_no=' + site + '&referred_module=sw&period=&begin_date=2018-01-01&end_date=2018-10-1'
    # NOTE(review): header=31 hard-codes the length of the preamble in the response;
    # confirm it still lines up with the first data row.
    df = pd.read_csv(url, header=31, sep=r'\s+')
    df.columns = ['usgs', 'site', 'datetime', 'q', 'a']
    df.index = pd.to_datetime(df.datetime)
    df = df[['q']]
    # Non-numeric entries are left untouched rather than raising.
    df.q = df.q.astype(float, errors='ignore')
    # Make sure the cache directory exists so a fresh checkout can save the download.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    df.to_csv(cache_path)
    return df

## Download and save discharge data

In [36]:
# Build one area-normalised daily discharge column per gauge, then sum to monthly totals.
discharge_dfs = []
dates = pd.date_range('1980-01-01', '2018-12-31')
for i,row in sites.iterrows():
    gagestr = row.gauge_id
    if gagestr=='00000000':
        # Dry Creek is not fetched through getFlow; its record is read from a CSV below.
        continue
    try:
        df = getFlow(gagestr)
    except Exception as err:
        # Sites without retrievable flow data are removed from `sites` (as before),
        # but the removal is now reported instead of happening silently.
        print('Dropping site ' + gagestr + ': could not load flow data (' + repr(err) + ')')
        sites = sites.loc[sites.gauge_id!=gagestr]
        continue

    rng = df.index
    df = pd.DataFrame.from_dict({gagestr:df.q.values}).set_index(rng)
    df = df*2.44657555e12 # convert to mm^3/day (assumes q is in ft^3/s — TODO confirm)
    area = row.geometry.area*1e6 # area to mm^2 (assumes polygon coordinates are in metres — TODO confirm)
    df = df/area # flow in mm/day
    df = df.reindex(dates)  # align every gauge on the same daily index; gaps become NaN
    discharge_dfs.append(df)

# Incorporate dry creek discharge data
dryq = pd.read_csv('../data/dry_creek_discharge.csv', index_col=0, parse_dates=True)
discharge_dfs.append(dryq)
discharge_df = pd.concat(discharge_dfs, axis=1)
# Monthly totals labelled with the first day of each month. This replaces
# resample('M', label='left', loffset='1D', closed='right', how=...), whose `how` and
# `loffset` arguments no longer exist in pandas. The sum is taken on the raw
# values, so a month containing any NaN yields NaN rather than a partial total.
discharge_df = discharge_df.resample('MS').agg(lambda col: col.values.sum())
discharge_df.to_csv('../data/discharge_df.csv')

## Extract raster data from monthly PRISM rainfall and ET rasters

In [10]:
def _site_means_from_rasters(raster_files, site_frame, stamp_slice, stamp_format):
    """Average each raster over every site polygon and return a time-indexed table.

    Parameters
    ----------
    raster_files : list of str
        Paths of the rasters to read, one per time step.
    site_frame : GeoDataFrame
        Sites to clip to; its `geometry` is passed to the raster clip and its
        `gauge_id` values become the column labels.
    stamp_slice : slice
        Portion of each file path that holds the time stamp.
    stamp_format : str
        Format string used to parse that portion into a timestamp.

    Returns
    -------
    pandas.DataFrame
        One row per raster (sorted by timestamp) and one column per site.
    """
    values = np.zeros((len(raster_files), len(site_frame)))
    stamps = []
    for row_idx, path in enumerate(raster_files):
        rast = gr.from_file(path)
        rast.nodata_value = np.nan
        # the time stamp is taken from the file name
        stamps.append(pd.to_datetime(path[stamp_slice], format=stamp_format))
        # one clipped array per site; only cells >= 0 enter the mean
        # (NaN cells fail the comparison and are therefore excluded too)
        clipped = [item.raster.data for item in rast.clip(site_frame.geometry)]
        values[row_idx, :] = [np.mean(cells[cells >= 0]) for cells in clipped]
    return pd.DataFrame(values, index=stamps, columns=site_frame.gauge_id.values).sort_index()


# Monthly precipitation rasters: the file name ends in YYYYMM.tif
precip_files = glob.glob('../data/monthly_ppt/2*/*.tif')
precip = _site_means_from_rasters(precip_files, sites, slice(-10, -4), '%Y%m')

# Monthly ET rasters: the file name ends in MM-YYYY.tif
et_files = glob.glob('../data/monthly_ET/*.tif')
et = _site_means_from_rasters(et_files, sites, slice(-11, -4), '%m-%Y')

# Save extracted data
precip.to_csv('../data/precip_sites.csv')
et.to_csv('../data/et_sites.csv')

# Process and save EVI extracted from Earth Engine 

The Google Earth Engine script used to extract and download the EVI GeoJSON data can be found [here](https://code.earthengine.google.com/a15f4246f8bbc070913a9c0217a1b031). The file generated by Earth Engine is located at `../data/modis_mean_evi.geojson`.

In [14]:
# Read the EVI records exported from Earth Engine and parse their date strings
evi_records = gp.read_file('../data/modis_mean_evi.geojson')
evi_records['datetime'] = pd.to_datetime(evi_records.date, format='%Y_%m_%d')

# Reshape to one column per gauge, ordered in time, then average within each calendar month
evi_wide = evi_records.pivot(index='datetime', columns='gauge_id', values='mean')
evi_monthly = evi_wide.sort_index().resample('MS').mean()

# the EarthEngine multiplier for EVI product is 0.0001
evi = 0.0001*evi_monthly

# Keep only the study sites, in the same order as `sites`, and save
evi = evi[sites.index]
evi.to_csv('../data/evi_sites.csv')