# FutureFish data processing
### This notebook reads in stream temperature and streamflow projections under climate change for the Pacific Northwest. It creates a dataset which combines the two datasets on a common coordinate system and common temporal structure.
### The notebook requires that the datasets have been downloaded from the following locations:
* Stream temperature: https://www.fs.fed.us/rm/boise/AWAE/projects/NorWeST.html
* Streamflow: www.hydro.washington.edu/CRCC


In [None]:
%pylab inline
import os
import glob
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gp
import matplotlib as mpl
import matplotlib.pyplot as plt
from jupyterthemes import jtplot
import utm
from scipy.spatial import KDTree
jtplot.style(jtplot.infer_theme(), context='paper', fscale=2)
jtplot.figsize(x=20, y=12)
# Helpers from the project's data_processing module. The name list is wrapped
# in parentheses so that it can span several lines; without them the trailing
# comma followed by a newline is a SyntaxError.
from futurefish.data_processing import (
    calcLatLon,
    get_model_ts,
    metric_min7day_streamflow,
    locate_nearest_neighbor_values,
    create_collated_dataset_temperature,
    convert_coordinates,
)

# Paths of the stream temperature shapefiles. Adjust this glob pattern to
# match the location where you have downloaded the stream temperature files.
SHAPEFILES = glob.glob('../../data/**/**/*.shp')

# CSV file describing the streamflow sites; the cells below read its
# 'Latitude' and 'Site ID' columns.
STREAMFLOW_META = '../futurefish/data/full_site_test_dataset.csv'

# Set this parameter given your own file system with the location where
# you have downloaded the streamflow files.
# NOTE: no value is assigned yet -- fill in a string path (for example one
# ending in '/') before running this cell, otherwise it will not parse.
streamflow_file_directory = 

### The stream temperature dataset includes temperature projections for two climatological periods, the 2040s and the 2080s. There are a variety of modeling options, but we will select out:

* S39_2040DM - Future Maximum Weekly Maximum Temperature (MWMT or 7DADM) stream scenario based on global climate model ensemble average projected changes for the A1B warming trajectory in the 2040s (2030-2059). Future stream deltas within a NorWeST unit account for differential sensitivity among streams so that cold streams warm less than warm streams
* S41_2080DM -  Future Maximum Weekly Maximum Temperature (MWMT or 7DADM) stream scenario based on global climate model ensemble average projected changes for the A1B warming trajectory in the 2080s (2070-2099). Future stream deltas within a NorWeST unit account for differential sensitivity among streams so that cold streams warm less than warm streams
* We will also select out the 13 historic years (1993-2005) corresponding to the column names 'S3_1993', 'S4_1994', 'S5_1995', 'S6_1996', 'S7_1997', 'S8_1998', 'S9_1999', 'S10_2000', 'S11_2001', 'S12_2002', 'S13_2003', 'S14_2004', 'S15_2005'. These 13 years will be averaged to represent the historic time period.

These three different periods will be compared to streamflow volume projections for coinciding time periods.

In [None]:
# Read every shapefile into its own GeoDataFrame, then stack them into a
# single table with a freshly numbered index.
dataframes = list(map(gp.GeoDataFrame.from_file, SHAPEFILES))
gdf = gp.GeoDataFrame(pd.concat(dataframes, ignore_index=True))

In [None]:
# The full table is large, so keep only the two future-scenario columns and
# the geometry; the smaller sample is faster to work with.
gdf_selected_columns_future = gdf[['S39_2040DM', 'S41_2080DM', 'geometry']]

# Maps the source column names to descriptive labels; it is handed to
# create_collated_dataset_temperature further down.
translating_temperature_keys_dictionary = {
    'Historical': 'Stream Temperature Historical',
    'S39_2040DM': 'Stream Temperature 2040s',
    'S41_2080DM': 'Stream Temperature 2080s',
}

# Keep only the sites for which a 2040s projection is available (non-null).
cleaned_up_gdf_future = gdf_selected_columns_future.loc[
    gdf_selected_columns_future['S39_2040DM'].notnull()
]

In [None]:
# These are the columns pertaining to the historic period (1993-2005) within
# the temperature dataset, plus the site geometry.
gdf_selected_columns_historical = gdf[['S3_1993', 'S4_1994', 'S5_1995', 'S6_1996', 'S7_1997',
                            'S8_1998', 'S9_1999', 'S10_2000', 'S11_2001', 'S12_2002',
                           'S13_2003', 'S14_2004', 'S15_2005', 'geometry']]
# Average the yearly columns for each site. The geometry column is removed
# first: it is not numeric, and recent pandas versions raise an error instead
# of silently skipping non-numeric columns in a row-wise mean.
gdf_historical = gdf_selected_columns_historical.drop('geometry', axis=1).mean(axis=1)
# Remove the sites with NaNs according to the future simulations' availability
# (same non-null 'S39_2040DM' criterion as used for the future scenarios), so
# the historical and future tables cover the same sites.
cleaned_up_gdf_historical = gdf_historical[gdf['S39_2040DM'].notnull()]
# Turn the unnamed Series of means into a one-column frame named 'Historical'.
cleaned_up_gdf_historical = cleaned_up_gdf_historical.to_frame('Historical')

In [None]:
# Convert the coordinates from eastings/northings to degrees longitude
# and degrees latitude
# NOTE(review): the second argument (1500000) is passed straight through to
# convert_coordinates, which is defined in futurefish.data_processing and not
# shown here -- confirm what this constant represents. temperature_sites is
# not referenced again in the cells of this notebook.
temperature_sites = convert_coordinates(gdf, 1500000)

In [None]:
# Load the streamflow site metadata table.
streamflow_sites = pd.read_csv(STREAMFLOW_META)
# The temperature data only covers the United States, so retain just the
# sites located south of the 49th parallel.
streamflow_sites = streamflow_sites.loc[streamflow_sites['Latitude'].lt(49)]

In [None]:
# Build the combined table, starting with the temperature projections.
# Streamflow projections are added to the same table in a later step.
collated_dataset = create_collated_dataset_temperature(
    translating_temperature_keys_dictionary,
    streamflow_sites,
    cleaned_up_gdf_future,
    cleaned_up_gdf_historical,
)

In [None]:
# Specify the time slices which align with the temperature timeframes
# prescribed in the stream temperature metadata.
# NOTE(review): the historical slice ends on 2003-09-30 while the historical
# temperature columns run through 2005 -- confirm that this is intended.
streamflow_timeframes = {'Streamflow Historical': slice('1992-10-01', '2003-09-30'),
                         'Streamflow 2040s': slice('2029-10-01', '2059-09-30'),
                         'Streamflow 2080s': slice('2069-10-01', '2099-09-30')}
for site in streamflow_sites['Site ID']:
    # Read in the streamflow file for this site. os.path.join is used so that
    # the directory may be given with or without a trailing path separator.
    streamflow_file = os.path.join(
        streamflow_file_directory,
        'CCSM4_RCP85_MACA_VIC_P2-{}-streamflow-1.0.csv'.format(site))
    df = get_model_ts(streamflow_file)
    for label, timeframe in streamflow_timeframes.items():
        # For each timeframe populate the collated dataset with the
        # 10th percentile of the minimum 7 day streamflow.
        low_flow = metric_min7day_streamflow(df, timeframe).quantile(q=0.1)
        # DataFrame.set_value was removed in pandas 1.0; the .at accessor is
        # its documented replacement for setting a single cell by label.
        collated_dataset.at[site, label] = low_flow

In [None]:
# Write out the collated dataset (the temperature and streamflow values
# gathered above) to a csv file for use by the fish viability model.
# The output path is relative to the current working directory.
collated_dataset.to_csv('../futurefish/data/sites_streamflow_stream_temperature.csv')