# ASPA-UH streamflow data processing

### Readme

This repository is the cloud home of the UH-ASPA stream gauge project, which is currently maintained by C. Shuler (UH) and M. Erickson (ASPA).




### Notes about Base Data. 
The script uses two supplementary datasets. The first can be added to if additional measurements are taken (so that rating curves can self-update). The second should be updated if the stream gauge housing has a change in elevation, either from being physically moved or because the object it is mounted to is offset somehow (so that the script can automatically correct for any known movements in sensor elevation). It is imperative that these datasets are kept consistent and up to date.


In [1]:
# import all libraries (ensure all are listed in the environment.yml file)
import os
import pandas as pd
import matplotlib 

#### Functions used in this notebook

In [2]:
from datetime import date, datetime, timedelta
def datetime_range(start, end, delta):
    """Bind ``current`` to ``start``; nothing else happens in the visible body.

    As written in this cell, ``end`` and ``delta`` are never read and the
    function returns ``None``.

    NOTE(review): the remainder of the body appears to be missing. Judging by
    the name and parameters it presumably steps from ``start`` towards ``end``
    in increments of ``delta`` -- confirm against the original notebook.
    """
    current = start

#### Define Which Stations to analyze

# Daily flow totalizing

In [None]:
plt.close("all")  # release figures left open by earlier cells

# Name of the discharge column to aggregate, plot and export.
# Choices used in this notebook: 'Man_q_m3/sec', 'Power_law_Modeled_Q'
# or '2d_poly_fit_Modeled_Q'.
Curve_type = '2d_poly_fit_Modeled_Q'

# ---- Step 1: collapse each station's record to daily means and plot it ----
master_day_flow = {}
for station in stations:
    station_record = All_masters_adjusted[station]

    # Daily mean of every column; the timestamp is then restored as a column.
    indexed_record = station_record.set_index('Date and Time')
    day_flow = indexed_record.resample('D').mean()
    day_flow = day_flow.reset_index(drop=False)
    master_day_flow[station] = day_flow

    # One figure per station: raw series overlaid with the daily means.
    fig = plt.figure(figsize=(12, 5))
    ax1 = plt.axes()
    ax1.set_title(station, color='darkblue')
    ax1.plot(
        station_record['Date and Time'],
        station_record[Curve_type],
        alpha=.7, color='b', marker='.',
        label="15 minute flows [m3/s]",
    )
    ax1.plot(
        day_flow['Date and Time'],
        day_flow[Curve_type],
        alpha=.7, color='y', marker='.',
        label="Daily average flows [m3/s]",
    )
    ax1.legend(loc=2)

    plt.xticks(rotation=20)
    plt.tight_layout()

# ---- Step 2: one wide table of daily flows in CFS, plus per-station copies ----
# The table is seeded with the date column of whichever station was processed
# last above; the outer merges below add any dates the other stations have.
Average_daily_flows = pd.DataFrame({'Date and Time': day_flow['Date and Time']})
Raws = {}  # per-station frames in the layout the baseflow separation cell expects

for station in stations:
    daily = master_day_flow[station]
    cfs_column = station + "_CFS"

    # Two-column frame: the date and the flow converted from m3/s to cfs.
    station_cfs = pd.DataFrame({
        'Date and Time': daily['Date and Time'],
        cfs_column: daily[Curve_type] * 35.314666212661,
    })

    Average_daily_flows = Average_daily_flows.merge(
        station_cfs, how='outer', on='Date and Time'
    )
    print("{} ave flow {} CFS".format(station, station_cfs[cfs_column].mean()))

    # Same data under the column names used downstream. Note that index=str
    # also converts the row labels to strings.
    Raws[station] = station_cfs.rename(
        index=str,
        columns={"Date and Time": "Date", cfs_column: "Total Flow (cfs)"},
    )

# Daily streamflow for all stations lined up on date. The values depend on the
# Curve_type chosen at the top of this cell.
Average_daily_flows = Average_daily_flows.sort_values('Date and Time')

# Baseflow separation 

Note: this uses the Turning Point method, described in "U.S. Geological Survey Groundwater Toolbox, A Graphical and Mapping Interface for Analysis of Hydrologic Data (Version 1.0)—User Guide for Estimation of Base Flow, Runoff, and Groundwater Recharge From Streamflow Data".
Pg. 2 explains the process and cites the BFI program (Wahl and Wahl, 1995) as the method used.

In [None]:
plt.close("all")  # close previous figures to clear memory

N = 5                     # block length in days; one minimum is taken per N-day block
tp_test_factor = 0.9      # turning point test factor: a block minimum is a turning point when
                          # tp_test_factor * minimum is less than both adjacent block minimums

All_stations = {}         # final product: {station: DataFrame of total flow, baseflow and runoff}

# Parallel lists, one entry per station, used to build the summary dataframe.
Site = []
SumTotal = []; SumBF = []; SumRO = []
AveTotal = []; AveBF = []; AveRO = []
BFTF = []
ROTF = []                 # declared with the others but not filled in this cell

for stato in stations:
    # Work positionally so the lookups do not depend on the row labels of
    # Raws[stato] (integer [] lookups on non-integer labels are deprecated in pandas).
    station_dates = Raws[stato]['Date']
    station_flows = Raws[stato]['Total Flow (cfs)'].to_numpy(dtype=float)

    # ---- N-day block statistics ----
    # Blocks start at position 1 and advance N days at a time; each statistic is
    # stamped with the date at offset N//2 from the block start.
    mean_dates = []; mins = []; means = []
    for i in range(1, len(station_dates) - N, N):
        # Use all N days of the block (the previous inner loop stopped at N-1
        # and so ignored the last day of every block).
        N_day_data = station_flows[i:i + N]
        block_date = station_dates.iloc[i + (N // 2)]
        mean_dates.append(block_date)
        means.append([block_date, np.mean(N_day_data)])
        mins.append([block_date, np.min(N_day_data)])

    # ---- Turning point test ----
    # Only blocks with a neighbour on both sides are tested. Starting at 1 keeps
    # mins[i-1] from wrapping round to the last block when i == 0.
    turning_points = []; tp_dates = []; tp_flow = []; sf_dates = []; sf_flow = []
    for i in range(1, len(mins) - 1):
        scaled_min = tp_test_factor * mins[i][1]
        if scaled_min < mins[i + 1][1] and scaled_min < mins[i - 1][1]:
            turning_points.append(mins[i])
            tp_dates.append(mins[i][0])
            tp_flow.append(mins[i][1])

    # ---- Assemble the daily table ----
    Total_flows = pd.DataFrame({'Date': Raws[stato]['Date'],
                                'Total_flow_CFS': Raws[stato]['Total Flow (cfs)']})
    # Give the turning-point columns explicit dtypes so the merge key matches
    # Total_flows['Date'] even when no turning points were found.
    Baseflows = pd.DataFrame({'Date': pd.Series(tp_dates, dtype=Total_flows['Date'].dtype),
                              'Base_flow_CFS': pd.Series(tp_flow, dtype=float)})
    All_flows = Total_flows.merge(Baseflows, how='outer', on='Date')   # final dataframe with separated flows

    # Baseflow is only known at turning points; linearly interpolate to get a
    # value for each day. Assign the result back rather than calling
    # interpolate(inplace=True) on the column selection, which does not update
    # All_flows under pandas copy-on-write.
    All_flows['Base_flow_CFS'] = All_flows['Base_flow_CFS'].interpolate()
    All_flows['Runoff_CFS'] = All_flows['Total_flow_CFS'] - All_flows['Base_flow_CFS']
    All_flows['Runoff_CFS'] = All_flows['Runoff_CFS'].clip(lower=0)    # convert any negative runoff values to zero

    All_stations[stato] = All_flows

    # ---- Summary values for the comparison dataframe ----
    total_sum = All_flows['Total_flow_CFS'].sum()
    base_sum = All_flows['Base_flow_CFS'].sum()
    total_mean = All_flows['Total_flow_CFS'].mean()
    base_mean = All_flows['Base_flow_CFS'].mean()

    Site.append(stato)
    SumTotal.append(total_sum); SumBF.append(base_sum); SumRO.append(total_sum - base_sum)
    AveTotal.append(total_mean); AveBF.append(base_mean); AveRO.append(total_mean - base_mean)
    BFTF.append(base_mean / total_mean)

    # ---- Plot total flow against the separated baseflow ----
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(All_flows['Date'], All_flows['Total_flow_CFS'], '-', label='Total daily flow', marker='.')
    ax.plot(All_flows['Date'], All_flows['Base_flow_CFS'], '-', label='Baseflow', marker='.')
    ax.set_title(stato)
    ax.legend()
    plt.ylabel('Discharge (CFS)')

    plt.xticks(rotation=20)
    plt.tight_layout()

### produce streamflow statistics, yearly, and monthly

In [None]:
# Summary dataframe with one row per station, built from the per-station lists
# filled in by the baseflow separation cell.
Annual_stats_all = pd.DataFrame({'Site':Site,'Ave_Total_Q_[cfs]':AveTotal,'AveBF_[cfs]':AveBF,'Ave RO_[cfs]':AveRO,'BF:Q':BFTF})

# Write the summary to a CSV in the workspace folder.
Annual_stats_all.to_csv(os.path.join(workspace, "Annual_UH-ASPA_streamflows.csv"))

# ---- {Station_stats}: one dataframe per station, one row per calendar month ----
# Rows are grouped on month number only, so the same month from different
# years is averaged together.
flow_columns = {'Total_flow_CFS': 'Total_flow', 'Base_flow_CFS': 'Baseflow', 'Runoff_CFS': 'Runoff'}
Station_stats = {}
for stato in stations:
    station_flows = All_stations[stato]
    station_flows['Month'] = station_flows['Date'].dt.month   # adds a Month column to the stored frame

    # sort=False keeps months in order of first appearance in the record.
    tica = (
        station_flows.groupby('Month', sort=False)[list(flow_columns)]
        .mean()
        .rename(columns=flow_columns)
        .reset_index()
    )
    tica['BF:TF'] = tica['Baseflow'] / tica['Total_flow']
    tica['Site'] = stato
    Station_stats[stato] = tica

# ---- {Monthly_stats}: one dataframe per month, one row per station ----
Monthly_stats = {}
month_key = {1:'January',  2:'February',  3:'March',  4:'April',  5:'May',  6:'June',  7:'July',  8:'August',  9:'September',  10:'October',  11:'November',  12:'December'}
for m in range(1, 13):
    # Collect every station's row for this month and concatenate once, instead
    # of repeatedly concatenating onto an empty seed dataframe.
    month_rows = [Station_stats[s][Station_stats[s]['Month'] == m] for s in stations]
    Monthly_stats[month_key[m]] = pd.concat(month_rows, axis=0) if month_rows else pd.DataFrame()

# Date stamp shared by every file written below.
today_stamp = str(date.today())

# Write a daily streamflow CSV for each station.
for station_name, station_table in All_stations.items():
    station_table.to_csv(os.path.join(workspace, "{}_Daily_streamflowCFS_{}_draft.csv".format(station_name, today_stamp)))

# Write a monthly average streamflow CSV for each month.
for month_name, month_table in Monthly_stats.items():
    month_table.to_csv(os.path.join(workspace, "{}_MonthlyAve_flowCFS_{}_draft.csv".format(month_name, today_stamp)))