In [None]:
import sys
import os
import time
import datetime as dt

import numpy as np
import scipy.ndimage
import pandas as pd
import matplotlib.pyplot as plt

from osgeo import gdal
from osgeo import osr
from osgeo.gdalconst import *
# Look up GDAL's 'GTiff' driver by name and register it.
driver = gdal.GetDriverByName('GTiff')
driver.Register()



In [None]:
def interpolate_from_storageRatio(targetStorageRatio, column):
    """Linearly interpolate ``column`` of ``relationship_df`` at a storage ratio.

    The module-level ``relationship_df`` table is read; its
    ``"norm_SoilWatVol"`` column is the x-axis and ``column`` is the y-axis.

    Parameters
    ----------
    targetStorageRatio : float
        Storage ratio at which to evaluate the relationship.
    column : str
        Name of the ``relationship_df`` column to interpolate.

    Returns
    -------
    float or int
        * ``0`` when the target lies below the smallest tabulated storage
          ratio.
        * The tabulated value when the target coincides with a table row.
        * The value of the last table row when the target lies at or above
          the largest tabulated storage ratio.
        * The linear interpolation between the two bracketing rows otherwise.
    """
    # Keep the first row for each storage ratio, then sort so that the x-axis
    # is strictly increasing as np.interp requires.
    table = (relationship_df
             .drop_duplicates(subset="norm_SoilWatVol")
             .sort_values("norm_SoilWatVol"))
    storage_ratios = table["norm_SoilWatVol"].to_numpy(dtype=float)
    column_values = table[column].to_numpy(dtype=float)

    if targetStorageRatio < storage_ratios[0]:
        return 0

    # np.interp returns exact table values at the nodes and clamps to the last
    # value beyond the table, so a storage ratio equal to (or above) the table
    # maximum no longer fails on an empty selection.
    return float(np.interp(targetStorageRatio, storage_ratios, column_values))



def plotFlow(times, day_month_series, precip, streamFlow, streamFlow_observed, m):
    """Plot modeled (red) and observed (black) streamflow against ``times``.

    Parameters
    ----------
    times : sequence
        X values, one per sample.
    day_month_series : iterable of str
        Day-month label of every sample, aligned with ``times``. Labels that
        contain ``'01-'`` are used as x tick labels.
    precip : sequence
        Accepted for signature parity with the other plot helpers; nothing is
        drawn from it here.
    streamFlow : sequence of float
        Modeled streamflow; also defines the five y ticks.
    streamFlow_observed : sequence of float
        Observed streamflow.
    m : float
        Decay parameter, shown in the figure annotation.

    The figure title uses the module-level ``watershed_name``.
    """
    fig, ax = plt.subplots(figsize=(14, 5.5))
    plt.gcf().text(0.65, 0.8, "05-01 to 10-25-2017, hourly data\ndecay m: {}".format(m),
                   fontsize=13, color='black')
    ax.set_title(watershed_name + ' ', fontsize=20)
    ax.set_ylabel(r'Streamflow per unit area [mm/hr]', fontsize=16, labelpad=16, rotation='vertical')
    ax.set_xlabel("Time", fontsize=16, labelpad=10, rotation='horizontal')

    # Five y ticks: minimum, the three quartile points of the range, maximum.
    flow_min = min(streamFlow)
    flow_max = max(streamFlow)
    flow_span = flow_max - flow_min
    spaced_yticks = [flow_min,
                     flow_min + flow_span*0.25,
                     flow_min + flow_span*0.5,
                     flow_min + flow_span*0.75,
                     flow_max]
    ax.set_yticks(spaced_yticks)
    ax.set_yticklabels(['{0:.4f}'.format(tick) for tick in spaced_yticks],
                       fontsize=13)

    # Place each "first of the month" label at the first x value that carries
    # it. Deriving the positions from the data keeps ticks and labels aligned
    # whatever the start of the series is (the previous hard-coded hour
    # offsets were not expressed in the units of `times`).
    month_start_ticks = {}
    for time_value, label in zip(times, day_month_series):
        if '01-' in label and label not in month_start_ticks:
            month_start_ticks[label] = time_value
    first_days_of_month = list(month_start_ticks)
    first_days_of_month_indices = list(month_start_ticks.values())
    print(first_days_of_month)
    print(first_days_of_month_indices)
    ax.set_xticks(first_days_of_month_indices)
    ax.set_xticklabels(first_days_of_month)

    ax.plot(times, streamFlow_observed, color='k', linewidth=0.7)
    ax.plot(times, streamFlow, color='r', linewidth=0.7)

    # Secondary, inverted y axis; no series is drawn on it in this function.
    ax2 = ax.twinx()
    ax2.invert_yaxis()
    fig.show()

    
    
def plotDeficitRatio(times, day_month_series, precip, deficitRatio, m):
    """Plot the water deficit as a percentage with precipitation bars on top.

    Parameters
    ----------
    times : sequence
        X values shared by the deficit line and the precipitation bars.
    day_month_series : sequence
        Not used by this function.
    precip : sequence
        Bar heights drawn on the inverted secondary axis.
    deficitRatio : array-like
        Deficit as a fraction; multiplied by 100 before plotting.
    m : float
        Decay parameter, shown in the figure annotation.

    The figure title uses the module-level ``watershed_name``.
    """
    deficit_pct = deficitRatio*100

    fig, ax = plt.subplots(figsize=(14, 5.5))
    caption = "05-01 to 10-25-2017, hourly data\ndecay m: {}".format(m)
    plt.gcf().text(0.65, 0.8, caption, fontsize=13, color='black')
    ax.set_title(watershed_name + ' ', fontsize=20)
    ax.set_ylabel(r'% water deficit', fontsize=16, labelpad=16, rotation='vertical')
    ax.set_xlabel("Time", fontsize=16, labelpad=10, rotation='horizontal')

    # Five y ticks: minimum, three quartile points of the range, maximum.
    lowest = min(deficit_pct)
    highest = max(deficit_pct)
    spread = highest - lowest
    spaced_yticks = [lowest,
                     lowest + spread*0.25,
                     lowest + spread*0.5,
                     lowest + spread*0.75,
                     highest]
    ax.set_yticks(spaced_yticks)
    ax.set_yticklabels(['{0:.2f}'.format(tick) for tick in spaced_yticks],
                       fontsize=13)
    ax.plot(times, deficit_pct, color='r', linewidth=0.7)

    # Precipitation bars hang from the top of the plot (inverted twin axis).
    rain_ax = ax.twinx()
    rain_ax.bar(times, precip, 0.2, color='dodgerblue')
    rain_ax.invert_yaxis()
    rain_ax.set_ylabel(r'Precipitation [mm]', fontsize=16, labelpad=16, rotation='vertical')
    fig.show()


    
def plotAreaRatio(times, day_month_series, precip, areaRatio, m):
    """Plot the saturated area as a percentage with precipitation bars on top.

    Parameters
    ----------
    times : sequence
        X values shared by the area line and the precipitation bars.
    day_month_series : sequence
        Not used by this function.
    precip : sequence
        Bar heights drawn on the inverted secondary axis.
    areaRatio : array-like
        Saturated-area fraction; multiplied by 100 before plotting.
    m : float
        Decay parameter, shown in the figure annotation.

    The figure title uses the module-level ``watershed_name``.
    """
    area_pct = areaRatio*100
    figure, main_axis = plt.subplots(figsize=(14, 5.5))
    plt.gcf().text(
        0.65, 0.8,
        "05-01 to 10-25-2017, hourly data\ndecay m: {}".format(m),
        fontsize=13, color='black',
    )
    main_axis.set_title(watershed_name + ' ', fontsize=20)
    main_axis.set_ylabel(r'% saturated area', fontsize=16, labelpad=16, rotation='vertical')
    main_axis.set_xlabel("Time", fontsize=16, labelpad=10, rotation='horizontal')

    # y ticks at the minimum, the quartile points of the range, and the maximum
    bottom, top = min(area_pct), max(area_pct)
    extent = top - bottom
    spaced_yticks = [bottom]
    spaced_yticks.extend(bottom + extent*fraction for fraction in (0.25, 0.5, 0.75))
    spaced_yticks.append(top)
    main_axis.set_yticks(spaced_yticks)
    tick_text = ['{0:.2f}'.format(value) for value in spaced_yticks]
    main_axis.set_yticklabels(tick_text, fontsize=13)
    main_axis.plot(times, area_pct, color='r', linewidth=0.7)

    # inverted twin axis so the precipitation bars grow downwards from the top
    precip_axis = main_axis.twinx()
    precip_axis.bar(times, precip, 0.2, color='dodgerblue')
    precip_axis.invert_yaxis()
    precip_axis.set_ylabel(r'Precipitation [mm]', fontsize=16, labelpad=16, rotation='vertical')
    figure.show()


In [None]:
# ---------------------------------------------------------------------------
# Run configuration and input data.
# Everything defined in this cell is module-level state read by later cells.
# ---------------------------------------------------------------------------
dir_path = os.path.realpath('./')
cwd = os.getcwd()  # not referenced again in this section

watershed_name = "Sleepers River"      # used in plot titles
watershed_foldername = "sleepers"      # sub-folder of ../data/
watershed_nick = "sleepers"            # prefix of every input/output file name
# Data folder: <parent of dir_path>/data/<watershed_foldername>/
working_dir = (os.path.abspath(os.path.join(dir_path,'..')) + "/data/" + 
               watershed_foldername + os.path.sep)
# n_days, run_number and m are only used here to build folder and file names.
n_days = 20000
run_number = 1
decaying_transmissivity = True  # selects the "decay<m>" naming variant below
m = 0.02787428


# Read band 1 of the DEM; cells with elevation > 0 are counted as watershed.
dem = working_dir + watershed_nick + "_dem.tif"
elevations_ds = gdal.Open(dem, GA_ReadOnly)
band = elevations_ds.GetRasterBand(1)
cols, rows = elevations_ds.RasterXSize, elevations_ds.RasterYSize
elevationValues = band.ReadAsArray(0, 0, cols, rows)
geotransform = elevations_ds.GetGeoTransform()
# geotransform[1] is the pixel width in GDAL's geotransform convention.
resol = np.round(geotransform[1])
watershed_cells = np.where(elevationValues>0.0, 1, 0)
# Watershed area = number of watershed cells * cell size squared.
A = np.sum(watershed_cells) * resol * resol

#parameters for sand  
# NOTE(review): of the constants below only `fi`, `n` and `D` are read by the
# cells visible in this section; Ks, psi_b, Sy and b are not referenced again.
Ks = 20  #micrometers per second
Ks = Ks * 3600 / 1000000  #meters per hour
psi_b = 0.1  #bubbling pressure / capillary head, for sand
fi = 0.45  #porosity
Sy = 0.22  #specific yield
b = 0.0001354
n = 15  # exponent applied to the storage ratio in the subsurface-flow term

D = 2  # multiplied with A and fi to get maxStorage; presumably a depth in m - TODO confirm
#ET = 3/24
# ET per time step keyed by month abbreviation; each value is divided by 24
# (presumably a daily depth converted to hourly - TODO confirm units).
ETdict = {'Apr':0.0017/24, 'May':0.0017/24, 'Jun':0.0019/24, 'Jul':0.003/24,
     'Aug':0.003/24, 'Sep':0.003/24, 'Oct':0.003/24}
maxStorage = A * D * fi




# Folder and file name of the relationship table. The "noDecay" names are the
# default and are overwritten when decaying_transmissivity is set.
# Both paths are relative (no working_dir prefix).
run_foldername = "{}_{}_{}".format(watershed_nick, n_days, run_number) + os.path.sep
relationship_table_filename = run_foldername+watershed_nick+"_{}_{}_noDecay_ARatio_satFront_SoilWatVol.txt".format(n_days, run_number)
if decaying_transmissivity == True:
    run_foldername = "{}_{}_{}_decay{}".format(watershed_nick, n_days, run_number, m) + os.path.sep
    relationship_table_filename = run_foldername+watershed_nick+"_{}_{}_decay{}_ARatio_satFront_SoilWatVol.txt".format(n_days, run_number, m)
# Tab-separated table; interpolate_from_storageRatio reads its
# "norm_SoilWatVol" column and the column it is asked for (e.g. "AreaRatio").
relationship_df = pd.read_csv(relationship_table_filename, header=0, sep='\t', index_col=0)
relationship_df.head()

# Precipitation: the file's own header row is replaced by the names below and
# the first column ("timestep") becomes the index.
precipitation_column_names = ["timestep", "day", "hour", "precip"]
precipitation_filename = working_dir+watershed_nick+"_2017_0401_1228_precipitation.csv"
precipitation_df = pd.read_csv(precipitation_filename, header=0, index_col=0, names=precipitation_column_names)
# Add a "DD-Mon" label per row (e.g. "01-May"); later cells use its last three
# characters as the month key into ETdict.
date_time_obj = pd.to_datetime(precipitation_df.day)
day_month = date_time_obj.apply(lambda x: x.strftime("%d-%b"))
precipitation_df["day_month"] = day_month
#precipitation_df = precipitation_df[precipitation_df["day"] >= "2017-05-01"]
#precipitation_df = precipitation_df[precipitation_df["day"] <= "10/25/2017"]
#print(precipitation_df["day"].head())
#print(precipitation_df.shape)

# Observations: only file columns 0,1,2,3,4,7 are read and renamed; the first
# ("timestep") becomes the index.
observation_filename = working_dir+watershed_nick+"_hourly_obs_2017_0401_1026.csv"
observation_column_names = ["timestep","day","hour","streamflow_cfs","streamflow_ms","ET_mhr"]
observation_df = pd.read_csv(observation_filename, header=0, index_col=0, 
                             usecols=[0,1,2,3,4,7], names=observation_column_names)
date_time_obj = pd.to_datetime(observation_df.day)
day_month = date_time_obj.apply(lambda x: x.strftime("%d-%b"))
observation_df["day_month"] = day_month
# Rewrite the "day" column in YYYY-MM-DD form.
correct_date_format_Series = date_time_obj.apply(lambda x: x.strftime("%Y-%m-%d"))
observation_df["day"] = correct_date_format_Series
#observation_df = observation_df[observation_df["day"] <= "10/25/2017"]#2017-10-25"]
#print(observation_df)
#print(observation_df.shape)


In [None]:
# ---------------------------------------------------------------------------
# Initial model state, simulation window and estimate of the flow coefficient.
# ---------------------------------------------------------------------------
output_Dict = {}  # timestep -> list of per-step results, filled by the simulation loop
initialStorageRatio = 0.95
initialStorageVol = maxStorage * initialStorageRatio
initialDeficitRatio = 1 - initialStorageRatio
initialDeficitVol = maxStorage * initialDeficitRatio
# Saturated-area fraction that the relationship table gives for the initial storage ratio.
initialAreaRatio = interpolate_from_storageRatio(initialStorageRatio, "AreaRatio")
print("initialAreaRatio is {}".format(initialAreaRatio))
initialAsat = initialAreaRatio * A
#initialFront = interpolate_from_storageRatio(initialStorageRatio, "Saturated_Front")

# "current*" variables carry the state from one time step to the next.
currentAsat = initialAsat
currentStorageVol = initialStorageVol
currentStorageRatio = initialStorageRatio
print("initialStorageRatio is {}".format(initialStorageRatio))
#currentFront = initialFront

timesteps = precipitation_df.index
print(timesteps)
# Window: starts 29*24 steps after the first precipitation timestep and spans
# 24*(1+31*3+30*2+25) further steps.
initial_timestep = min(timesteps) + 29*24
final_timestep = initial_timestep + (24*(1+31*3+30*2+25))
#initial_timestep, final_timestep = min(timesteps), min(timesteps)+(24*(31+12))
print(initial_timestep, final_timestep)
# The result of this expression is discarded; it has no effect.
range(min(timesteps),max(timesteps))

# NOTE(review): this is a plain integer slice with []; confirm that it selects
# the rows whose index labels are initial_timestep..final_timestep.
observed_streamFlow_mmhr = observation_df["streamflow_ms"][initial_timestep-1:final_timestep]*3600*1000  #mm/hr
# Coefficient chosen so that b_estimate * initialStorageRatio**n equals the
# first observed value (converted from mm to m).
b_estimate = (observed_streamFlow_mmhr.iloc[0]/1000)/(initialStorageRatio)**n  #m/hr
print(b_estimate)


In [None]:
# ---------------------------------------------------------------------------
# Time-stepping simulation over [initial_timestep, final_timestep], followed
# by export of the results to CSV.
# ---------------------------------------------------------------------------
# Running totals used for the water-balance printout after the loop.
sumP = 0
sumET = 0
sumQrunoff = 0
sumQsubsurface = 0
sumStreamFlow = 0
# NOTE: never updated inside the loop (the update is commented out), so the
# value printed below is still initialStorageVol.
sumStorageVol = initialStorageVol
finalStorageVol = 0

for timestep in range(initial_timestep, final_timestep+1):
    print("Running timestep {}".format(timestep))
    # State at the end of the previous step.
    previousAsat = currentAsat
    previousStorageVol = currentStorageVol
    previousStorageRatio = currentStorageRatio
    #previousFront = currentFront
    
    # Forcing for this step, looked up by timestep label.
    date = precipitation_df['day'].loc[timestep]
    hour = precipitation_df['hour'].loc[timestep]
    day_month = precipitation_df['day_month'].loc[timestep]
    P = precipitation_df['precip'].loc[timestep]/1000  #m/hr
    totalP = P * A  #m^3/hr
    sumP += totalP
    # Runoff: precipitation falling on the previously saturated area.
    Qrunoff = P * previousAsat  #m^3/hr
    # Subsurface flow: power law of the previous storage ratio.
    Qsubsurface = b_estimate * A * previousStorageRatio**n  #m^3/hr
    #ET = observation_df['ET_mhr'].loc[timestep]*A  #m/hr
    # ET rate looked up by the month abbreviation (last 3 chars of "DD-Mon").
    ET = ETdict[day_month[-3:]]*A
    sumET += ET
    deltaStorageVol = totalP - ET - Qrunoff - Qsubsurface
    #sumStorageVol += deltaStorageVol
    currentStorageVol = previousStorageVol + deltaStorageVol
    # Keep storage within [0, maxStorage].
    if currentStorageVol < 0:
        currentStorageVol = 0
    if currentStorageVol > maxStorage:
        # The excess over maxStorage is added to runoff and the subsurface
        # flow is replaced by its value at a storage ratio of 1.
        Qrunoff += currentStorageVol - maxStorage
        Qsubsurface = b_estimate * A * 1**n
        currentStorageVol = maxStorage
    # Flows per unit watershed area.
    norm_Qrunoff = Qrunoff / A
    norm_Qsubsurface = Qsubsurface / A
    norm_streamflow = norm_Qrunoff + norm_Qsubsurface
    sumQrunoff += Qrunoff
    sumQsubsurface += Qsubsurface
    streamFlow = Qrunoff + Qsubsurface
    sumStreamFlow += streamFlow
    
    # New state: the saturated area follows from the updated storage ratio
    # through the relationship table.
    currentStorageRatio = currentStorageVol/maxStorage
    currentAreaRatio = interpolate_from_storageRatio(currentStorageRatio, "AreaRatio")
    currentAsat = currentAreaRatio * A
    #currentFront = interpolate_from_storageRatio(currentStorageRatio, "Saturated_Front")
    # Row layout must match the `columns` list passed to from_dict below.
    output_Dict[timestep] = [date, hour, day_month,
                             P*1000, Qrunoff, Qsubsurface,
                             currentStorageRatio, currentAreaRatio,
                             streamFlow, norm_streamflow*1000]
    finalStorageVol = currentStorageVol

print()
print(sumP, sumET, sumQrunoff, sumQsubsurface, sumStreamFlow, sumStorageVol)
print("Final water balance is {} m^3".format(sumP - sumET - sumStreamFlow))
print("{} - {} = {}".format(finalStorageVol, initialStorageVol, finalStorageVol-initialStorageVol))

# One row per timestep; the index is the timestep label.
output_df = pd.DataFrame.from_dict(output_Dict,
                                   orient = 'index',
                                   columns = ["date", "hour", "day_month",
                                              "P", "Qse","Qgw", 
                                              "storageRatio", "areaRatio",
                                              "model_m3hr", "model_mmhr"])

# Assigning a Series aligns on index labels (timestep).
output_df["observ_mmhr"] = observed_streamFlow_mmhr

# Output file name mirrors the relationship-table naming (noDecay / decay<m>).
model_output_name = "{}{}_{}_{}_noDecay_modelOutput.csv".format(run_foldername, watershed_nick, 
                                                                               n_days, run_number)
if decaying_transmissivity == True:
    model_output_name = "{}{}_{}_{}_decay{}_modelOutput.csv".format(run_foldername, watershed_nick, 
                                                                             n_days, run_number, m)
print(model_output_name)    
output_df.to_csv(model_output_name, sep=',')

# Convenience aliases for the plotting cell. These rebind `timesteps` and
# `streamFlow`, which were used with other meanings above.
timesteps = output_df.index
day_month_series = output_df["day_month"]
hour_series = output_df["hour"]
date_series = output_df["date"]
precip = output_df["P"]
Qse = output_df["Qse"]
Qgw = output_df["Qgw"]
streamFlow = output_df["model_m3hr"]
norm_streamFlow = output_df["model_mmhr"]
storageRatio = output_df["storageRatio"]
areaRatio = output_df["areaRatio"]
#print(observed_streamFlow)

In [None]:
# Show the first observed values, then draw the three summary figures:
# modeled vs. observed flow, deficit (1 - storage ratio) and saturated area.
print(observed_streamFlow_mmhr.head())
plotFlow(timesteps, day_month_series, precip, norm_streamFlow, observed_streamFlow_mmhr, m)
plotDeficitRatio(timesteps, day_month_series, precip, 1-storageRatio, m)
plotAreaRatio(timesteps, day_month_series, precip, areaRatio, m)

In [None]:
# Re-estimate the flow coefficient from the SECOND observed value instead of
# the first. `observed_streamFlow_mmhr` (mm/hr) is the observed series defined
# in this notebook; it is converted to m/hr before dividing, as in the
# original estimate.
# NOTE(review): running this cell rebinds `b_estimate`, which the simulation
# loop reads - confirm that is intended before re-running the model.
b_estimate = (observed_streamFlow_mmhr.iloc[1]/1000)/(initialStorageRatio)**n
print(observed_streamFlow_mmhr.iloc[1])
print(b_estimate)

In [None]:

# Scatter plot of observed against modeled streamflow.
# output_df stores both series in mm/hr already ("model_mmhr", "observ_mmhr"),
# which is the unit shown on the axis labels, so they are plotted as-is.
fig,ax = plt.subplots(figsize=(13, 5.5))
modeled_mmhr = output_df["model_mmhr"]
observed_mmhr = output_df["observ_mmhr"]
# Log-transformed copies; not used by the plot below.
x_ = np.log(modeled_mmhr)
y_ = np.log(observed_mmhr)
plt.gcf().text(0.65, 0.8, "05-01 to 10-25-2017, hourly data\ndecay m: {}".format(m), 
                   fontsize=13, color='black')
ax.set_title(watershed_name + ' ', fontsize=20)
ax.set_ylabel(r'Observed streamflow [mm/hr]', fontsize=16, labelpad=16, rotation='vertical')
ax.set_xlabel("Modeled streamflow [mm/hr]", fontsize=16, labelpad=10, rotation='horizontal')
plt.scatter(modeled_mmhr, observed_mmhr, s=2, c='k', alpha=0.8)
fig.show()

In [None]:
"""
This script finds the start and end points of spikes in a streamflow time series for a single hydrograph.
The start and end points are used to create a new data frame that only includes the storm periods.
Acknowledgement:
This code is adapted from https://github.com/UVAdMIST/Norfolk_Groundwater_Model/blob/master/Preprocess/Storm_train_data.py
which was originally written for GWL data.

"""

import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from matplotlib import gridspec
from scipy.signal import argrelmax, find_peaks
# gaussian_filter1d lives in scipy.ndimage; the scipy.ndimage.filters
# namespace is deprecated.
from scipy.ndimage import gaussian_filter1d

# Padding (in samples) applied around each storm by the later cells.
n_lag = 0  # 58
n_ahead = 0  # 18

# load dataset
# Observed streamflow as stored in output_df ("observ_mmhr", already mm/hr).
# NOTE(review): the `height` threshold below is therefore in mm/hr - confirm
# that 0.4 is the intended value for this series.
observ_flow = np.asarray(output_df["observ_mmhr"], dtype='float64')

# find peaks
# found_peaks[0] holds positions within observ_flow; found_peaks[1] holds the
# peak properties, including the interpolated 'left_ips' / 'right_ips' bounds.
found_peaks = find_peaks(observ_flow,
                         distance=2,
                         height=0.4,
                         width=3)

# Translate positions into the timestep labels of output_df by indexing the
# frame's own index, so the mapping stays right whatever the first timestep is.
timestep_labels = output_df.index.to_numpy()
peak_times = timestep_labels[found_peaks[0]]
peak_start_times = timestep_labels[np.floor(found_peaks[1]['left_ips']).astype(int)]
peak_end_times = timestep_labels[np.ceil(found_peaks[1]['right_ips']).astype(int)]
print(peak_times, peak_start_times, peak_end_times)

# Flag peak rows and record the start/end timestep of each peak on its row.
peak_rows = output_df.index.isin(peak_times)
print(output_df.loc[peak_rows]['day_month'])
output_df["is_peak"] = "NO"
output_df.loc[peak_rows,'is_peak'] = "YES"
output_df.loc[peak_rows,'peak_start'] = peak_start_times
output_df.loc[peak_rows,'peak_end'] = peak_end_times
# Window of +/- 3 timesteps around every row.
output_df["mw_start"] = output_df.index-3
output_df["mw_end"] = output_df.index+3
# Overwrites the model output CSV with the additional columns.
output_df.to_csv(model_output_name, sep=',')

In [None]:
# First/second derivative diagnostics of the observed flow, laid out as seven
# stacked panels. Only the first two panels draw a series; the others are
# titled but left empty.
fig = plt.figure(figsize=(8, 14))
gs = gridspec.GridSpec(7, 1)

# Derived series (several are reused by the cells that follow).
first_dev = np.gradient(observ_flow)
second_dev = np.gradient(first_dev)
first_dev_clipped = np.clip(np.abs(np.gradient(first_dev)), 0.0001, 2)
second_dev_clipped = np.clip(np.abs(np.gradient(second_dev)), 0.0001, 2)
first_dev_smoothed = gaussian_filter1d(first_dev, 5)
second_dev_smoothed = gaussian_filter1d(second_dev_clipped, 5)

# One axes per grid row, created top to bottom.
ax0, ax1, ax2, ax3, ax4, ax5, ax6 = [plt.subplot(gs[row]) for row in range(7)]

ax0.set_title('GWL')
ax0.plot(observ_flow)

ax1.set_title('1st derivative')
ax1.plot(first_dev)

ax2.set_title('2nd derivative')
ax3.set_title('first derivative absolute value + clipping')
ax4.set_title('second derivative absolute value + clipping')
ax5.set_title('Smoothing applied to 1st derivative')
ax6.set_title('Smoothing applied to second derivative')

plt.tight_layout()
plt.show()

# find indices where first derivative == 0
first_dev_zeros = np.where(first_dev == 0)
first_dev_zeros = first_dev_zeros[0]
# NOTE(review): the constant 33 was inherited from the groundwater script this
# code was adapted from ("MMPS-175 ... there was no start"); confirm whether
# it is meaningful for this streamflow series.
first_dev_zeros = np.insert(first_dev_zeros, 0, 33)  # MMPS-175, add 33 to beginning because there was no start

# find location of max first derivative
found_first_dev = find_peaks(first_dev_smoothed, height=2)

# find indices of zero values that bracket max value
# For every peak of the smoothed slope: the closest zero-slope sample before
# it is the storm start and the closest one after it is the storm end. When no
# zero-slope sample exists on one side, fall back to the corresponding end of
# the series instead of failing on an empty selection.
start_list = []
end_list = []
last_index = len(first_dev) - 1
for slope_peak in found_first_dev[0]:
    zeros_after = first_dev_zeros[first_dev_zeros > slope_peak]
    zeros_before = first_dev_zeros[first_dev_zeros < slope_peak]
    end_list.append(zeros_after.min() if zeros_after.size else last_index)
    start_list.append(zeros_before.max() if zeros_before.size else 0)

# create pairs of start and peak values
# A flow peak belongs to a start when it lies strictly after that start and
# strictly before the next one; the last start takes every later peak.
potential_start = []
potential_peak = []
flow_peaks = found_peaks[0]
last_position = len(start_list) - 1
for position, storm_start in enumerate(start_list):
    for flow_peak in flow_peaks:
        if storm_start < flow_peak and (
                position == last_position or flow_peak < start_list[position + 1]):
            potential_start.append(storm_start)
            potential_peak.append(flow_peak)

# filter pairs of values to remove ones that have the same start
# Keep the first pair of every run of equal starts. The first pair has no
# predecessor and is always kept (comparing it with index -1 would compare it
# with the LAST pair and could drop it).
filtered_start = []
filtered_peak = []
for position, (storm_start, flow_peak) in enumerate(zip(potential_start, potential_peak)):
    if position == 0 or storm_start != potential_start[position - 1]:
        filtered_start.append(storm_start)
        filtered_peak.append(flow_peak)

# add lag data plus forecast to start and forecast data plus 24hrs to end
final_start = [storm_start - (n_lag + n_ahead) for storm_start in filtered_start]
final_peak = [flow_peak + n_ahead for flow_peak in filtered_peak]

# save dates of start and end lists
# Slice one frame per storm out of output_df. final_start / final_peak are
# positions within observ_flow, which was built from output_df, so positional
# .iloc slicing selects the matching rows.
df_list = []
df_len_list = []
for i, j in zip(final_start, final_peak):
    print(i, j)
    df = output_df.iloc[i:j+1]
    df_len = len(df)
    df_list.append(df)
    df_len_list.append(df_len)
storms_df = pd.concat(df_list).drop_duplicates().reset_index(drop=True)

# get average length of storms
storm_avg = round(sum(df_len_list)/len(df_len_list))
print("Average len of storms", "is: ", storm_avg)

# shuffle dfs in df_list to create 1000 bootstrap replicates
# bs0.csv is the un-resampled storm set; bs1..bs1000 are resampled with
# replacement from the list of storm frames.
# NOTE(review): `bs_path` is not defined anywhere in the visible source; it
# must be set to the output folder/prefix before this loop can run.
for count in range(1001):
    if count == 0:
        bs_df = pd.concat(df_list).drop_duplicates().reset_index(drop=True)
        bs_df.to_csv(bs_path + "bs0.csv", index=False)
    else:
        if count % 25 == 0:
            print(count)  # progress indicator
        bs_df_list = random.choices(df_list, k=len(df_list))  # this samples df_list with replacement
        bs_df = pd.concat(bs_df_list).reset_index(drop=True)
        bs_df.to_csv(bs_path + "bs" + str(count) + ".csv", index=False)

# plot observed flow with the detected storm start and peak points
# final_start / final_peak are positions within observ_flow (the series the
# peaks were detected on), so that series is the one drawn and indexed here.
fig, ax = plt.subplots()
ax.set_xlabel('Time Index')
# NOTE(review): the label says cfs; confirm it matches the units of observ_flow.
ax.set_ylabel('Streamflow (cfs)')
ax.plot(observ_flow)
ax.scatter(final_start, observ_flow[final_start], marker='x', color='red', label='Start')
ax.scatter(final_peak, observ_flow[final_peak], marker='P', color='k', label='Peak')
plt.legend()
plt.tight_layout()
plt.show()
# NOTE(review): absolute, user-specific path whose "decay0.05" suffix is fixed
# in the string rather than built from `m` - confirm the intended destination.
data_path = "C:/Users/feder/Documents/CUAHSI/repos/Aquaholics_Anonymous/Topmodel_with_physics/sleepers_20000_1_decay0.05"
storms_df.to_csv(data_path + "_no_blanks_SI_storms.csv", index=False)