In [154]:
import os
import pandas as pd
import numpy as np
import matplotlib 
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import datetime
from dateutil import parser, rrule
from datetime import datetime, time, date
%matplotlib notebook
import scipy
from scipy import stats

from IPython.display import display, HTML
# Inject CSS into the notebook page to widen the main container, menu bar
# and toolbar (95% / 85% / 99% width) so wide tables and figures fit.
# This only affects how the notebook is rendered; it creates no variables.
display(HTML(data=""" <style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 85%; }
    div#maintoolbar-container { width: 99%; } </style> """))

## Plot out some quick Stats

In [2]:
# Create a metadata "describe" frame for the reservoir data.
#
# Every file in ../Data/Processed becomes one row holding the first and last
# timestamp of its 'date' column plus the mean / median / min / max of its
# 'data' column. The table is written to data_describe_stats.csv.

processed_dir = os.path.join("..", "Data/Processed")
summary_columns = ["station_id", "date_min", "date_max",
                   "mean_WL", "median_WL", "min_WL", "max_WL"]

summary_records = []
# sorted() makes the row order reproducible; os.listdir order is arbitrary.
for file in sorted(os.listdir(processed_dir)):
    data = pd.read_csv(os.path.join(processed_dir, file), parse_dates=['date'])
    # -66577 is replaced with NaN so it is excluded from the statistics below.
    data = data.replace(-66577, np.nan)

    levels = data['data']
    # Use the named reductions directly rather than describe()[position]:
    # positional lookups on describe()'s label index are deprecated in recent
    # pandas and silently depend on the order of describe()'s rows.
    summary_records.append({
        "station_id": file.split(".")[0],
        "date_min": data['date'].min(),
        "date_max": data['date'].max(),
        "mean_WL": levels.mean(),
        "median_WL": levels.median(),
        "min_WL": levels.min(),
        "max_WL": levels.max(),
    })

# Passing `columns` keeps the schema (and column order) even if the folder is empty.
data_describe = pd.DataFrame(summary_records, columns=summary_columns)

data_describe.to_csv("data_describe_stats.csv")

data_describe.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,station_id,date_min,date_max,mean_WL,median_WL,min_WL,max_WL
0,WL_EDD00214,2018-12-31 22:09:00,2022-03-23 18:04:00,1484.26588,1475.0,1315.0,1907.0
1,WL_EDD00CC6,2018-12-31 22:09:00,2022-03-23 18:04:00,878.638709,829.0,-125738.0,131071.0
2,WL_EDD01FB0,2021-03-16 19:09:00,2021-07-27 18:04:00,2373.487317,2032.0,1926.0,4568.0
3,WL_EDD024F8,2018-12-31 22:09:00,2022-03-23 18:04:00,38163.755282,38138.0,-92938.0,59639.0
4,WL_EDD02A2A,2018-12-31 22:09:00,2022-03-23 18:04:00,96783.423337,97217.0,-74419.0,131071.0
5,WL_EDD0378E,2019-02-07 18:10:00,2022-03-23 18:05:00,3670.233445,3670.0,2616.0,4541.0
6,WL_EDD0395C,2018-12-31 22:10:00,2022-03-23 18:05:00,2731.806102,2768.0,438.0,131071.0
7,WL_EDD0411E,2018-12-31 22:10:00,2021-10-20 08:45:00,2595.368893,2744.0,248.0,131071.0
8,WL_EDD04FCC,2018-12-31 22:10:00,2022-03-23 18:05:00,2237.322308,2261.0,1693.0,3625.0
9,WL_EDD05268,2018-12-31 22:10:00,2022-03-23 18:05:00,2464.304978,2624.0,1682.0,3657.0


## Import Data Files and resample to Daily 

In [3]:
# Load every processed station file, remove outliers, rescale, and resample
# to daily means. Results are stored in `daily_dic`, keyed by file name.

processed_dir = os.path.join("..", "Data/Processed")

# Outlier threshold: rows whose 'data' value is this many standard deviations
# or more away from the file's mean are dropped. Defined once, outside the loop.
# Probably could use a more advanced filtering method later if needed.
STDs = 3

daily_dic = {}

# sorted() gives a reproducible key order; os.listdir order is arbitrary.
for file in sorted(os.listdir(processed_dir)):
    data = pd.read_csv(os.path.join(processed_dir, file), parse_dates=['date'])
    # -66577 is replaced with NaN so it does not distort the mean/std below.
    data = data.replace(-66577, np.nan)

    # Z-score filter. Rows with NaN in 'data' compare False and are dropped too.
    zscore = (data['data'] - data['data'].mean()) / data['data'].std()
    # .copy() makes the filtered frame independent of the original, so the
    # column assignment below does not trigger SettingWithCopyWarning.
    data = data.loc[zscore.abs() < STDs].copy()

    # Scale the raw values by 1/100.
    # NOTE(review): the original comment says "convert from decimal feet to ft";
    # presumably the raw values are hundredths of a foot - confirm.
    data['data'] = data['data'] / 100

    # Resample data to daily means
    data = data.set_index("date")
    data_daily = data.resample('D').mean()

    # Record daily files in a dictionary of dataframes
    daily_dic[file] = data_daily

daily_dic.keys()

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
## Import metadata
# Read the gauged-reservoir metadata table; the first CSV column becomes the
# row index. The preview below shows the first five rows.
Station_Meta = pd.read_csv(
    "Gauged_reservoirs_META.csv",
    index_col=0,
)
Station_Meta.head(n=5)

Unnamed: 0,batt_1w_slope,batt_alert,batt_alert_off,batt_alert_on,dlnrid,level_alert,level_alert_off,level_alert_on,name,id
0,5.056549e-08,0,11.7,11.5,KA-0099,0,17.0,19.0,Waita,EDD00214
1,1.113613e-06,0,11.7,11.5,KA-0010,1,11.0,12.0,Waiakalua,EDD00CC6
2,0.0,0,11.7,11.5,MA-0073,0,13.0,14.0,HC&S #24,EDD01162
3,0.0,0,11.7,11.5,MA-0093,0,34.4,35.4,Kaupakalua,EDD01FB0
4,6.511962e-08,0,11.7,11.5,OA-0154,0,393.0,394.0,Nuuanu No 1,EDD024F8


dict_keys(['WL_EDD00214.csv', 'WL_EDD00CC6.csv', 'WL_EDD01FB0.csv', 'WL_EDD024F8.csv', 'WL_EDD02A2A.csv', 'WL_EDD0378E.csv', 'WL_EDD0395C.csv', 'WL_EDD0411E.csv', 'WL_EDD04FCC.csv', 'WL_EDD05268.csv', 'WL_EDD05CBA.csv', 'WL_EDD067F2.csv', 'WL_EDD06920.csv', 'WL_EDD07484.csv', 'WL_EDD07A56.csv', 'WL_EDD08400.csv', 'WL_EDD08AD2.csv', 'WL_EDD099A4.csv', 'WL_EDD0A2EC.csv', 'WL_EDD0AC3E.csv', 'WL_EDD0B19A.csv', 'WL_EDD0BF48.csv', 'WL_EDD0C70A.csv', 'WL_EDD0C9D8.csv', 'WL_EDD0D47C.csv'])

# RAINDATA
Using the statewide data download from HCDP; it's actually a total pain. The data formatting is strange, and the station metadata is inconsistent. Oh well...

In [157]:
# Concatenate the monthly rainfall station files into two frames:
#   RainData      - rows are dates, columns are station SKN values
#   RainData_Meta - one row per station location, with descriptive fields
#
# The folder layout read here is <raindatapath>/<year>/<month>/<one csv file>.

Datadic = {}
metadata_dic = {}

raindatapath = "Data/External_data/Precip/2018-2022_station_data"
rain_root = os.path.join("..", raindatapath)

# Descriptive (non-rainfall) columns present in every monthly file. They are
# copied into the metadata frame and removed from the rainfall frame.
station_info_cols = ['Station.Name', 'Observer', 'Network', 'Island', 'ELEV.m.',
                     'LAT', 'LON', 'NCEI.id', 'NWS.id', 'NESDIS.id', 'SCAN.id',
                     'SMART_NODE_RF.id']

for year in sorted(os.listdir(rain_root)):
    for month in sorted(os.listdir(os.path.join(rain_root, year))):
        month_dir = os.path.join(rain_root, year, month)
        file = sorted(os.listdir(month_dir))
        if not file:
            # An empty month folder used to raise IndexError on file[0].
            print("No rainfall file found in {}; skipping".format(month_dir))
            continue

        data = pd.read_csv(os.path.join(month_dir, file[0]))

        # Extract a separate dataframe of metadata (SKN plus descriptive fields)
        metadata_dic[file[0]] = data[['SKN'] + station_info_cols]

        # Process into a better format where columns are individual stations
        # and the row index is the date.
        data = (
            data.drop(columns=station_info_cols)
                .set_index("SKN", drop=True)
                .transpose()
                .reset_index()
        )
        # Drop the first character of each former column label (the leading
        # "X") and parse the remainder as a date.
        data['index'] = data['index'].astype(str).str[1:]
        data['index'] = pd.to_datetime(data['index'], yearfirst=True)
        data = data.set_index("index")
        data.columns.name = None   # drop the leftover "SKN" axis label

        Datadic[file[0]] = data

# Produce the rainfall dataframe. sort_index() guarantees chronological rows
# regardless of the order in which the month folders were listed.
RainData = pd.concat(list(Datadic.values()), sort=True).sort_index()
# Produce the metadata dataframe
RainData_Meta = pd.concat(list(metadata_dic.values()), sort=True)

# No single metadata field is unique across stations, so build a (hopefully)
# unique ID from latitude and longitude formatted to 6 decimals.
RainData_Meta['UID'] = (
    RainData_Meta['LAT'].map('{:.6f}'.format)
    + "_"
    + RainData_Meta['LON'].map('{:.6f}'.format)
)
# Keep the first row per UID to limit the table to non-duplicate stations
RainData_Meta = RainData_Meta.drop_duplicates(subset='UID')

# Generate the near table outside, in Arc.
# The "_R" suffix makes Arc read SKN as a string rather than a number.
RainData_Meta["SKN"] = RainData_Meta["SKN"].astype(str) + "_R"
RainData_Meta.to_csv('RainData_Meta_v2.csv')
# NOW open in Arc and do a "Point Distance" to generate a near table for each
# of the reservoirs and each rain location point.
# Then filter pertinent fields and do a table join based on the table FIDs.
RainData_Meta.head()

Unnamed: 0,ELEV.m.,Island,LAT,LON,NCEI.id,NESDIS.id,NWS.id,Network,Observer,SCAN.id,SKN,SMART_NODE_RF.id,Station.Name,UID
0,957.0,BI,19.1089,-155.7467,USC00512566,,,COOP,COOP,,2.1_R,,KAHUKU MAUKA,19.108900_-155.746700
1,647.0,BI,19.10778,-155.78944,USC00518652,,,COOP,COOP,,2.32_R,,SOUTH KONA 2 2.32,19.107780_-155.789440
2,280.0,BI,19.186302,-155.886763,USC00516304,,,COOP,COOP,,2.34_R,,MILOLII 2.34,19.186302_-155.886763
3,533.0,BI,19.14472,-155.84944,USC00513376,,,COOP,COOP,,2.36_R,,KAPUA 2.36,19.144720_-155.849440
4,1273.0,BI,19.135,-155.747,US1HIHI0035,,,CoCoRaHS,HAWAIIAN OCEAN VIEW 4.7 NNE,,3.22_R,,HAWAIIAN OCEAN VIEW 4.7 NNE,19.135000_-155.747000


In [114]:
# Find the nearest rainfall stations to each of the gauges.
#
# `near_table` has one row per (reservoir, rain station) pair with a DISTANCE
# column; `neartable_N` keeps only the n smallest-distance rows per reservoir id.

n = 5   # The number of rain stations to use

# Path is built from separate components so it also resolves on non-Windows systems.
near_table = pd.read_csv(os.path.join("..", "Data", "External_data", "Precip",
                                      "2018-2022_station_data_GIS",
                                      "Reservoirs_Precip_Near_table_V2.csv"))

# One small frame per reservoir, in order of first appearance of each id.
# Collecting them and concatenating once replaces DataFrame.append, which was
# removed in pandas 2.0 and copied the whole frame on every iteration.
closest_per_reservoir = [
    near_table[near_table['id'] == i]   # rows for this reservoir
    .sort_values('DISTANCE')            # closest stations first
    .iloc[0:n]                          # keep only the n closest
    for i in near_table['id'].unique()
]

if closest_per_reservoir:
    neartable_N = pd.concat(closest_per_reservoir)
else:
    # Empty input: keep the same columns, with zero rows.
    neartable_N = near_table.iloc[0:0]

neartable_N.head()

Unnamed: 0,OBJECTID,INPUT_FID,NEAR_FID,DISTANCE,FID,name,id,FID_1,Island,SKN,Station_Na,UID,UID_X,UID_Y
458,459,0,391,1327.753383,0,Waita,EDD00214,391,KA,936.0_R,KOLOA 936,21.906865_-159.462206,-159.462206,21.906865
455,456,0,400,2821.076687,0,Waita,EDD00214,400,KA,992.1_R,Omao,21.907101_-159.476800,-159.4768,21.907101
456,457,0,393,2962.88865,0,Waita,EDD00214,393,KA,941.1_R,MAHAULEPU 941.1,21.901030_-159.422207,-159.422207,21.90103
457,458,0,392,3357.76538,0,Waita,EDD00214,392,KA,940.2_R,KOLOA 2.1 SSE,21.878800_-159.451800,-159.4518,21.8788
454,455,0,481,3600.799018,0,Waita,EDD00214,481,KA,940.3_R,POIPU,21.882800_-159.429000,-159.429,21.8828


# Create plots of reservoir level and rainfall 

In [164]:

# Plot each reservoir's daily water level with its alert thresholds, and
# overlay rainfall from the nearby rain stations on a secondary y-axis.

smalldic = {key: daily_dic[key] for key in ['WL_EDD00214.csv', 'WL_EDD00CC6.csv', 'WL_EDD01FB0.csv', 'WL_EDD024F8.csv']}   # For testing

# False: plot the mean rainfall of the N nearest stations (default).
# True:  plot rainfall from the single nearest station only.
plot_nearest_only = False

for i in daily_dic.keys():  # swap in smalldic.keys() for a quick test run

    # Keys look like "WL_<station id>.csv": strip the extension and the prefix.
    stat_id = os.path.splitext(i)[0][3:]

    # Look the station up once instead of re-filtering Station_Meta per field.
    meta_rows = Station_Meta[Station_Meta['id'] == stat_id]
    if meta_rows.empty:
        # Previously this raised IndexError on .values[0].
        print("No metadata found for station {}; skipping plot".format(stat_id))
        continue
    station_meta = meta_rows.iloc[0]
    # Text before the first "-" in dlnrid (e.g. "KA" from "KA-0099").
    isla = station_meta['dlnrid'].split('-')[0]

    fig, ax = plt.subplots(figsize=(8,3))
    daily_dic[i].plot(y='data', ax=ax, marker=".", color='k', alpha=0.5, label="WL_(ft)")

    # Add the level alert lines: red = alert on, green = alert off
    Level_alert_on = station_meta['level_alert_on']
    ax.axhline(y=Level_alert_on, color='r', linestyle='-', alpha = 0.5)
    Level_alert_off = station_meta['level_alert_off']
    ax.axhline(y=Level_alert_off, color='g', linestyle='-', alpha = 0.5)

    ax.set_title("{} {}--{}".format(stat_id, station_meta['name'], isla))
    ax.set_ylabel('WL-reservoir (ft)', color='k')

    # Add rainfall on a twin axis
    nearest_SKN_table = neartable_N[neartable_N['id'] == stat_id]     # Nearest SKN stations for this reservoir
    if nearest_SKN_table.empty:
        # Previously this raised IndexError when picking the nearest station.
        print("No nearby rain stations listed for {}; plotting water level only".format(stat_id))
    else:
        ax2 = ax.twinx()

        # SKN values are stored as "<number>_R"; strip the suffix and convert
        # back to float so they match the RainData column labels.
        nearest5skns = [float(skn.split("_")[0]) for skn in nearest_SKN_table['SKN']]
        rain_datatoplot_5 = RainData[nearest5skns]                 # one column per nearby station
        rain_datatoplot_5_mean = rain_datatoplot_5.mean(axis=1)    # average rainfall over all N stations

        # Single nearest station (smallest DISTANCE)
        nearest_SKN_table_min = nearest_SKN_table[nearest_SKN_table['DISTANCE'] == nearest_SKN_table['DISTANCE'].min()]
        nearest_SKN = nearest_SKN_table_min['SKN'].values[0]
        nearest_SKN_numeric = float(nearest_SKN.split("_")[0])
        rain_datatoplot = RainData[nearest_SKN_numeric]

        rain_series = rain_datatoplot if plot_nearest_only else rain_datatoplot_5_mean
        rain_series.plot(ax=ax2, color='b', alpha=0.2)
        ax2.set_ylabel('Rainfall (mm)', color='b')

    fig.tight_layout()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Notes
Some manual analysis of these graphs: 
- It looks like each reservoir responds differently: some don't seem to respond to the rain, and some do.
- Some seem to respond a little sooner, within a day; others show a clear one-day lag.