# Retrieve Utah Reservoir and Observation Site data for WaDE
Notes:
- Distribution_Stations.csv contains site information, provided by David J. Jones via email.
- Ex API: https://www.waterrights.utah.gov/dvrtdb/DailyCommaData.asp?BYEAR=1960&EYEAR=2022&StationId=2614&Units=Mean+daily+discharge+in+CFS

In [1]:
# Needed Libraries

# Working with data
import os
import numpy as np
import pandas as pd
import geopandas as gpd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Working with API
import requests
import io

# Cleanup
import time
from datetime import datetime
# Let pandas show up to 999 columns when a DataFrame is displayed in the notebook.
pd.options.display.max_columns = 999

In [2]:
# Working Directory
# Every relative file name used in this notebook (the input CSV and the
# exported .xlsx files) is resolved against this folder.
workingDir = "G:/Shared drives/WaDE Data/Utah/SS_ReservoirsObservationSites/RawInputData"
os.chdir(workingDir)  # make workingDir the process's current directory

### Distribution_Stations Site Data

In [None]:
# Load the station site table. This step is known to work and can be skipped for now.

fileInput = "Distribution_Stations.csv"
dfds = pd.read_csv(fileInput)
print(len(dfds.index))  # number of station rows read
dfds.head()

In [None]:
# Collect every STATION_ID as a string of its integer value.
stationIDs = dfds['STATION_ID'].astype(int)  # cast to integers first so the strings carry no decimal part
stationIDList = list(stationIDs.astype(str))
print(len(stationIDList))
stationIDList

### Getting Workable URLs using Station ID list from API 1

In [None]:
%%time
# Resolve a downloadable URL for every station in stationIDList.
#
# For each STATION_ID the DailyCommaData.asp response is read as if it were a
# one-column CSV. Row 12 of that parse is expected to hold the
# <form name="submitform" action="/..."> tag; the markup around the action
# path is stripped and the path is appended to the site root to form newURL.
#
# Result: dfurl, one row per station with columns ReturnVal, oldURL, newURL and
# STATION_ID, also written to dataframeURLs.xlsx.

# Collect the per-station rows and concatenate once after the loop; calling
# pd.concat inside the loop re-copies the accumulated frame on every iteration.
urlFrames = []

for stationID in stationIDList:
    url = ("https://www.waterrights.utah.gov/dvrtdb/DailyCommaData.asp?BYEAR=2000&EYEAR=2022&StationId="
           + str(stationID)
           + "&Units=Mean+daily+discharge+in+CFS.txt")
    print(url)
    dfapi_1 = pd.read_csv(url, on_bad_lines='skip')  # read in file
    dfapi_1['oldURL'] = url
    # rename the column (named after the HTML doctype line) to something simpler
    dfapi_1 = dfapi_1.rename(columns={'<!doctype html public "-//w3c//dtd html 4.0 transitional//en">': "ReturnVal"})
    # keep only row 12; .copy() makes the slice an independent frame so the
    # column assignments below do not trigger SettingWithCopyWarning
    dfapi_1 = dfapi_1.iloc[[12]].copy()

    # strip the surrounding markup and any spaces from the value;
    # regex=False keeps these literal replacements on every pandas version
    dfapi_1['ReturnVal'] = (
        dfapi_1['ReturnVal']
        .str.replace('<form name="submitform" action="/', '', regex=False)
        .str.replace('">', '', regex=False)
        .str.replace(' ', '', regex=False)
    )

    # fill in dataframe
    dfapi_1['newURL'] = "https://www.waterrights.utah.gov/" + dfapi_1['ReturnVal'].astype(str)
    dfapi_1['STATION_ID'] = str(stationID)

    urlFrames.append(dfapi_1)

# pd.concat raises on an empty list, so fall back to an empty frame
dfurl = pd.concat(urlFrames) if urlFrames else pd.DataFrame()

dfurl.to_excel('dataframeURLs.xlsx', index=False)  # The output
print(len(dfurl))
dfurl.head()

### Getting timeseries data using the newly generated URLs.

In [None]:
%%time
# Download the time series behind every resolved URL in dfurl.
#
# Each newURL is read as a CSV, tagged with the STATION_ID it belongs to, and
# the per-station frames are combined into dfts, which is also written to
# dataframeTimeSeries.xlsx.

STATION_IDList = dfurl['STATION_ID'].astype(str).tolist()
urlList = dfurl['newURL'].astype(str).tolist()

# Collect the per-station frames and concatenate once after the loop; calling
# pd.concat inside the loop re-copies the accumulated frame on every iteration.
tsFrames = []

# both lists come from the same dfurl rows, so zip pairs each URL with its station
for stationID, url in zip(STATION_IDList, urlList):
    print(url)
    dfapi_2 = pd.read_csv(url, on_bad_lines='skip')
    dfapi_2['STATION_ID'] = stationID
    tsFrames.append(dfapi_2)

# pd.concat raises on an empty list, so fall back to an empty frame
dfts = pd.concat(tsFrames) if tsFrames else pd.DataFrame()

# NOTE(review): an Excel sheet holds at most 1,048,576 rows. If the combined
# daily series can exceed that, to_excel will raise; consider CSV output then.
dfts.to_excel('dataframeTimeSeries.xlsx', index=False)  # The output
print(len(dfts))
dfts.head()