<a href="https://colab.research.google.com/github/geoskimoto/AWDB-Air-Water-Database-Web-Service/blob/main/AWDB_webservice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Library Imports

In [None]:
try:
    import xmltodict
    print("module 'xmltodict' is installed")
except ModuleNotFoundError:
  !pip install xmltodict


module 'xmltodict' is installed


In [None]:
import xmltodict
import requests
import numpy as np
import pandas as pd
import datetime
from functools import reduce

## Web Service Functions

In [None]:
# Web Call to Access and Download Data of a Single Station from AWDB Web Service (SOAP API)

def SOAP_Call(stationtriplets, elementCD, begindate, enddate):
  """Download one station's daily values for one element from the AWDB SOAP service.

  Posts a ``getData`` request (duration DAILY, ordinal 1, flags off) and
  returns the reported values as a date-indexed DataFrame.

  Parameters
  ----------
  stationtriplets : str
      Station triplet to query, e.g. '418:WA:SNTL'.
  elementCD : str
      Element code, e.g. 'PREC' or 'WTEQ' (water equivalent / SWE).
  begindate, enddate : str
      First and last day of the request. They are substituted into the
      request as given and are also parsed by ``pd.date_range`` to build the
      index, so they must be understood by both.

  Returns
  -------
  pandas.DataFrame
      A single float64 column named after ``elementCD``, indexed by a daily
      DatetimeIndex named 'Date'. Entries the service reports as null are NaN.

  Raises
  ------
  requests.HTTPError
      If the service answers with an HTTP error status.
  ValueError
      If the number of returned values differs from the number of days
      between ``begindate`` and ``enddate``.
  """
  # These module-level names are rebound on every call so that the most
  # recent raw response, parsed document and result can be inspected from
  # other cells.
  global xml, dict_of_xml, df

  headers = {'Content-type': 'text/soap'}

  # Web service endpoint
  URL = "https://wcc.sc.egov.usda.gov/awdbWebService/services?WSDL"

  # Seconds to wait for the server before giving up instead of hanging the cell.
  request_timeout = 120

  # SOAP envelope for getData; the upper-case tokens are placeholders that
  # are substituted below.
  SOAP_current = '''
  <?xml version="1.0" encoding="UTF-8"?>
  <SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:q0="http://www.wcc.nrcs.usda.gov/ns/awdbWebService" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <SOAP-ENV:Body>
      <q0:getData>
        <stationTriplets>STATIONTRIPLETS</stationTriplets>
        <elementCd>ELEMENTCD</elementCd>   
        <ordinal>1</ordinal>
        <duration>DAILY</duration>
        <getFlags>false</getFlags>
        <beginDate>BEGINDATE</beginDate>
        <endDate>ENDDATE</endDate>
        <alwaysReturnDailyFeb29>false</alwaysReturnDailyFeb29>   
      </q0:getData>
    </SOAP-ENV:Body>
  </SOAP-ENV:Envelope>

  '''.strip()
  # Keep <alwaysReturnDailyFeb29> false: when true the service inserts a null
  # for Feb 29 in every non-leap year, so the number of values no longer
  # matches the calendar date range built below.

  # Fill in the placeholders (same order as the request elements are keyed).
  for placeholder, value in (
      ("ELEMENTCD", elementCD),
      ("STATIONTRIPLETS", stationtriplets),
      ("BEGINDATE", begindate),
      ("ENDDATE", enddate),
  ):
    SOAP_current = SOAP_current.replace(placeholder, value)

  # Send the request; fail loudly on an HTTP error rather than trying to
  # parse an error page as a data response.
  xml = requests.post(URL, data=SOAP_current, headers=headers, timeout=request_timeout)
  xml.raise_for_status()

  # Convert the XML document to nested dictionaries and pull out the values.
  dict_of_xml = xmltodict.parse(xml.text)
  values = dict_of_xml['soap:Envelope']['soap:Body']['ns2:getDataResponse']['return']['values']

  # xmltodict only produces a list for repeated elements; a one-day request
  # yields a bare scalar, which must not be iterated character by character.
  if not isinstance(values, list):
    values = [values]

  # Real readings arrive as strings; nulls arrive as dictionaries of
  # attributes. Map the former to floats and the latter to missing values.
  readings = [float(v) if isinstance(v, str) else None for v in values]

  # The values carry no individual dates, so rebuild the daily calendar
  # between begindate and enddate and attach it as the index.
  dates = pd.date_range(begindate, enddate, freq='D', name='Date')
  if len(readings) != len(dates):
    raise ValueError(
        f"AWDB returned {len(readings)} values for {stationtriplets}/{elementCD}, "
        f"but {begindate}..{enddate} spans {len(dates)} days"
    )

  df = pd.Series(readings, index=dates, dtype='float64', name=f'{elementCD}').to_frame()

  return df


In [None]:
# Function to download data from multiple stations at a time from AWDB Web Service
# The web service will accept multiple stations in a single request, so this is not the most efficient approach: it sends one request per station and combines the results into a single pandas dataframe.

def getData(stations, parameter_of_interest, begindate, enddate):
  """Download one element for several stations into a single DataFrame.

  Calls ``SOAP_Call`` once per station and places the results side by side,
  aligned on the date index (outer join).

  Parameters
  ----------
  stations : iterable of str, or str
      Station triplets, e.g. ['401:OR:SNTL', '471:ID:SNTL']. A single
      triplet given as a plain string is treated as a one-element list.
  parameter_of_interest : str
      Element code passed to ``SOAP_Call`` as ``elementCD``.
  begindate, enddate : str
      Date range passed through to ``SOAP_Call`` unchanged.

  Returns
  -------
  pandas.DataFrame
      One column per station, labelled with the station triplet, in the
      order given. Empty if ``stations`` is empty.
  """
  # A bare string would otherwise be iterated character by character.
  if isinstance(stations, str):
    stations = [stations]
  # Materialise once so one-shot iterables work.
  stations = list(stations)

  if not stations:
    return pd.DataFrame()

  # Label every station's column before combining. Merging frames that all
  # share the element code as column name makes pandas invent _x/_y suffixes,
  # which collide once four or more stations are combined.
  per_station = [
      SOAP_Call(stationtriplets=j, elementCD=parameter_of_interest, begindate=begindate, enddate=enddate)
      .iloc[:, 0]
      .rename(f'{j}')
      for j in stations
  ]

  data_singleDF = pd.concat(per_station, axis=1)

  return data_singleDF

In [None]:
# Example: daily snow water equivalent for a single station.
SOAP_Call(
    stationtriplets='418:WA:SNTL',
    elementCD='WTEQ',
    begindate='02/01/2010',
    enddate='09/02/2017',
)

Unnamed: 0_level_0,WTEQ
Date,Unnamed: 1_level_1
2010-02-01,16.2
2010-02-02,16.2
2010-02-03,16.2
2010-02-04,16.2
2010-02-05,16.3
...,...
2017-08-29,0.0
2017-08-30,0.0
2017-08-31,0.0
2017-09-01,0.0


In [None]:
# Example: the same element for several stations, one column per station.
stations = [
    '401:OR:SNTL',
    '471:ID:SNTL',
    '591:WA:SNTL',
]
getData(
    stations=stations,
    parameter_of_interest='WTEQ',
    begindate='01/01/2010',
    enddate='02/02/2020',
)

Unnamed: 0_level_0,401:OR:SNTL,471:ID:SNTL,591:WA:SNTL
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,3.6,4.4,17.8
2010-01-02,3.3,5.2,18.3
2010-01-03,3.3,5.3,18.4
2010-01-04,3.3,5.3,18.4
2010-01-05,3.6,5.3,18.3
...,...,...,...
2020-01-29,6.1,13.6,23.5
2020-01-30,6.1,13.7,24.1
2020-01-31,6.0,13.8,24.5
2020-02-01,5.6,14.0,24.0
