<a href="https://colab.research.google.com/github/david-levin11/Verification_Notebooks/blob/main/APRFC_Recurrence_Interval_Retrospective_Tool.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Alaska Region Precipitation & Recurrence Interval Retrospective Tool**
<br/>
Description--This tool will update the ArcGIS Online retrospective version of the Alaska Region Precipitation & Recurrence Interval tool to look at past cases/events.

**UPDATE 09/27/2024: For the APRFC, we have added the ability to quickly add sites to the tool, which updates both the retrospective AND the real-time tool.  To use this feature, just run steps 1 and 2.**

- David Levin, Arctic Testbed & Proving Ground, Anchorage Alaska

##**1 - Install and Import Packages**
This will take about a minute to run.  **Only run this cell one time.  Then you can generate as many snapshots as you need using the below steps.**

In [None]:
# @title
!pip install arcgis
import os
import urllib
import json
import pandas as pd
import numpy as np
import requests
import urllib3
import ast
import logging
import shutil
from datetime import datetime, timedelta
from arcgis.gis import GIS
from arcgis.features import FeatureLayerCollection

##**2. Update Point ARI Data**
If the ARI file exists on CMS this will take milliseconds.  If it does not exist the file will have to be created on the fly.  If this is the case the average run time is around 35 minutes.  **You only need to run this cell once per session.  Then you can generate as many snapshots as you need by repeating step 3 below**



In [None]:

#@markdown **Enter your Synoptic API token below**
#@markdown <br/>
# NOTE(review): the default below looks like a live API token committed to a
# shared notebook -- consider blanking the default and rotating the token.
token = 'c6c8a66a96094960aabf1fed7d07ccf0' #@param {type:"string"}
#@markdown **Update the tool with new sites?  If so, enter a comma separated list of site ids (or a single site id) below (EX: c6840,pmma2)**
add_new_sites = True #@param {type:"boolean"}
new_sites = 'wera2' #@param {type:"string"}
if add_new_sites:
  # Strip whitespace so "c6840, pmma2" works, and drop empty entries so a
  # blank field or trailing comma does not become a bogus '' site id.
  new_sites = [x.strip().lower() for x in new_sites.split(',') if x.strip()]

# Duration lookup: the key is the row index used to read the PFDS `quantiles`
# table (quantiles[key][i]); the value is the label used in the column names.
interval_dict = {4:'1hr', 6:'3hr', 7:'6hr', 8:'12hr', 9:'24hr', 10:'48hr', 11:'72hr'}
# Recurrence-interval labels; their enumeration position is the column index
# used to read the PFDS `quantiles` table.
recurrence_dict = {0:'1yr', 1:'2yr', 2:'5yr', 3:'10yr', 4:'25yr', 5:'50yr', 6:'100yr',
                      7:'200yr', 8:'500yr', 9:'1000yr'}

# disabling url request warnings...seems to pop up now with the latest
# version of colab
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def get_metadata(site, token):
  """
  Look up a station's coordinates from the Synoptic metadata API.

  Parameters
  ----------
  site : Station id (STID) to look up.
  token : Synoptic API token.

  Returns
  -------
  site_df : A one-row pandas DataFrame with columns 'Site' (lower-cased id),
      'Lat' and 'Lon' (both rounded to 2 decimals) when the station is found.
      Otherwise a string explaining why the lookup failed (the API's
      RESPONSE_MESSAGE when one is available).
  """
  metadata_api = "https://api.synopticdata.com/v2/stations/metadata?"
  meta_api_args = {"token": token, "stid": site}
  req = requests.get(metadata_api, params=meta_api_args)
  # Check if the request was successful (status code 200)
  if req.status_code != 200:
    print(f"Error: {req.status_code}")
    print(req.text)
    # Return early: there is no payload to parse, and falling through would
    # reference an unbound `metadata`.
    return f"Metadata request for {site} failed with HTTP status {req.status_code}"
  try:
    metadata = req.json()
  except ValueError:
    return f"Metadata response for {site} was not valid JSON"
  try:
    station = metadata['STATION'][0]
    lat = round(float(station['LATITUDE']), 2)
    lon = round(float(station['LONGITUDE']), 2)
    site_df = pd.DataFrame({'Site': site.lower(), 'Lat': lat, 'Lon': lon}, index=[0])
  except (KeyError, IndexError, TypeError, ValueError, AttributeError):
    # Unknown station or missing/invalid coordinates: hand back the API's
    # own explanation when it supplied one.
    summary = metadata.get('SUMMARY', {}) if isinstance(metadata, dict) else {}
    return summary.get('RESPONSE_MESSAGE', f"No usable metadata returned for {site}")
  return site_df




# URL of the pre-built ARI CSV file
url = 'https://www.weather.gov/source/aprfc/TotalARI_Extract_Version3.csv'

# Local name of the ARI file.  Both names are bound up front because the
# "add new sites" step later in this cell reads them no matter which branch
# of the download/rebuild logic below actually ran.
output_file = 'TotalARI_Extract_Version3.csv'
rifile = output_file

# Send a GET request to fetch the CSV file
response = requests.get(url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Write the content of the CSV file to the local file
    with open(output_file, 'wb') as file:
        file.write(response.content)

    print(f"File saved successfully as {output_file}")
else:
    print(f"Failed to download the file. Status code: {response.status_code}...will need to re-create the file from scratch.  Sorry!")
    print("This will take approximately 30 minutes. You will only need to run this cell one time")

    # Use the token entered in the form above instead of a second hard-coded
    # copy, so a user-supplied token is honoured on this path too.
    API_TOKEN = token

    # API endpoint for getting station metadata
    metaurl = 'https://api.synopticdata.com/v2/stations/metadata'

    # Parameters for the API request
    params = {
        'token': API_TOKEN,
        'state': 'AK',  # Alaska
        'country': 'US',
        'status': 'active'  # To get only active stations
    }

    # Make the API request
    response = requests.get(metaurl, params=params)

    # Without a station list there is nothing to rebuild from, so stop with
    # a clear error instead of failing later on an undefined dataframe.
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        print(response.text)
        raise RuntimeError("Could not retrieve the Alaska station list; cannot rebuild the ARI file.")
    data = response.json()
    if 'STATION' not in data:
        print("No stations found in the response.")
        raise RuntimeError("Station list response contained no stations; cannot rebuild the ARI file.")
    stations = data['STATION']

    # Collect station data into a list of dictionaries
    station_data = [
        {
            'Site': station.get('STID', 'N/A'),
            'Lat': station.get('LATITUDE', 'N/A'),
            'Lon': station.get('LONGITUDE', 'N/A'),
        }
        for station in stations
    ]

    # Convert the list of dictionaries into a pandas DataFrame
    df = pd.DataFrame(station_data)

    # Define the API URL
    atlas_url = "https://hdsc.nws.noaa.gov/cgi-bin/hdsc/new/cgi_readH5.py"

    # One output column per (duration, recurrence) pair, e.g. 'F1yr1hrARI'.
    # Each entry also records where its value sits in the `quantiles` table.
    ari_columns = [
        (f'F{recurrence_dict[ari]}{interval_dict[key]}ARI', key, i)
        for key in interval_dict
        for i, ari in enumerate(recurrence_dict)
    ]
    data_dict = {'FID': [], 'Site': [], 'Lat': [], 'Lon': []}
    data_dict.update({header: [] for header, _, _ in ari_columns})

    # Looping through the sites from the dataframe created above
    for index, row in df.iterrows():
        site = row['Site']
        # Set the query parameters
        lat = row['Lat']
        lon = row['Lon']
        params = {
            'lat': lat,
            'lon': lon,
            'type': 'pf',
            'data': 'depth',
            'units': 'english',
            'series': 'pds'
        }
        # Construct the full URL with parameters (for logging only)
        full_url = requests.Request('GET', atlas_url, params=params).prepare().url
        print(f"Full URL for {site}: {full_url}")
        # Make the API request
        response = requests.get(atlas_url, params=params)

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            continue

        # The response text is parsed as `name = value;` assignments
        parsed_data = {}
        for line in response.text.split(';'):
            if '=' not in line:
                continue
            name, value = line.split('=', 1)
            name = name.strip()
            value = value.strip()
            try:
                # Safely evaluate the value using ast.literal_eval()
                parsed_data[name] = ast.literal_eval(value)
            except (ValueError, SyntaxError):
                # If the value cannot be parsed, keep it as a string
                parsed_data[name] = value

        result = parsed_data.get('result')
        print(result)
        if result == 'none' or result == 'null':
            continue

        # Extract the whole row before touching data_dict so that a short or
        # malformed table can never leave the columns with unequal lengths.
        quantiles = parsed_data.get('quantiles')
        if not isinstance(quantiles, (list, tuple)):
            print(f"No usable quantiles returned for {site}; skipping")
            continue
        try:
            values = [quantiles[key][i] for _, key, i in ari_columns]
        except (IndexError, KeyError, TypeError):
            print(f"No usable quantiles returned for {site}; skipping")
            continue

        data_dict['FID'].append(index)
        data_dict['Site'].append(site)
        data_dict['Lat'].append(lat)
        data_dict['Lon'].append(lon)
        for (header, _, _), value in zip(ari_columns, values):
            data_dict[header].append(value)

    ari_df = pd.DataFrame(data_dict)
    ari_df.to_csv(rifile, index=False)
    print(f"Done with extracting ARI data.  {rifile} can be found in the current working directory.")

# after data download or creation, can add any new sites
if add_new_sites:
  # Depending on how the ARI file was obtained above, one of these two names
  # may not be bound, so resolve the file name without assuming both exist.
  try:
    ari_output = output_file
  except NameError:
    ari_output = rifile
  print(f"Now adding new sites to {ari_output}")
  # Define the API URL
  atlas_url = "https://hdsc.nws.noaa.gov/cgi-bin/hdsc/new/cgi_readH5.py"
  for site in new_sites:
    # Re-read the ARI file on every pass so a site appended earlier in this
    # loop is seen (for the duplicate check and for the next FID).
    old_ari_df = pd.read_csv(ari_output)
    site_list = old_ari_df['Site'].tolist()
    new_fid = 0 if old_ari_df.empty else old_ari_df['FID'].max() + 1

    if site.upper() in site_list:
      print(f"Site {site} already exists in {ari_output}")
      print("Skipping for now...")
      continue

    print(f"Site {site} does not exist in {ari_output}")
    print(f"Getting metadata for site {site}")
    try:
      new_site_df = get_metadata(site, token)
    except Exception as err:
      # Report the actual failure; the result variable is not bound here.
      print(f"Error getting metadata for site {site}.  See response message below")
      print(err)
      continue
    print(new_site_df)
    # get_metadata returns a message string instead of a dataframe when the
    # lookup fails, so check the type explicitly.
    if not isinstance(new_site_df, pd.DataFrame) or new_site_df.empty:
      print(f"Empty dataframe created for {site}.  Perhaps there is no ARI data for this location")
      continue

    # The metadata frame holds a single row for the requested site
    row = new_site_df.iloc[0]
    site = row['Site']
    # Set the query parameters
    lat = row['Lat']
    lon = row['Lon']
    params = {
        'lat': lat,
        'lon': lon,
        'type': 'pf',
        'data': 'depth',
        'units': 'english',
        'series': 'pds'
    }

    # Construct the full URL with parameters
    full_url = requests.Request('GET', atlas_url, params=params).prepare().url
    print(f"Full URL for {site}: {full_url}")
    # Make the API request
    # NOTE(review): verify=False turns off TLS certificate checking for this
    # request -- confirm it is still required and remove it if not.
    response = requests.get(full_url, verify=False)

    # Check if the request was successful; on failure leave the file untouched
    if response.status_code != 200:
      print(f"Error: {response.status_code}")
      continue

    # The response text is parsed as `name = value;` assignments
    parsed_data = {}
    for line in response.text.split(';'):
      if '=' not in line:
        continue
      name, value = line.split('=', 1)
      name = name.strip()
      value = value.strip()
      try:
        # Safely evaluate the value using ast.literal_eval()
        parsed_data[name] = ast.literal_eval(value)
      except (ValueError, SyntaxError):
        # If the value cannot be parsed, keep it as a string
        parsed_data[name] = value

    result = parsed_data.get('result')
    print(result)
    if result == 'none' or result == 'null':
      continue

    quantiles = parsed_data.get('quantiles')
    if not isinstance(quantiles, (list, tuple)):
      print(f"No usable quantiles returned for {site}; skipping")
      continue
    new_row = {'FID': new_fid, 'Site': site.upper(), 'Lat': lat, 'Lon': lon}
    try:
      for key in interval_dict:
        for i, ari in enumerate(recurrence_dict):
          header = f'F{recurrence_dict[ari]}{interval_dict[key]}ARI'
          new_row[header] = quantiles[key][i]
    except (IndexError, KeyError, TypeError):
      print(f"No usable quantiles returned for {site}; skipping")
      continue

    new_ari_df = pd.DataFrame([new_row])
    updated_ari_df = pd.concat([old_ari_df, new_ari_df], ignore_index=True)
    updated_ari_df.to_csv(ari_output, index=False)
    print(f"Done with extracting ARI data.  {ari_output} can be found in the current working directory.")




##**3. Select End Date (Valid Time)**
Select the valid date/time you want to view the snapshot on the tool.  The date/time format has to be exact, so make sure it's formatted correctly. Valid times are always in UTC.  You will also need to enter the log-in credentials for the AGOL account (NWS Juneau is where the tool resides).

After this cell finishes running (assuming you have input the correct AGOL credentials), you can check the output at: https://noaa.maps.arcgis.com/apps/dashboards/a4857d02205247a2965b1ad3c5cc369f

In [None]:

"""
Created on Tue Jan 26 16:54:08 2021

@author: David Levin
"""

#@markdown **Enter your valid time in "YYYYmmddhhmm" format below**
END = '' #@param {type:"string"}
# START is 24 hours before END, in the same string format.
# strptime raises ValueError if END is left empty or is not 'YYYYmmddHHMM'.
START = datetime.strptime(END, '%Y%m%d%H%M') - timedelta(hours=24)
START = START.strftime('%Y%m%d%H%M')
###################### Config for the Precipitation Script ###################

###################### MesoWest API Config ###################################
# The state we want to pull MesoWest data from
STATE = 'ak'

# Look-back window in minutes (180 = 3 hours).
# NOTE(review): the download_precip defined below accepts this as `recent`
# but builds its URL from start/end and never uses it -- confirm it is needed.
RECENT = '180'

# pmode (totals, intervals, last), defines the interval mode to calculate precipitation.
# If omitted the returned JSON formatting will be significantly different.
PMODE = 'last'
# pmode=intervals, Returns accumulated precipitation for intervals provided in
# the additional interval argument. Valid keywords for interval are hour, day,
# week, month, year, or non-zero integer in hours. Integers must be a factor or
# multiple of 24 (1,2,3,4,6,8,12,24,48,72,etc). Default value is day if
# interval is not provided. Partial intervals at the end of a requested range
# are still returned. Note that all keywords or integers provided to interval
# will use UTC time zone to define the start and end of each interval.
# However, each interval respects the requested start hour such that intervals
# can be offset for a local time zone.
INTERVALS = ['1', '3', '6', '12', '24', '48', '72']

# You will need a MesoWest API account.  It's free and you will receive your own token
# for downloading data.  This reuses the `token` value entered in step 2.
TOKEN = token

######################## File Paths ###########################################

# Directory where the .csv files are written and read...change this!
# Could also add an upload method and have it upload to the web for better
# linkage with Arc Online
OBS_PATH = '/nas/nomad/ARITool'

# Check if the directory exists
print("Now setting up directories")
if not os.path.exists(OBS_PATH):
    # If it doesn't exist, create it
    os.makedirs(OBS_PATH)
    print(f"Directory '{OBS_PATH}' created successfully.")
else:
    print(f"Directory '{OBS_PATH}' already exists.")



# Whatever you want to call your precip file (these should be stored in the OBS_PATH directory)
PRECIP_OBS_FILE = 'LatestAKPrecipTEST.csv'
# The file name of the merged precip data and bad station data
FINAL_PRECIP_FILE = 'LatestAKPrecip_BadStations.csv'
# The static ARI .csv file, which should be in the same
# directory as your other data (OBS_PATH)
ARI_FILE = 'TotalARI_Extract_Version3.csv'
# This is the master file which will eventually overwrite the hosted feature layer
# on the ArcGis Online server.
FINAL_OUTPUT_FILE = 'LatestPrecip_ARITotal_Retrospective_TEST.csv'
# Where you would like your log file to be placed
LOG_PATH = '/nas/nomad/ARITool'
# Name of your log file
LOG_FILE = 'meso_west.log'

# Stage the ARI file: move it from the current working directory into
# OBS_PATH, or fall back to a copy that is already in OBS_PATH.
print("Now moving some files around to get started...")
# Source file path (assuming it's in the current working directory)
source_file = ARI_FILE

# Destination directory path (the same directory as the rest of the data)
destination_dir = OBS_PATH

# Destination file path
destination_file = os.path.join(destination_dir, source_file)

try:
  # Only the move itself can fail with a missing source file
  shutil.move(source_file, destination_file)
except FileNotFoundError:
  print(f"Source file '{source_file}' not found in working directory.")
  print(f"Will check {OBS_PATH}")
  if os.path.exists(destination_file):
    print(f"'{ARI_FILE}' already exists in {OBS_PATH}")
  else:
    raise FileNotFoundError(f"{ARI_FILE} not found in {OBS_PATH}.  Run cell 2 one more time!")
else:
  print(f"File moved successfully to {destination_file}")


###################### Output Headers & Formating #############################

# Header row for the precip .csv file we will generate.
# Just make sure the headers for the variables are in the same order as the
# INTERVALS list above
PRECIP_OUTPUT = 'FID,Site,Lat,Lon,DateTime,1hr_Precip,3hr_Precip,6hr_Precip,'
PRECIP_OUTPUT += '12hr_Precip,24hr_Precip,48hr_Precip,72hr_Precip\n'

# A list of redundant columns after merging the ARI and MesoWest dataframes
# (merge_csv gives the right-hand duplicates a '_y' suffix).
# Probably won't have to change this
BAD_COLUMNS = ['FID_y', 'Lat_y', 'Lon_y']
# for dropping NaN values by column name
ARI_HOURS = ['72', '48', '24', '12', '6', '3', '1']

# A list of the return intervals you would like to be calculated
ARI_LIST = ['1000', '500', '200', '100', '50', '25', '10', '5', '2', '1']

#################### ArcGis Login & URLs #####################################
#@markdown Login credentials for AGOL.  Make sure to use your office
#@markdown Enterprise account login info so that you have full permissions
AGOL_USER = '' #@param {type:"string"}

AGOL_PASSWORD = '' #@param {type:"string"}
# Url for the hosted feature layer which will be overwritten.  You can grab
# this by copying the url after opening the hosted feature layer in the NOAA
# Enterprise Account for your office under "Content"
# Note that this hosted layer has to actually exist for this tool to work.
# You will need to manually upload the .csv file the first time. After that, this
# should be automated as you will have a url generated after the first upload.
AGOL_URL = 'https://services2.arcgis.com/C8EMgrsFcRFL6LrL/arcgis/rest/services/'
AGOL_URL += 'LatestPrecip_ARITotal_Retrospective_TEST/FeatureServer'
AGOL_NAME = 'LatestPrecip_ARITotal_Retrospective_TEST'
#https://services2.arcgis.com/C8EMgrsFcRFL6LrL/arcgis/rest/services/LatestPrecip_ARITotal_Retrospective_TEST/FeatureServer

###################### Config for Flagging Bad Stations ######################
# URL for the google sheet RFC publishes as a web service
STN_URL = 'https://script.googleusercontent.com/macros/echo?user_content_key='
STN_URL += '5J_H9sc27K5WgpKbXmbm_zG9LirVMIUFRIggn7LFHkkCbDZU2csoUZH5mTS2uXmfTiv'
STN_URL += 'ywganm1M-nO4EO14d-FxMMk8D-Ep5m5_BxDlH2jW0nuo2oDemN9CCS2h10ox_1xSnc'
STN_URL += 'GQajx_ryfhECjZEnG-eZm-jLUSFG87Bnj5J5AjenQfNssDMPC4AmEKfQvHjFVxZhIML'
STN_URL += 'mTS8mzDTCqnGIg&lib=MXTfKjftoipE0WwmBFaK8ZA0VGsytdp8v'

# Columns to keep from the bad-station data: "Status", "Status as of" and
# "Notes" come from the APRFC spreadsheet; "Bad_Obs" and "Good_Obs" are the
# columns add_dummy_cols creates.
COL_INDEX = ['Status', 'Status as of', 'Notes', 'Bad_Obs', 'Good_Obs']
# File to which you are writing your text data from the url above
BAD_STN_FILE = 'Bad_Stations.txt'
# where your parsed data gets placed from the data in the text file above
BAD_STN_SHEET = 'APRFC_Bad_Stations.csv'

########################## Methods ##############################################

def download_precip(start, end, state, recent, mode, ints, token):
    """
    Request accumulated precipitation from the Synoptic (MesoWest) precip API.

    Parameters
    ----------
    start : Start of the requested period as a string (presumably
        'YYYYmmddHHMM' in UTC, like the START value built in the config).
    end : End of the requested period, same format as start.
    state : The state abbreviation (lower case) for
        which you are requesting data.
    recent : Number of minutes to look back for new obs from the most recent
        time. 180 would be looking back 3 hrs for instance.  Currently unused:
        the request is bounded by start/end instead.  Kept so existing calls
        keep working.
    mode : The API 'pmode' value (e.g. 'intervals' or 'last').  'last' is
        preferred and this is how the method is set up.
    ints : A list of the time intervals, as strings, for which to pull precip
        data in hours (i.e. '1','3','6','12','24')
    token : Your MesoWest API
        token.

    Returns
    -------
    data : The raw response body (bytes) holding the JSON document returned
        by the API.

    """
    # `import urllib` alone does not guarantee the `request` submodule is loaded
    import urllib.request

    url = (
        'https://api.synopticdata.com/v2/stations/precip?'
        + 'state=' + state + '&pmode=' + mode
        + '&start=' + start + '&end=' + end
        + '&accum_hours=' + ','.join(ints)
        + '&units=english&output=json&token=' + token
    )
    # Close the connection as soon as the body has been read
    with urllib.request.urlopen(url) as page:
        data = page.read()

    return data

def parse_json(data):
    """
    Decode a JSON document into the equivalent Python object.

    Parameters
    ----------
    data : JSON text (str or bytes), such as the body returned by the
        MesoWest API
    Returns
    -------
    json_dict : The decoded Python object (a dictionary for the API
        responses used in this tool).
    """
    return json.loads(data)

def _format_precip_total(total):
    """Format one precip total for the .csv: blank if missing, '0.00' for traces."""
    if total is None:
        return ''
    # Getting rid of traces that show up as 0.001
    if total >= 0.01:
        return str(round(total, 2))
    return '0.00'


def parse_precip(precip_output, json_dict, n_intervals=7):
    """
    Append one comma separated row per station to the precip output string.

    Parameters
    ----------
    precip_output : An output string of headers for your precip data which is
        pulled from the config file
    json_dict : A python dictionary created from a MesoWest JSON query
    n_intervals : Number of precip columns in the output (7 by default, one
        per accumulation interval in the header).

    Returns
    -------
    precip_output : The original output string with all the data organized and
        added into a comma separated format

    Notes
    -----
    Each row is FID,Site,Lat,Lon,DateTime followed by n_intervals precip
    values.  The FID is the station's position in json_dict['STATION'] and
    the DateTime is the 'last_report' of the first precipitation entry.
    When a station reports fewer than n_intervals totals, the leading precip
    columns are left blank and the reported totals fill the trailing columns.
    NOTE(review): this assumes the missing intervals are always the shortest
    ones -- confirm against the API.
    Stations with no precipitation entries, or with more entries than there
    are columns, are skipped so that every row written is complete.
    """
    rows = [precip_output]
    for i, ob in enumerate(json_dict['STATION']):
        pdata = (ob.get('OBSERVATIONS') or {}).get('precipitation') or []
        if not pdata or len(pdata) > n_intervals:
            # Nothing to report, or the totals cannot be matched to columns
            continue
        # ArcGIS needs an FID field to plot so making one up with i
        fields = [
            str(i),
            ob['STID'],
            ob['LATITUDE'],
            ob['LONGITUDE'],
            pdata[0]['last_report'],
        ]
        # Blank placeholders for the intervals that were not reported
        fields.extend([''] * (n_intervals - len(pdata)))
        fields.extend(_format_precip_total(value.get('total')) for value in pdata)
        rows.append(','.join(fields) + '\n')
    return ''.join(rows)

def grab_bad_stnlist(url):
    """
    Download the raw contents of a url.

    Parameters
    ----------
    url : a url from which you wish to scrape data (used here for the APRFC
        bad station list)
    Returns
    -------
    data : the response body (bytes) read from the url

    """
    # `import urllib` alone does not guarantee the `request` submodule is loaded
    import urllib.request

    # now requesting the bad station list from APRFC; the context manager
    # closes the connection once the body has been read
    with urllib.request.urlopen(url) as page:
        data = page.read()

    return data

def write_to_file(data, path, fname, binary=True):
    """
    Write data to a file, replacing any existing file of the same name.

    Parameters
    ----------
    data : raw data (such as from a web scrape); bytes when binary is True,
        str when binary is False
    path : where you want your data stored
    fname : name of your output file
    binary : write mode of 'wb' if True and 'w' if False
    Returns
    -------
    None

    """
    writeflag = 'wb' if binary else 'w'
    # The context manager closes the file even if the write fails
    with open(os.path.join(path, fname), writeflag) as outfile:
        outfile.write(data)

def format_data(path, fname):
    """
    Load a JSON text file into the equivalent Python object.

    Parameters
    ----------
    path : path to your data
    fname : name of your text file
    Returns
    -------
    dict_fm_file : formatted data (python dictionary/list from your text data)

    """
    source = os.path.join(path, fname)
    with open(source) as handle:
        return json.load(handle)

def build_badstn_dict(station_list):
    """
    Turn a list of per-station records into a dictionary of columns.

    Parameters
    ----------
    station_list : a python list of dictionaries formatted from the APRFC web
        service google sheet of bad station data
    Returns
    -------
    stn_dict : reformats the data into a dictionary with the keys being
        column headers in the original spreadsheet and the values being a list
        containing the column values.  The keys are taken from the first
        record; an empty station_list gives an empty dictionary.

    Raises
    ------
    KeyError : if a later record is missing one of the first record's keys

    """
    if not station_list:
        return {}
    return {
        key: [stn[key] for stn in station_list]
        for key in station_list[0]
    }

def create_badstn_dataframe(df_dict, drop_col, drop_dupes=True):
    """
    Build a dataframe from a dictionary of columns, optionally de-duplicated.

    Parameters
    ----------
    df_dict : a python dictionary from which you want a dataframe
    drop_col : A column (string) you wish to search for duplicate values
    drop_dupes : if set to True will drop all rows that repeat a value already
    seen in the drop_col column (the first occurrence is kept) and then reset
    the index, which keeps the old index as a new 'index' column
    Returns
    -------
    newdf : a pandas dataframe

    """
    frame = pd.DataFrame(df_dict)
    if not drop_dupes:
        return frame
    frame.drop_duplicates(drop_col, inplace=True)
    frame.reset_index(inplace=True)
    return frame

def save_badstn_dataframe(df, path, dfname, cols_to_keep, col_to_drop,
                          dropcols=True, dropindex=True, keep_cols=True):
    """
    Save a dataframe as a .csv, optionally trimming its columns first.

    Parameters
    ----------
    df : a pandas dataframe you wish to save as a .csv
    path : where you want your file saved
    dfname : what you want to call your file
    cols_to_keep : a list of columns you wish to save (can be an empty list,
                                                       just set keep_cols to False)
    col_to_drop : A single column (string) you wish to drop from your dataframe.  Can be
    empty string, in which case nothing is dropped.
    dropcols : You can choose to drop a single column if true.  Otherwise
    better just to use cols_to_keep with keep_cols = True
    dropindex : Deleting the index column if desired (True).  Only has an
    effect when keep_cols is False.
    keep_cols : keeps the columns passed in the cols_to_keep list if True
    Returns
    -------
    None

    Raises
    ------
    KeyError : if dropcols is True and a non-empty col_to_drop is not a column
        of df

    """
    # An empty col_to_drop is allowed by the contract above, so only drop
    # when a column name was actually given.
    final_df = df.drop([col_to_drop], axis=1) if dropcols and col_to_drop else df
    out_path = os.path.join(path, dfname)
    if keep_cols:
        # NOTE(review): the index is never written on this path, whatever
        # dropindex says; this matches the existing behaviour -- confirm.
        final_df = final_df.reindex(columns=cols_to_keep)
        final_df.to_csv(out_path, columns=cols_to_keep, index=False)
    else:
        final_df.to_csv(out_path, index=not dropindex)

def merge_badstn_dataframe(df1, df2, path, stn_columns, merge_how, left_merge, right_merge):
    """
    Merge two .csv files and add the gauge status columns.

    Parameters
    ----------
    df1 : file name of the primary .csv file (keeping all data)
    df2 : file name of the secondary .csv file (you are merging this data with df1)
    path : directory that holds both .csv files
    stn_columns : a list of columns you wish to keep from the stn_data df (can be empty)
    merge_how : How you want to merge ("left" is preferred but if you choose "right" you will keep
                                       all data from df2 instead)
    left_merge : Column in df1 that you wish to merge on
    right_merge : Column in df2 that you wish to merge on

    Returns
    -------
    added_df : A merge of df1 and df2 with dummy columns added for bad and good obs
    new_cols : The columns of df1 followed by stn_columns, i.e. the list of
        columns you wish to keep in your final df

    """
    primary = pd.read_csv(os.path.join(path, df1))
    secondary = pd.read_csv(os.path.join(path, df2))
    new_cols = primary.columns.tolist() + stn_columns
    merged = primary.merge(
        secondary,
        how=merge_how,
        left_on=left_merge,
        right_on=right_merge,
    )
    return add_dummy_cols(merged), new_cols

def add_dummy_cols(df1):
    """
    Add 'Bad_Obs' and 'Good_Obs' flag columns derived from the 'Status' column.

    Parameters
    ----------
    df1 : A Pandas dataframe containing a 'Status' column.

    Returns
    -------
    df1 : The same dataframe object (modified in place). 'Bad_Obs' holds
          'Bad' where Status equals 'Bad' and '' elsewhere; 'Good_Obs' holds
          'Good' where Status is anything other than 'Bad' and '' elsewhere.
    """
    status = df1['Status']
    df1['Bad_Obs'] = np.where(status.eq('Bad'), 'Bad', '')
    df1['Good_Obs'] = np.where(status.ne('Bad'), 'Good', '')
    return df1

def merge_csv(path, first, second, cols_to_del, hour):
    """
    Left-merge two .csv files on 'Site' and keep only rows that have ARI data.

    Parameters
    ----------
    path : The directory holding both input files
    first : Filename of the left-hand .csv file (every one of its rows is a
            candidate for the output)
    second : Filename of the right-hand .csv file that is merged onto `first`
    cols_to_del : A list of columns to drop after merging. Columns that exist
            in both files get a '_y' suffix on the copy from `second`
    hour : The accumulation interval (string such as '24') used to pick the
            'F1000yr<hour>hrARI' column that decides whether a row has ARI data

    Returns
    -------
    final_df : A pandas dataframe of the merged data with rows lacking the
            1000 year ARI removed and cols_to_del dropped

    """
    left = pd.read_csv(os.path.join(path, first))
    right = pd.read_csv(os.path.join(path, second))
    merged = pd.merge(left, right, how='left', left_on='Site', right_on='Site',
                      suffixes=('', '_y'))
    # Stations without an ARI show up as NaN in this column after the left merge
    ari_marker = 'F1000yr' + hour + 'hrARI'
    return merged.dropna(subset=[ari_marker]).drop(columns=cols_to_del)

def calc_all_percent_exceedance(path, infile, hours, ri_list):
    """
    Express each precip accumulation as a percentage of every ARI threshold.

    Parameters
    ----------
    path : The directory holding the input file
    infile : An input .csv file containing '<hr>hr_Precip' columns and
            'F<ri>yr<hr>hrARI' columns for every requested combination
    hours : A list of the accumulation intervals as strings ('24', '12', ...)
    ri_list : A list of the ARIs as strings ('1', '2', '5', '10', ...)

    Returns
    -------
    clean_exceedance_df : A pandas dataframe with one
            '<ri>yr_<hr>hr_PercentExceedance' column (rounded to a whole
            percent) added per combination and the ARI columns that were
            used removed

    """
    frame = pd.read_csv(os.path.join(path, infile))
    used_ari_columns = []
    for hr in hours:
        precip = frame[hr + 'hr_Precip']
        for ri in ri_list:
            ari_name = 'F' + ri + 'yr' + hr + 'hrARI'
            pct_name = ri + 'yr_' + hr + 'hr_PercentExceedance'
            frame[pct_name] = precip.div(frame[ari_name]).mul(100).round(0)
            # The raw threshold is no longer needed once the ratio exists
            used_ari_columns.append(ari_name)
    return frame.drop(columns=used_ari_columns)

# Need to pass an hours list
def calc_all_ari(path, infile, hours, ri_list):
    """
    Assign a return interval (RI) to every station for each accumulation
    interval, plus the maximum RI across the standard intervals.

    Parameters
    ----------
    path : The path for your precip data and your ARI (should be in the same location)
    infile : An input .csv file containing the merged MesoWest precip obs and
            ARI data from PFDS along with the calculated % exceedance (important!)
    hours : A list of the time intervals for the accumulated precip (string such as: '24','12')
    ri_list : A list of the ARI for which percent exceedances were calculated
            (strings such as '1000', '500', ... '1').  np.select uses the first
            condition that matches, so list the ARIs from the longest return
            interval to the shortest; the last entry is used for the
            "nothing exceeded" fallback.

    Returns
    -------
    ri_df : A pandas dataframe containing the input data plus one '<hr>hr_RI'
            column per entry in hours and a 'Max_RI' column.  An RI column
            holds the first ARI in ri_list whose percent exceedance is >= 100,
            otherwise the last ARI's percent exceedance divided by 100
            (rounded to 2 places), or 0 where the exceedance is missing.

    Notes
    -----
    'Max_RI' is always taken over the 72/48/24/12/6/3/1 hour RI columns, so
    each of those columns must exist (from hours or already in infile).

    """
    # opening our file
    ri_df = pd.read_csv(os.path.join(path, infile))
    # The value written when a threshold is met is the ARI itself
    ri_values = [float(ari) for ari in ri_list]
    # Looping through the precip accumulation intervals
    for hr in hours:
        col_names = [col + 'yr_' + hr + 'hr_PercentExceedance' for col in ri_list]
        # One ">= 100%" test per ARI, in ri_list order
        conditions = [ri_df[name] >= 100 for name in col_names]
        # For amounts that do not exceed any RI we fall back to the fraction
        # of the LAST ARI in the list.  Both the condition and the value use
        # that same column (previously the value was hard-coded to index 9,
        # which only worked when ri_list had exactly ten entries).
        fallback = ri_df[col_names[-1]]
        conditions.append(fallback < 100)
        output = ri_values + [fallback.div(100).round(2)]
        # Now we add a new column to our dataframe with the calculated output
        # values generated from our conditions
        ri_df[hr + 'hr_RI'] = np.select(conditions, output)
    # calculating which precip accumulation interval has the highest RI
    ri_df['Max_RI'] = ri_df[['72hr_RI', '48hr_RI', '24hr_RI', '12hr_RI',
                             '6hr_RI', '3hr_RI', '1hr_RI']].max(axis=1)
    return ri_df

def replace_gis_file(user, pw, path, myfile, agol_name):
    """
    Delete the existing AGOL items matching agol_name, then upload and
    publish the .csv file as a fresh hosted layer.

    Parameters
    ----------
    user : Your AGOL username (string)
    pw : Your AGOL password (string)
    path : The file path where your .csv file is stored (string)
    myfile : The filename of your .csv file (string)
    agol_name : The name of your hosted feature layer (should be the filename
                without the extension, i.e. 'AK_Obs' instead of 'AK_Obs.csv')

    Returns
    -------
    None.  The URL of the newly published layer is printed; an IOError during
    the delete/upload/publish sequence is reported as "GIS error".
    """
    # GIS logs pretty much everything for you so no need to call logger here
    portal = GIS('https://noaa.maps.arcgis.com/home', username=user, password=pw)

    try:
        csv_path = os.path.join(path, myfile)
        # Clear out anything already posted under this name, CSV items first
        # and then the feature layer collections
        for kind in ("CSV", "Feature Layer Collection"):
            for stale in portal.content.search(query=agol_name, item_type=kind):
                stale.delete()
                print("Deleted existing " + kind + ": ", stale)

        # Upload the new .csv and publish it as a hosted layer
        uploaded = portal.content.add({}, csv_path)
        published = uploaded.publish()

        print (published.url)

    except IOError:
        print("GIS error")

def do_some_gis(user, pw, path, myurl, myfile):
    """
    Overwrite an existing hosted feature layer on AGOL with a local .csv file.

    Parameters
    ----------
    user : Your AGOL username (string)
    pw : Your AGOL password (string)
    path : The file path where your .csv file is stored (string)
    myurl : The URL of your hosted feature layer service from AGOL (string)
    myfile : The filename of your .csv file (string)

    Returns
    -------
    None
    """
    # GIS logs pretty much everything for you so no need to call logger here
    session = GIS('https://noaa.maps.arcgis.com/home', username=user, password=pw)
    # The service URL generated within AGOL identifies the collection whose
    # data will be overwritten
    collection = FeatureLayerCollection(myurl, session)
    # Make sure your path and filename match the file you just wrote
    new_data = os.path.join(path, myfile)
    collection.manager.overwrite(new_data)

def create_hex_colors(ripath, rifile):
    """
    Add a hex colour column for every return-interval (RI) column in a .csv.

    Parameters
    ----------
    ripath : The directory holding the RI .csv file
    rifile : The filename of the .csv file; it must contain the columns
             '1hr_RI', '3hr_RI', '6hr_RI', '12hr_RI', '24hr_RI', '48hr_RI',
             '72hr_RI' and 'Max_RI'

    Returns
    -------
    ridf : A pandas dataframe of the file contents with a matching
           '<interval>RI_Color' (or 'MaxRI_Color') column added for each RI
           column.  RI values <= 1 and the exact values 2, 5, 10, 25, 50, 100,
           200, 500, 1000 and 2000 each map to a colour; anything else
           (including missing values) is written as the string '0'.
    """
    ri_columns = ['1hr_RI', '3hr_RI', '6hr_RI', '12hr_RI', '24hr_RI', '48hr_RI', '72hr_RI', 'Max_RI']
    new_columns = ['1hrRI_Color', '3hrRI_Color', '6hrRI_Color', '12hrRI_Color', '24hrRI_Color',
                   '48hrRI_Color', '72hrRI_Color', 'MaxRI_Color']
    # Colours line up one-to-one with the conditions built below
    values = ['#55FF00', '#FFFF00', '#FFAA00', '#FF0000', '#FF99FF', '#FF00FF', '#660066', '#FFFFFF',
              '#FFFFFF', '#FFFFFF', '#FFFFFF']
    exact_matches = [2, 5, 10, 25, 50, 100, 200, 500, 1000, 2000]
    ridf = pd.read_csv(os.path.join(ripath, rifile))
    for ri_col, color_col in zip(ri_columns, new_columns):
        print(ri_col)
        ri = ridf[ri_col]
        conditions = [ri <= 1] + [ri == level for level in exact_matches]
        # np.select's implicit default is the integer 0, which newer NumPy
        # releases refuse to combine with string choices (TypeError).  Pass
        # the string '0' explicitly: it is what older NumPy produced for
        # unmatched rows and it works on every version.
        ridf[color_col] = np.select(conditions, values, default='0')
    return ridf
######################## Main Code ##############################################

def execute(START, END):
    """
    Run the full retrospective workflow for one time window: download precip,
    merge in the bad-station list and the ARI data, compute percent
    exceedances / return intervals / colours, and push the result to AGOL.

    Parameters
    ----------
    START : start of the precip window, passed straight to download_precip
    END : end of the precip window, passed straight to download_precip

    Returns
    -------
    None.  Intermediate and final .csv files are written under OBS_PATH, a log
    is written to LOG_PATH/LOG_FILE, and the hosted layer at AGOL_URL is
    overwritten.

    Notes
    -----
    All other settings (STATE, RECENT, PMODE, INTERVALS, TOKEN, the file names,
    ARI_HOURS, ARI_LIST, AGOL credentials, ...) are read from module-level
    variables that must be defined before this is called.
    """
    # Setting up our logging
    # using the default root logger; existing handlers are removed first so
    # basicConfig takes effect again on a re-run in the same session
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    # Configuring our log
    logging.basicConfig(filename=os.path.join(LOG_PATH, LOG_FILE), filemode='w',
                        format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S',
                        level=logging.DEBUG)
    # Grabbing our logger
    logger = logging.getLogger('')

    # grabbing our data
    precip_data = download_precip(START, END, STATE, RECENT, PMODE, INTERVALS, TOKEN)
    logger.info('Grabbed JSON object from MesoWest')

    # parsing the raw JSON into a python-readable dictionary
    parsed_precip = parse_json(precip_data)
    logger.info('Now checking to see if we have a valid query')
    # Checking to see if we made a valid request from the MesoWest API for precip
    if parsed_precip['SUMMARY']['NUMBER_OF_OBJECTS'] != 0:
        logger.info('Found valid MesoWest API query')
    else:
        # printing the error message from the JSON object to the log file
        # NOTE: processing continues after this warning (the exit is disabled)
        logger.warning('Invalid JSON request! %s', parsed_precip['SUMMARY']['RESPONSE_MESSAGE'])
        #sys.exit()
    logger.info('Now looping through the stations')
    # Creating the final precip output string
    final_output = parse_precip(PRECIP_OUTPUT, parsed_precip)
    # Writing the output to the PRECIP_OBS_FILE; the context manager makes
    # sure the handle is closed even if the write fails
    with open(os.path.join(OBS_PATH, PRECIP_OBS_FILE), 'w') as f:
        f.write(final_output)
    logger.info('Final output now written to %s', PRECIP_OBS_FILE)
    # Now grabbing bad stations from APRFC and appending to the precip file
    logger.info('Now grabbing the bad station list from APRFC')
    write_to_file(grab_bad_stnlist(STN_URL), OBS_PATH, BAD_STN_FILE)
    # Formatting the raw text into a python list
    stn_data = format_data(OBS_PATH, BAD_STN_FILE)
    # reformatting the list into a dictionary for ease of transition into pandas
    station_dictionary = build_badstn_dict(stn_data)
    # Creating our dataframe from the bad station data
    station_dataframe = create_badstn_dataframe(station_dictionary, 'Location ID')
    # Saving the file
    logger.info('Now saving the bad station data as %s', BAD_STN_SHEET)
    save_badstn_dataframe(station_dataframe, OBS_PATH, BAD_STN_SHEET,
                          station_dataframe.columns, 'Row', keep_cols=False)
    # Now merging it with the precip file
    logger.info('Now merging precip data with the bad station list from APRFC')
    merged_df, columns = merge_badstn_dataframe(PRECIP_OBS_FILE, BAD_STN_SHEET,
                                                OBS_PATH, COL_INDEX, 'left', 'Site', 'Location ID')
    # Now saving the precip file with the bad station data appended
    save_badstn_dataframe(merged_df, OBS_PATH, FINAL_PRECIP_FILE, columns, '', dropcols=False)
    logger.info('All done updating %s with bad stations!', FINAL_PRECIP_FILE)
    print("All Done Updating Bad Stations!")
    # Merging the precip output with the ARI file
    logger.info('Now merging precip data with the ARIs')
    # Each stage below reads FINAL_OUTPUT_FILE back from disk, so the file is
    # rewritten (without the index column) after every step
    final_output_path = os.path.join(OBS_PATH, FINAL_OUTPUT_FILE)
    precip_df = merge_csv(OBS_PATH, FINAL_PRECIP_FILE, ARI_FILE, BAD_COLUMNS, '24')
    precip_df.to_csv(final_output_path, index=False)
    # Calculating the percent exceedances for each precip interval at each station
    ex_df = calc_all_percent_exceedance(OBS_PATH, FINAL_OUTPUT_FILE, ARI_HOURS, ARI_LIST)
    ex_df.to_csv(final_output_path, index=False)
    # Calculating the return intervals for each precip interval at each station
    ari_df = calc_all_ari(OBS_PATH, FINAL_OUTPUT_FILE, ARI_HOURS, ARI_LIST)
    ari_df.to_csv(final_output_path, index=False)
    # Adding the hex colour columns for each return interval column
    ari_color_df = create_hex_colors(OBS_PATH, FINAL_OUTPUT_FILE)
    ari_color_df.to_csv(final_output_path, index=False)
    logger.info('All done updating %s with final output!', FINAL_OUTPUT_FILE)
    print("All done updating "+FINAL_OUTPUT_FILE+" with final output")
    # This will upload the new .csv of precip/ari data to AGOL and overwrite the
    # hosted feature layer.  If you for some reason need to add a new column to the
    # .csv you will need to comment this line out and run the 'replace_gis_file'
    # function below
    do_some_gis(AGOL_USER, AGOL_PASSWORD, OBS_PATH, AGOL_URL, FINAL_OUTPUT_FILE)
    logger.info('All done updating %s on the AGOL server', AGOL_URL)
    print('All done updating %s on the AGOL server' %(AGOL_URL))
    # Use the below line ONLY if you need to add a column to the final output .csv file
    # This will delete the hosted feature layer and replace it with a new one
    # You will need to comment out the 'do_some_gis' line above
    #replace_gis_file(AGOL_USER, AGOL_PASSWORD, OBS_PATH, FINAL_OUTPUT_FILE, AGOL_NAME)

if __name__ == '__main__':

    # Only the timestamp parsing is guarded: a ValueError raised later inside
    # execute() is a different problem and must not be reported as a bad
    # datetime format.
    try:
        test_start = datetime.strptime(START, '%Y%m%d%H%M')
        test_end = datetime.strptime(END, '%Y%m%d%H%M')
    except ValueError:
        test_start = 0
        test_end = 0
        print('Wrong datetime format! Must be YYYYmmddHHMM. Ex: 202012021800')
    else:
        # Both timestamps parsed cleanly, so run the workflow
        execute(START, END)

