GIS 5572 Lab 4

Due: 3 weeks from date of assignment

Goals

1. Build a fully functional real-time data visualization and analysis workflow
2. Compare and contrast three types of interpolation: Kriging, IDW, 1 chosen
3. Map stations for avg. monthly temp in live map

Specifics

1. Deliver a notebook/lab writeup that can interpolate last 30 days of NDAWN data live

2. Build an ETL to pull the last 30 days of temperature data from the DNAWN site for all of the NDAWN stations. 
    1. Follow ESRI guide for choosing interpolation methods. Use to justify your methods.

3. What does the literature recommend be used for interpolating temperature data? 
    1. Why? (Find one or two articles to support your claims and reference them in the lab writeup)




## Stations and Values

In [1]:
station_set = {'Grafton' : 77'Plaza' : 67,'Jamestown' : 33,'Garrison' : 83,
'Bottineau' : 12,'Crary' : 18,'Langdon' : 34,'Baker' : 9,
'Bowbells' : 58,'Mott' : 69,'Pillsbury' : 64,'Amidon' : 142,
'Denhoff' : 129,'Finley' : 86,'Dickinson' : 20,'Dunn' : 81,
'Egeland' : 22,'Mohall' : 41,'Dazey' : 19,'Mayville' : 37,
'Karlsruhe' : 59,'Maddock' : 109,'Webster' : 112,'Pekin' : 88,
'Alamo' : 98,'Horace' : 32,'Mandan' : 36,'Marion' : 79,
'Rolla' : 46,'Cando' : 14,'Epping' : 139,'Harvey' : 27,
'Hazen' : 28,'Lisbon' : 76,'Michigan' : 73,'Brampton' : 84,
'Croff' : 136,'Tappen' : 68,'Robinson' : 45,'Edmore' : 97,
'Hawkeye' : 132,'Genoa' : 107,'Niles' : 113,'Mooreton' : 54,
'Cooperstown' : 85,'Inkster' : 80,'Northwood' : 42,'Rugby' : 74,
'Towner' : 49,'Ross' : 66,'Carrington' : 15,'Fortuna' : 126,
'Ray' : 106,'Walhalla' : 51,'Cavalier' : 16,'Crystal' : 104,
'Minot' : 40,'Oakes' : 43,'Galesburg' : 25,'Edgeley' : 21,
'Bowman' : 13,'Fingal' : 62,'Liberty' : 135,'Adams' : 111,
'Leonard' : 72,'Linton' : 35,'Hope' : 102,'Berthold' : 56,
'Charbonneau' : 137,'McLeod' : 39,'Streeter' : 48,
'Prosper' : 44,'Steele' : 108,'Williston' : 53,'Courtenay' : 140,
'Wishek' : 57,'Wahpeton' : 63,'Werner' : 131,'Bismarck' : 11,
'Kempton' : 105,'Zeeland' : 110,'Crosby' : 65,'Sawyer' : 125,
'Hillsboro' : 30,'Grenora' : 127,'McHenry' : 38,'Columbus' : 17,
'Carson' : 96,'Arnegard' : 138,'Noonan' : 128,'Crane Creek' : 134,
'Ekre' : 75,'Forest River' : 24,'Fort Yates' : 89,'Grand Forks' : 26,
'Hofflund' : 31,'Little Falls' : 120,'Logan Center' : 141,'Medicine Hole' : 130,
'Parkers Prairie' : 116,'Pine Point' : 115,'Rat Lake' : 133,'St. Thomas' : 47,
'Turtle Lake' : 50,'Watford City' : 52,
#MN
'Ada' : 78,'Becker' : 118,'Campbell' : 87,'Clarissa' : 124,
'Eldred' : 2,'Fox' : 93,'Greenbush' : 70,'Hubbard' : 119,
'Humboldt' : 4,'Kennedy' : 82,'Mavie' : 71,'Ottertail' : 103,
'Perham' : 114,'Perley' : 3,'Rice' : 121,'Roseau' : 61,
'Sabin' : 60,'Staples' : 122,'Stephen' : 5,'Ulen' : 91,
'Wadena' : 117,'Warren' : 6,'Waukon' : 92,'Westport' : 123,
'Williams' : 95,
#MT
'Brorson' : 7,'Dagmar' : 99,'Dooley' : 101,'Froid' : 90,
'Redstone' : 100,'Sidney' : 8,
#NW
'Fargo' : 23,'Hettinger' : 29,
#SD
'Britton' : 55}

SyntaxError: invalid syntax (<string>, line 1)

## NDAWN Request Function

In [3]:
import pandas as pd
import requests
from datetime import date
from io import StringIO

class ndawn_request:

    def __init__(self, startDate='YYYY-MM-DD', endDate='YYYY-MM-DD', ontology = None, location = None, save = False):

        self.start = startDate
        self.end = endDate

        # List of ontology terms, and their URL codes to build request URL
        self.ontology = {
            'Air Temperature': ['variable=hdt', 'variable=hdt9'],
            'Relative Humidity': ['variable=hdrh', 'variable=hdrh9'],
            'Soil Temperature': ['variable=hdbst', 'variable=hdtst'],
            'Wind Speed': ['variable=hdws', 'variable=hdmxws', 'variable=hdws10', 'variable=hdmxws10'],
            'Wind Direction': ['variable=hdwd', 'variable=hdsdwd', '&variable=hdwd10', 'variable=hdsdwd10'],
            'Solar Radiation': ['variable=hdsr'],
            'Rainfall': ['variable=hdr'],
            'Air Pressure': ['variable=hdbp'],
            'Dew Point': ['variable=hddp'],
            'Wind Chill': ['variable=hdwc']}
        # Concatenate the ontology keys into a list for exception printout later
        ontologiesErrorMessage = '\n'.join(list(self.ontology.keys()))

        self.stations = station_set
        
        # Concatenate station names into a list for exception printout later
        stationsErrorMessage = '\n'.join(list(self.stations.keys()))
        self.save = save

        # This checks the start and end dates supplied to make sure they are valid
        # Start by converting dates into iso format
        startDateCheck = date.fromisoformat(startDate)
        endDateCheck = date.fromisoformat(endDate)
        # If start date is after end date, raise exception
        if startDateCheck > endDateCheck:
            raise Exception('End date cannot be before start date')
        
        # Create empty list to hold URL codes for ontology terms
        self.activeMeasures = []
        # If user supplies ontology terms
        if ontology is not None:
            for item in ontology:
                # If user-supplied term is not in the dictionary, raise exception
                if item not in self.ontology.keys():
                    raise Exception('Ontology term [' + str(item) + '] not recognized. Available ontology terms include: ' + '\n' + ontologiesErrorMessage)
                # Otherwise, append URL codes for ontology terms into the list of measurements to be requested
                else:
                    for code in self.ontology[item]:
                        self.activeMeasures.append(code)  
        # If user does not supply ontology terms, add all URL codes in dictionary to the list of measurements to be requested    
        else:
            for key in self.ontology:
                for code in self.ontology[key]:
                    self.activeMeasures.append(code)

        # Create empty list to hold URL codes for stations
        self.activeStations = []
        # If user supplies station names
        if location is not None:
            for name in location:
                # If user-supplied name is not in the dictionary, raise exception
                if name not in self.stations.keys():
                    raise Exception('Station [' + str(name) + '] not recognized. Available stations include: ' + '\n' + stationsErrorMessage)
                # Otherwise, append URL codes for stations into the list of stations to be requested
                else:
                    self.activeStations.append('station=' + str(self.stations[name]))
        # If user does not supply station names, add all station URL codes in dictionary to the list of stations to be requested
        else:    
            for key in self.stations:
                self.activeStations.append('station=' + str(self.stations[key]))

    def get_data(self):
        
        # Construct API call for the request
        baseURL = 'https://ndawn.ndsu.nodak.edu/table.csv?'
        stations = '&'.join(self.activeStations)
        measurements = '&'.join(self.activeMeasures)
        options = '&ttype=hourly&quick_pick=&begin_date=' + self.start + '&end_date=' + self.end
        finalURL = str(baseURL + stations + '&' + measurements + options)
        
        # Request page
        page = requests.get(finalURL)
        # If status code not 200, raise exception
        if page.status_code != 200:
            raise Exception('URL request status not 200. Status code = ' + page.status_code)

        print('Request successful')

        # Convert csv data to string
        content = str(page.content)
        # Remove large, unnecessary header
        trimContent = content[content.find('Station'):len(content)]
        # Replace newline/return with string literal newline
        formatContent = trimContent.replace('\\r\\n', '\n')
        # Convert content to file object
        contentFile = StringIO(formatContent)

        # Read content into pandas dataframe. Second header row contains units
        ndawnData = pd.read_csv(contentFile, header = [0, 1])
        
        # Concatenate headers to include units
        # Assign column list to object
        columnHeaders = list(ndawnData.columns)
        # List of new headers
        newHeaderList = []
        # Iterate through column names
        for number in range(0, len(columnHeaders)):
            # If no unit, keep header unchanged, pass into new list
            if 'Unnamed' in columnHeaders[number][1]:
                newHeaderList.append(columnHeaders[number][0])
            # If unit exists, concatenate header and unit, pass into new list
            else:
                newHeader = columnHeaders[number][0] + ' (' + columnHeaders[number][1] + ') '
                newHeaderList.append(newHeader)
        # Assign new column names
        ndawnData.columns = newHeaderList
        # Create single column for datetime
        ndawnData['Date'] = pd.to_datetime(ndawnData[['Year', 'Month', 'Day']])
        # Save to csv if save option selected
        if self.save:
            ndawnData.to_csv('ndawnData.csv', index=False)
        return ndawnData

## Function Call

In [4]:
from datetime import datetime, date, timedelta
import numpy as np

# so now we need to loop that function for the last 30 days.
#this gets today's date
end_day = date.today()
start_day = date.today()-timedelta(30)

end_day = str(end_day)
start_day = str(start_day)

print("Today's date:", end_day)
print("30 days ago date:", start_day)

desired_locations = (list(station_set.keys()))

exampleRequest = ndawn_request(startDate=start_day,
                               endDate=end_day,
                               ontology=['Air Temperature'],
                               location=desired_locations,
                               save = False)
ndawnDF = exampleRequest.get_data()
ndawnDF = ndawnDF.filter(['Station Name','Latitude (deg) ','Longitude (deg) ','Avg Air Temp (Degrees F) '], axis=1)
grouped = ndawnDF.groupby("Station Name")
new_DF = grouped.aggregate(np.mean)
new_DF.to_csv("GroupedNDAWN.csv")

Today's date: 2021-04-17
30 days ago date: 2021-03-18
Request successful


## CSV to Points

In [2]:
import arcpy
in_table = "C:\\Users\\Cole\\Documents\\GitHub\\GIS5572\\Lab4\\GroupedNDAWN.csv"
arcpy.management.XYTableToPoint(in_table, "Station_Pts2",
                                "Longitude (deg)","Latitude (deg)")

## IDW

In [20]:
arcpy.env.workspace = r'C:\Users\Cole\Documents\GitHub\GIS5572\Lab4'

import arcpy
from arcpy import env
from arcpy.sa import *

inPointFeatures = "Station_Pts2.shp"
zField = "Avg Air Temp (Degrees F)"

#ok so 200 is WAY too big
cellSize = .01
power = 3
#searchRadius = RadiusVariable(10, 150)

outIDW = arcpy.sa.Idw(inPointFeatures, zField, cellSize, power)
outIDW.save("IDW_Final")

## Kriging

In [24]:
arcpy.env.workspace = r'C:\Users\Cole\Documents\GitHub\GIS5572\Lab4'
import arcpy
from arcpy import env
from arcpy.sa import *

inFeatures = "Station_Pts2.shp"
field = "Avg Air Temp (Degrees F)"
cellSize = .01
#outVarRaster = "C:/sapyexamples/output/outvariance"
lagSize = 2
majorRange = 1
partialSill = 4
nugget = 0

kModelOrdinary = KrigingModelOrdinary("CIRCULAR", lagSize, majorRange, partialSill, nugget)
kRadius = RadiusFixed(20, 1)
outKriging = Kriging(inFeatures, field, kModelOrdinary, cellSize)
                    
outKriging.save("Kriging_final")

## Nearest Neighbor

In [29]:
arcpy.env.workspace = r'C:\Users\Cole\Documents\GitHub\GIS5572\Lab4'
import arcpy
from arcpy import env
from arcpy.sa import *

inPntFeat = "Station_Pts2.shp"
zField = "Avg Air Temp (Degrees F)"
outRaster = "NNbr_final"
cellSize = .01

# Execute NaturalNeighbor
arcpy.NaturalNeighbor_3d(inPntFeat, zField, outRaster, cellSize)