# Get Datapoints by Polygon

## Set Up Servers and Libraries

<ul>
    <li>Set up the Python environment
        <ul><li>Read deployment.md</li></ul>
    </li>
    <li>Choose a geostreams instance, e.g.
        <ul>
            <li>https://greatlakestogulf.org/geostreams</li>
            <li>https://illinois.greatlakestogulf.org/geostreams</li>
        </ul>
    </li>
    <li>Create an account on your instance of choice</li>
</ul>

In [None]:
from pygeotemporal.sensors import SensorsApi
from pygeotemporal.streams import StreamsApi
from pygeotemporal.datapoints import DatapointsApi

from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd

# Address of the geostreams instance and the account used to reach it.
host = r"https://gltg-dev.ncsa.illinois.edu/geostreams"
username = ""  # email for instance
password = ""  # password for instance

# One client per API family, each built with the same connection settings.
sensorclient, streamclient, datapointclient = (
    api_class(host=host, username=username, password=password)
    for api_class in (SensorsApi, StreamsApi, DatapointsApi)
)

## Set Parameters for Getting Datapoints

In [None]:
# Time window of the query.
since = '20060901'  # Date to start getting datapoints
until = '20200910'  # Date to end getting datapoints

# No source filter is applied.
sources = None

# Response handling below expects JSON; don't change unless you plan to
# change methods.
format_out = "json"

# Set to "true" to get number of datapoints without pulling datapoints.
only_count = "false"

# Search area as a GeoJSON-style polygon. The single ring is closed: its
# first and last vertices are the same point.
geojson_input = {
    "coordinates": [
        [
            [-87.614822, 36.3239771],
            [-87.5488661, 35.7375952],
            [-85.8120269, 35.8178132],
            [-85.9549314, 36.3947567],
            [-87.614822, 36.3239771],
        ]
    ],
    "type": "Polygon",
}


## Routine to Create Dataframe

In [None]:

# Create the geocode for the API: every vertex of the polygon's first ring is
# written as "<index 1>,<index 0>" (latitude then longitude, assuming the
# usual GeoJSON [longitude, latitude] order), rounded to 8 decimals, with all
# commas already percent-encoded as %2C.
geocode = '%2C'.join(
    str(round(coord[axis], 8))
    for coord in geojson_input['coordinates'][0]
    for axis in (1, 0)
)

r = datapointclient.get_datapoints(
    since=since,
    until=until,
    sources=sources,
    format=format_out,
    onlyCount=only_count,
    geocode=geocode,
)

# NOTE(review): everything below iterates the response as a list of datapoint
# dicts; with only_count set to "true" the payload is presumably a count
# instead -- confirm before running the rest of this cell in that mode.
datapoints = r.json()
print("Downloaded %s datapoints" % len(datapoints))

# Gather every property name that occurs on any datapoint, except "site".
found_parameters = set()
for datapoint in datapoints:
    found_parameters.update(datapoint['properties'])
found_parameters.discard('site')

# Order the column names: "owner" and "source" lead (each only when present),
# followed by the remaining parameters in alphabetical order. Tracking the two
# groups explicitly keeps the ordering and the numeric conversion correct even
# when one of the leading columns is missing from the data.
metadata_parameters = [
    name for name in ("owner", "source") if name in found_parameters
]
value_parameters = sorted(found_parameters.difference(metadata_parameters))
all_parameters = metadata_parameters + value_parameters

column_names = ["sensor_id", "sensor_name", "datetime", "created"] + all_parameters


def _parse_timestamp(text):
    """Parse an API timestamp string to minute precision.

    The last four characters are dropped before parsing (presumably the
    seconds and zone suffix -- confirm against the API's timestamp format),
    and a quoted "T" separator, if any, is replaced by a bare T.
    """
    return datetime.strptime(text[:-4].replace('"T"', 'T'), '%Y-%m-%dT%H:%M')


# One row per datapoint: the four fixed columns, then one cell per parameter.
datapoint_rows = []
for datapoint in datapoints:
    props = datapoint['properties']
    row = [
        datapoint['sensor_id'],
        datapoint['sensor_name'],
        _parse_timestamp(datapoint['start_time']),
        _parse_timestamp(datapoint['created']),
    ]
    # A parameter this datapoint does not report is stored as ''.
    row.extend(props.get(prop, '') for prop in all_parameters)
    datapoint_rows.append(row)

df = pd.DataFrame(datapoint_rows, columns=column_names)

# Convert the measured parameters to numeric dtypes; "owner"/"source" are left
# as they are. The '' placeholders are expected to come out as NaN -- confirm
# with the pandas version in use.
for param in value_parameters:
    df[param] = pd.to_numeric(df[param])

## Show First 3 Rows of Dataframe

In [None]:
# Preview the first three rows of the dataframe.
df.head(n=3)

## Show Statistics

In [None]:
# Summary statistics restricted to the parameter columns.
df.loc[:, all_parameters].describe()

## Show Plots

In [None]:
# One line chart per parameter column, skipping the first two entries of
# all_parameters, with the datapoint time on the x-axis.
for column in all_parameters[2:]:
    df.plot.line(x='datetime', y=column)