# <font color='blue'> Create Pandas Dataframes by Polygon</font>

## Overview
This notebook allows a user to fetch Sensors and Datapoints from the [Geostreaming API](https://github.com/geostreams/geostreams) by inputting a polygon as a list of geographical coordinates.  In addition, you can set the start and end time of datapoints and choose which data source institutions to pull from.

This notebook has functionality to create Pandas DataFrames of sensors and datapoints.  The sensor dataframe lists each sensor's metadata.  The datapoints dataframe contains all the available measurement values for each datapoint along with the ID of the sensor that the datapoint belongs to.

<hr>

### Setup Servers and libraries

<ul>
    <li>Setup Python Env</li>
    <ul><li>Read deployment.md</li></ul>
    <li>Choose geostreams instance by populating host below, e.g.</li>
    <ul><li>https://greatlakestogulf.org/geostreams</li>
        <li>https://illinois.greatlakestogulf.org/geostreams</li>
    </ul>
    <li>Create an Account on your instance of choice</li>
    <ul><li>Populate username and password</li></ul>
</ul>

In [None]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd

from pygeotemporal.sensors import SensorsApi
from pygeotemporal.streams import StreamsApi
from pygeotemporal.datapoints import DatapointsApi

# Base URL of the Geostreams instance that every client below talks to.
host = r"https://gltg-dev.ncsa.illinois.edu/geostreams"

# Credentials are taken from the environment when the variables are set, so
# they do not have to be typed into (and accidentally saved or committed with)
# the notebook.  The empty-string fallbacks preserve the original behaviour:
# replace them by hand if you prefer not to use environment variables.
username = os.environ.get("GEOSTREAMS_USERNAME", "") # your email 
password = os.environ.get("GEOSTREAMS_PASSWORD", "") # password for instance

# One API client per resource type; all three share the same host and login.
sensorclient = SensorsApi(host=host, username=username, password=password)
streamclient = StreamsApi(host=host, username=username, password=password)
datapointclient = DatapointsApi(host=host, username=username, password=password)

from pprint import pprint

### Set Parameters for Getting Sensors and Datapoints

In [None]:
# Time window for the datapoint download, written as YYYYMMDD strings.
since = '20060901'  # first date to fetch datapoints for
until = '20200910'  # last date to fetch datapoints for

# Forwarded unchanged as the `sources` argument of the datapoint request.
sources = None

# Response format; leave as "json" unless the functions below are adapted.
format_out = "json"

# Use "true" to request only the number of datapoints instead of the data.
only_count = "false"

# Polygon vertices, one pair per vertex.  The pairs appear to be
# [longitude, latitude] -- confirm against the API before changing the order.
# The last vertex repeats the first one.
coordinates = [
    [-87.614822, 36.3239771],
    [-87.5488661, 35.7375952],
    [-85.8120269, 35.8178132],
    [-85.9549314, 36.3947567],
    [-87.614822, 36.3239771],
]
  


### Functions' descriptions

<ul>
    <li>create_geocode(): create geocode string for geostreams URL</li>
    <li>get_sensors_by_geocode(geocode): get all sensors within the polygon</li>
    <li>get_sensors_parameters(sensors): get all available measurement names</li>
    <li>create_sensor_dataframe(sensors): create dataframe rows of sensor metadata</li>
    <li>get_datapoints(sensors): get all datapoints from sensors</li>
    <li>create_datapoints_dataframe(datapoints, sensors_parameters): create dataframe rows of datapoints</li>
</ul>

In [None]:
def create_geocode(coords=None):
    """Build the geocode string used to query sensors by polygon.

    For every vertex, element 1 is written before element 0, each rounded to
    8 decimal places, and all values are joined with ``%2C`` (a URL-encoded
    comma).  With the notebook's polygon, whose pairs appear to be
    [longitude, latitude], this presumably yields "lat,lon,lat,lon,..." --
    confirm against the API if the vertex order is changed.

    Args:
        coords: Optional iterable of two-element numeric sequences.  When
            omitted, the module-level ``coordinates`` list is used, which is
            what the original zero-argument call did.

    Returns:
        The joined string, or an empty string when there are no vertices.
    """
    if coords is None:
        coords = coordinates

    values = []
    for coord in coords:
        values.append(str(round(coord[1], 8)))
        values.append(str(round(coord[0], 8)))

    # Joining avoids the append-then-trim-trailing-separator dance.
    return '%2C'.join(values)

def get_sensors_by_geocode(geocode):
    """Fetch the sensors for a polygon geocode string.

    Sends ``geocode`` to ``sensorclient.sensors_by_polygon`` and returns the
    ``'sensors'`` entry of the JSON response.

    Args:
        geocode: Geocode string, e.g. as produced by ``create_geocode``.

    Returns:
        The list of sensors from the response, or an empty list when the
        request did not come back with status code 200.
    """
    r = sensorclient.sensors_by_polygon(geocode)

    if r.status_code != 200:
        print("Failed to get sensors with status code %s" % r.status_code)
        # A failed response is not expected to carry a usable 'sensors'
        # payload; stop here instead of failing while parsing the body.
        return []

    sensors = r.json()['sensors']

    print("Downloaded %s sensors" % len(sensors))

    return sensors

def get_sensors_parameters(sensors):
    """Collect the distinct parameter names reported by the given sensors.

    Every sensor is inspected (previously a stray ``break`` stopped after the
    first one, so names that only later sensors report were lost).  The
    entries 'owner', 'site' and 'source' are skipped.

    Args:
        sensors: Iterable of sensor dicts.  A sensor whose 'parameters' entry
            is missing or empty contributes nothing.

    Returns:
        List of parameter names in first-seen order, without duplicates.
    """
    excluded = ('owner', 'site', 'source')

    sensors_parameters = []
    for sensor in sensors:
        for param in sensor.get('parameters') or []:
            if param in excluded or param in sensors_parameters:
                continue
            sensors_parameters.append(param)

    return sensors_parameters

def create_sensor_dataframe(sensors):
    """Build a DataFrame with one row of metadata per sensor.

    Columns: SENSOR_ID, NAME, NETWORK, DATA_START, DATA_END, LATITUDE,
    LONGITUDE, HUC NAME, HUC8.

    The 'id', 'name', 'min_start_time', 'max_end_time' and 'geometry' entries
    are required on every sensor.  The network and HUC entries under
    'properties' are optional: when any level of that nesting is absent the
    corresponding cell is an empty string, so one sensor with incomplete
    metadata no longer aborts the whole frame.

    Args:
        sensors: Iterable of sensor dicts.

    Returns:
        pandas.DataFrame with the columns listed above (empty when ``sensors``
        is empty).
    """
    columns = [
        'SENSOR_ID',
        'NAME',
        'NETWORK',
        'DATA_START',
        'DATA_END',
        'LATITUDE',
        'LONGITUDE',
        'HUC NAME',
        'HUC8',
    ]

    sensor_rows = []
    for sensor in sensors:
        properties = sensor.get('properties') or {}

        sensor_type = properties.get('type')
        network = sensor_type.get('network', '') if isinstance(sensor_type, dict) else ''

        huc = properties.get('huc')
        if not isinstance(huc, dict):
            huc = {}
        huc8 = huc.get('huc8')
        huc8_code = huc8.get('code', '') if isinstance(huc8, dict) else ''

        position = sensor['geometry']['coordinates']

        sensor_rows.append([
            sensor['id'],
            sensor['name'],
            network,
            sensor['min_start_time'],
            sensor['max_end_time'],
            position[1],  # stored under LATITUDE
            position[0],  # stored under LONGITUDE
            huc.get('huc_name', ''),
            huc8_code,
        ])

    sensors_dataframe = pd.DataFrame(sensor_rows, columns=columns)
    return sensors_dataframe

def get_datapoints(sensors):
    """Download the datapoints of every given sensor into one flat list.

    One request is made per sensor through ``datapointclient.get_datapoints``
    using the module-level ``since``, ``until``, ``sources``, ``format_out``
    and ``only_count`` settings.  A sensor whose request does not return
    status code 200 is reported and skipped; the remaining sensors are still
    downloaded.

    Args:
        sensors: Iterable of sensor dicts; only the 'id' entry is read.

    Returns:
        List with the JSON payloads of all successful requests concatenated
        in sensor order.
    """
    all_datapoints = []

    for sensor in sensors:
        # Bound once so the failure message below can name the sensor; the
        # original referenced an undefined `sensor_id` there and raised
        # NameError on the first failed download.
        sensor_id = sensor['id']

        r = datapointclient.get_datapoints(
            sensor_id=sensor_id,
            since=since,
            until=until,
            sources=sources,
            format=format_out,
            onlyCount=only_count
        )

        if r.status_code != 200:
            print("Datapoints download for sensor %s failed with status code %s" % (sensor_id, r.status_code))
            continue

        all_datapoints += r.json()

    return all_datapoints

def _parse_datapoint_time(value):
    """Parse a datapoint timestamp string to a minute-resolution datetime.

    The last four characters are dropped and the remainder is parsed as
    ``%Y-%m-%dT%H:%M``.  Presumably the API sends values shaped like
    ``2019-05-01T10:30:00Z`` -- confirm if the parsing ever fails.
    """
    return datetime.strptime(value[:-4].replace('"T"', 'T'), '%Y-%m-%dT%H:%M')


def create_datapoints_dataframe(datapoints, sensors_parameters):
    """Build a DataFrame with one row per datapoint.

    The frame has the columns 'sensor_id', 'datetime' and 'created', followed
    by one column per entry of ``sensors_parameters`` except "site".  "site"
    is now dropped from the column list as well as from the row values;
    before, it was dropped from the values only, so passing it made the
    DataFrame construction fail on a row/column count mismatch.

    Args:
        datapoints: Iterable of datapoint dicts with the entries 'sensor_id',
            'start_time', 'created' and 'properties'.
        sensors_parameters: Names of the properties to extract into columns.

    Returns:
        pandas.DataFrame whose parameter columns are converted with
        ``pd.to_numeric``; a property missing on a datapoint is filled with an
        empty string before that conversion.
    """
    value_columns = [prop for prop in sensors_parameters if prop != "site"]
    column_names = ['sensor_id', 'datetime', 'created'] + value_columns

    datapoint_rows = []
    for datapoint in datapoints:
        properties = datapoint['properties']
        row = [
            datapoint['sensor_id'],
            _parse_datapoint_time(datapoint['start_time']),
            _parse_datapoint_time(datapoint['created']),
        ]
        for prop in value_columns:
            row.append(properties[prop] if prop in properties else '')
        datapoint_rows.append(row)

    datapoint_dataframe = pd.DataFrame(datapoint_rows, columns=column_names)
    for param in value_columns:
        datapoint_dataframe[param] = pd.to_numeric(datapoint_dataframe[param])

    return datapoint_dataframe
    

In [None]:
# Step 1: turn the polygon into a geocode string and look up its sensors.
geocode = create_geocode()
sensors = get_sensors_by_geocode(geocode)

# Step 2: derive the parameter names and the sensor metadata table.
sensors_parameters = get_sensors_parameters(sensors)
sensor_dataframe = create_sensor_dataframe(sensors)

# Step 3: download the datapoints and tabulate them, one column per parameter.
datapoints = get_datapoints(sensors)
datapoints_dataframe = create_datapoints_dataframe(datapoints, sensors_parameters)



### Sample data for sensor dataframe

In [None]:
# Show the first and the last three sensors of the metadata table.
display(sensor_dataframe.head(3), sensor_dataframe.tail(3))

### Sample data and statistics for datapoints dataframe

In [None]:
# Show the first and the last three datapoints.
display(datapoints_dataframe.head(3), datapoints_dataframe.tail(3))

# Summary statistics over the parameter columns only.
display(datapoints_dataframe[sensors_parameters].describe())

### Plots for datapoints dataframe

In [None]:
# Draw one line chart per parameter, with the datapoint time on the x axis.
for param in sensors_parameters:
    datapoints_dataframe.plot(x='datetime', y=param, kind='line')