# Get Datapoints by List of Sensor IDs

## Set Up Servers and Libraries

<ul>
    <li>Set up the Python environment
        <ul><li>Read deployment.md</li></ul>
    </li>
    <li>Choose a geostreams instance, e.g.
        <ul><li>https://greatlakestogulf.org/geostreams</li>
            <li>https://illinois.greatlakestogulf.org/geostreams</li>
        </ul>
    </li>
    <li>Create an account on your instance of choice</li>
</ul>

In [None]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd

from pygeotemporal.datapoints import DatapointsApi
from pygeotemporal.sensors import SensorsApi
from pygeotemporal.streams import StreamsApi

# Base URL of the geostreams instance that all API clients talk to.
host = r"https://gltg-dev.ncsa.illinois.edu/geostreams"

# Credentials are read from the environment so that no account details are
# stored in the notebook. Export GEOSTREAMS_USERNAME and GEOSTREAMS_PASSWORD
# before starting Jupyter; both fall back to an empty string when unset.
username = os.environ.get("GEOSTREAMS_USERNAME", "")  # email for instance
password = os.environ.get("GEOSTREAMS_PASSWORD", "")  # password for instance

# One client per API, each constructed with the same host and credentials.
sensorclient = SensorsApi(host=host, username=username, password=password)
streamclient = StreamsApi(host=host, username=username, password=password)
datapointclient = DatapointsApi(host=host, username=username, password=password)

## Set Parameters for Getting Datapoints

In [None]:
# Sensors to query. Can be any length, but don't pull everything at once.
sensor_ids = [8404, 8407]

# Date range of the datapoints to request.
since = "20200901"  # Date to start getting datapoints
until = "20200910"  # Date to end getting datapoints

# Forwarded as the `sources` argument of the datapoints request.
sources = None

# Don't change unless you plan to change methods
format_out = "json"

# set to "true" to get number of datapoints without pulling datapoints
only_count = "false"

## Routine to Create Dataframe

In [None]:
# Build one dataframe holding every datapoint of every sensor in `sensor_ids`.
# Each row is: sensor metadata columns, the datapoint time, then one column
# per parameter found on any of the requested sensors.

all_parameters = []
# sensor_id -> sensor metadata, so that each row is built from the sensor the
# datapoint belongs to rather than from whichever sensor was fetched last.
sensors_by_id = {}

# Get all parameters for all sensors
for sensor_id in sensor_ids:
    sensor = sensorclient.sensor_get(sensor_id).json()['sensor']
    sensors_by_id[sensor_id] = sensor

    # NOTE(review): the sensor record is not re-fetched after the statistics
    # update - confirm whether its 'parameters' list can change as a result.
    if sensor['max_end_time'] == 'N/A':
        print("Updating sensor statistics for sensor_id=%s" % sensor_id)
        sensorclient.sensor_statistics_post(sensor_id)

    for parameter in sensor['parameters']:
        # Parameters whose name ends in 'qc' are left out of the dataframe
        # (presumably quality-control flags - verify).
        if parameter.endswith('qc'):
            continue
        if parameter not in all_parameters:
            all_parameters.append(parameter)


# Get all datapoints from sensor_ids
all_datapoints = []
n_bad_datapoints = 0

for sensor_id in sensor_ids:
    r = datapointclient.get_datapoints(
        sensor_id=sensor_id,
        since=since,
        until=until,
        sources=sources,
        format=format_out,
        onlyCount=only_count
    )

    if r.status_code != 200:
        print("Datapoints download for sensor %s failed with status code %s" % (sensor_id, r.status_code))
        continue

    datapoints = r.json()
    if not datapoints:
        # Nothing to add for this sensor; skip the metadata lookups as well.
        continue

    # Metadata columns are identical for every datapoint of this sensor, so
    # they are built once per sensor. Coordinates are stored as [lon, lat].
    sensor = sensors_by_id[sensor_id]
    coordinates = sensor['geometry']['coordinates']
    huc = sensor['properties']['huc']
    sensor_columns = [
        sensor['id'],
        sensor['name'],
        coordinates[1],
        coordinates[0],
        huc['huc_name'],
        huc['huc8']['code'],
    ]

    for datapoint in datapoints:
        # The last four characters of end_time are dropped before parsing to
        # minute resolution; datapoints without a usable end_time are counted
        # and skipped instead of aborting the whole download.
        try:
            end_time = datetime.strptime(
                datapoint['end_time'][:-4].replace('"T"', 'T'),
                '%Y-%m-%dT%H:%M'
            )
        except (KeyError, TypeError, ValueError):
            n_bad_datapoints += 1
            continue

        properties = datapoint['properties']
        row = sensor_columns + [end_time]
        # Parameters this datapoint does not report are left as ''.
        for parameter in all_parameters:
            if parameter in properties:
                row.append(properties[parameter])
            else:
                row.append('')

        all_datapoints.append(row)

if n_bad_datapoints:
    print("Skipped %s datapoints with a missing or unparseable end_time" % n_bad_datapoints)

sensor_values = ['SENSOR_ID', 'NAME', 'LATITUDE', 'LONGITUDE', 'HUC NAME', 'HUC8']
columns = sensor_values + ['datetime'] + all_parameters

# Create dataframe
df = pd.DataFrame(all_datapoints, columns=columns)
# Convert the parameter columns to numeric dtypes.
for param in all_parameters:
    df[param] = pd.to_numeric(df[param])

## Show First 3 Rows of Dataframe

In [None]:
# Preview the first three rows of the dataframe.
df.iloc[:3]

## Show Statistics

In [None]:
# Summary statistics restricted to the parameter columns.
df.loc[:, all_parameters].describe()

## Show Plots

In [None]:
# Draw one line chart per parameter, with the datapoint time on the x-axis.
for column_name in all_parameters:
    df.plot(x='datetime', y=column_name, kind='line')