## An Exploration of Chicago's Array of Things API (via python client)
Array of Things has expanded in Chicago to include more than 100 continuous sensors across the city.

**Resources**
* [Home Page](https://arrayofthings.github.io)
* [python client](https://github.com/UrbanCCD-UChicago/aot-client-py)
* [API documentation](https://arrayofthings.docs.apiary.io)

**TODOs**
  * group measurements by larger cluster than sensor? by project?
  * efficiently query a regular interval of data, to animate
  * normalize measurements, choose reasonable circle radii and color
    * map different sensors to different visualizations

In [None]:
 # !pip install aot-client

In [40]:
from aot_client import AotClient

client = AotClient()

# Which public methods/properties does the client expose?
# (names starting with an underscore are filtered out)
[name for name in dir(client) if not name.startswith('_')]

['get_node_details',
 'get_project_details',
 'get_sensor_details',
 'list_metrics',
 'list_nodes',
 'list_observations',
 'list_projects',
 'list_sensors']

It appears results will be paginated and we can also pass a filter. From the docs website I see that users may pass a timestamp filter in ISO 8601 format. Let's get measurements from the past 5 minutes.

In [41]:
import datetime

import pandas as pd
from aot_client import AotClient
from aot_client import F


def time_x_mins_ago(minutes:int) -> str:
    '''Get formatted time to pass to API filter, relative to current time.

    The timestamp is computed from an aware UTC clock, so the result is
    correct regardless of the machine's local time zone and of daylight
    saving time (a fixed "+5 hours" offset is only right for a machine in
    US Central time while DST is in effect).

    Args:
        minutes: how many minutes before "now" the timestamp should be.

    Returns:
        UTC time as ``YYYY-MM-DDTHH:MM:SS`` (second precision, no offset
        suffix), i.e. the first 19 characters of the ISO 8601 form.
    '''
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    t = now_utc - datetime.timedelta(minutes=minutes)

    # strftime always yields exactly the 19-character second-precision form
    return t.strftime('%Y-%m-%dT%H:%M:%S')


def unpack_response(response, page_limit=1000):
    '''Flatten up to ``page_limit`` pages of a paginated response into one list.

    Args:
        response: iterable of page objects; each page must expose a ``data``
            attribute holding a list of records.
        page_limit: maximum number of pages to pull from ``response``.

    Returns:
        A list with the records of every page that was read. This is
        best-effort: if iterating the response fails part-way, the error is
        printed and the records collected so far are returned.
    '''
    from itertools import islice

    pages = []
    try:
        # islice stops *before* requesting page ``page_limit + 1``; an
        # enumerate/break loop would fetch one extra page just to discard it.
        for page in islice(response, page_limit):
            pages.extend(page.data)
    except Exception as e:
        # Deliberately broad: the client's HTTP error class is not imported
        # in this notebook, and partial results are still useful when
        # exploring. KeyboardInterrupt/SystemExit are not swallowed.
        print(e)

    return pages


def process_observations(obs_df):
    '''Clean a DataFrame of raw observations and add ``lon``/``lat`` columns.

    Steps, in order:
      1. parse ``timestamp`` as UTC and convert it to US/Central;
      2. extract ``location['geometry']['coordinates']`` into ``lon``/``lat``;
      3. flip the sign of positive longitudes;
      4. drop rows whose lon or lat is exactly 0;
      5. drop rows whose lat is not above 40 degrees.

    Args:
        obs_df: DataFrame with at least a ``timestamp`` column and a
            ``location`` column of nested dicts. It is not modified.

    Returns:
        A new, filtered DataFrame that keeps the input's index labels.
    '''
    obs_df = obs_df.copy()
    obs_df['timestamp'] = pd.to_datetime(obs_df['timestamp'], utc=True)
    obs_df['timestamp'] = obs_df['timestamp'].dt.tz_convert('US/Central')

    # extract lat/lon to columns
    coords = obs_df['location'].apply(lambda x: x['geometry']['coordinates'])
    # Reuse obs_df's index: column assignment aligns on index labels, so a
    # frame with a fresh 0..n-1 index would turn lon/lat into NaN whenever
    # obs_df has been filtered, sliced or re-indexed beforehand.
    obs_df[['lon', 'lat']] = pd.DataFrame(
        coords.tolist(), columns=['lon', 'lat'], index=obs_df.index)

    # fix positive lon values
    # (presumably sign errors in the feed, since Chicago longitudes are
    # negative -- TODO confirm)
    mask = obs_df['lon'] > 0
    n_positive = int(mask.sum())
    if n_positive > 0:
        print(f'fixed {n_positive} rows with positive lon value')
        obs_df.loc[mask, 'lon'] = -obs_df.loc[mask, 'lon']

    # remove lat/lon values at 0
    mask = (obs_df['lon'] != 0) & (obs_df['lat'] != 0)
    n_zero = len(obs_df) - int(mask.sum())
    if n_zero > 0:
        print(f'removed {n_zero} rows with lat/lon at 0')
        obs_df = obs_df.loc[mask]

    # remove lat values less than 40 degrees
    mask = (obs_df['lat'] > 40)
    n_outside = len(obs_df) - int(mask.sum())
    if n_outside > 0:
        print(f'removed {n_outside} '
              'rows with lat/lon outside Chicago region')
        obs_df = obs_df.loc[mask]

    return obs_df

In [None]:
# Pull one page of recent observations into a DataFrame for a first look.
# The names defined here (client, f, response, pages, obs_df) are reused by
# later cells.
client = AotClient()

# create filter
# 'size' is presumably the number of records per page -- TODO confirm
# against the API documentation
f = F('size', '90000')
# keep only observations with a timestamp at or after 5 minutes ago
f &= ('timestamp', 'ge', time_x_mins_ago(5))
# optional extra filters tried during exploration:
# f &= ('sensor', 'image.image_detector.person_total')
# f &= ('time_bucket', 'avg:1 hour')
# f &= ('sensor', 'image.image_detector.car_total')
# f &= ('sensor', 'metsense.tsys01.temperature')

response = client.list_observations(filters=f)
# show the request link so the query can be inspected
print(response.current_link)
# page_limit=1: only the first page of the response is unpacked
pages = unpack_response(response, page_limit=1)
print(len(pages))
obs_df = pd.DataFrame(pages)
# cleaning is skipped here, so obs_df holds the raw records (no lon/lat columns)
# obs_df = process_observations(obs_df)
print(f"shape: {obs_df.shape}")
obs_df.head()

In [None]:
len(obs_df['node_vsn'].unique())

In [None]:
obs_df.groupby('node_vsn')['sensor_path'].nunique()

In [None]:
import folium
import folium.plugins

def map(df):
    """Plot one circle marker per row of ``df`` on a dark folium map.

    ``df`` must provide ``lat``, ``lon``, ``timestamp``, ``value`` and ``uom``
    columns. The map is centred on the mean lat/lon, and each marker's
    tooltip shows the timestamp and the value with its unit. A fullscreen
    button is added in the top-right corner.

    Returns the ``folium.Map``.
    """
    centre = [df['lat'].mean(), df['lon'].mean()]
    m = folium.Map(location=centre, tiles='CartoDB dark_matter', zoom_start=10)

    # Marker styling (radius, color, weight, popup) is not tuned yet; folium
    # defaults are used apart from fill=True.
    for _, row in df.iterrows():
        marker = folium.CircleMarker(
            location=(row['lat'], row['lon']),
            tooltip=f"{row['timestamp']}<br>{row['value']} {row['uom']}",
            fill=True,
        )
        marker.add_to(m)

    fullscreen = folium.plugins.Fullscreen(
        position='topright', force_separate_button=True)
    fullscreen.add_to(m)

    return m

In [None]:
response.current_link

In [None]:
len(sensors_df['path'].unique())

In [None]:
obs_df.head()

In [None]:
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

sensors = client.list_sensors()
sensors_df = pd.DataFrame(sensors.data)
sensors_df.head()

@interact_manual
def choose_sensor(sensor=sensors_df['path'].unique()):
    """Fetch observations for the chosen sensor path and map their locations.

    The default value (the unique sensor paths) is what ``interact_manual``
    receives as the set of choices for ``sensor``. At most 5 pages of
    observations are read. Returns ``None`` when nothing comes back,
    otherwise a map with one marker per distinct (lon, lat) pair.
    """
    aot = AotClient()
    resp = aot.list_observations(filters=F('sensor', sensor))
    print('API call:', resp.current_link)

    rows = unpack_response(resp, page_limit=5)
    if not rows:
        print('No data found.')
        return None

    observations = process_observations(pd.DataFrame(rows))
    print(observations.shape)

    # one marker per location is enough for the overview map
    unique_sites = observations.drop_duplicates(['lon', 'lat'])
    return map(unique_sites)

### Plotly

In [None]:
# initial configuration
import plotly
from os import getenv
from dotenv import load_dotenv

# Presumably loads variables from a local .env file into the environment so
# the credentials stay out of the notebook -- confirm against python-dotenv.
load_dotenv()

# getenv returns None for an unset variable, so a missing PLOTLY_USER or
# PLOTLY_API_KEY is passed through as None here.
# NOTE(review): newer plotly releases appear to have moved this helper to the
# separate chart_studio package -- confirm against the installed version.
plotly.tools.set_credentials_file(
    username=getenv('PLOTLY_USER'), 
    api_key=getenv('PLOTLY_API_KEY')
)

In [None]:
# exploring plots
import plotly.plotly as py
import plotly.graph_objs as go
from IPython.display import IFrame

# Two toy line series sharing the same x values, used only to check that
# plotting works end to end.
trace0 = go.Scatter(
    x=[1, 2, 3, 4],
    y=[10, 15, 13, 17]
)
trace1 = go.Scatter(
    x=[1, 2, 3, 4],
    y=[16, 5, 11, 9]
)
data = [trace0, trace1]

# py.plot's return value is used as the IFrame source below, so it is
# presumably the URL of the hosted figure; auto_open=False presumably keeps
# it from opening a browser tab -- confirm against the plotly docs.
plot_url = py.plot(data, filename = 'basic-line', auto_open=False)
# embed the plot inline in the notebook
IFrame(src=plot_url, width="100%", height="600px", frameBorder="0")

In [44]:
import pandas as pd
SENSOR_DF = pd.read_csv('data/sensor_mapping.csv')

def query_aot(sensor_hrf, size_per_page=100000, page_limit=1, mins_ago=12*60):
    '''Query recent observations for one sensor, looked up by readable name.

    Args:
        sensor_hrf: value to match in ``SENSOR_DF['sensor_measure']``
            (e.g. ``'Carbon Monoxide'``); the first matching row's
            ``sensor_path`` is used for the API filter.
        size_per_page: value passed (as a string) to the ``size`` filter.
        page_limit: maximum number of response pages to read.
        mins_ago: only observations with a timestamp at or after this many
            minutes before now are requested (default: 12 hours).

    Returns:
        The observations after ``process_observations``. If the API returns
        no records, the empty DataFrame is returned unprocessed.

    Raises:
        IndexError: if ``sensor_hrf`` has no entry in ``SENSOR_DF``.
    '''
    matches = SENSOR_DF.loc[SENSOR_DF['sensor_measure'] == sensor_hrf,
                            'sensor_path']
    if matches.empty:
        # Same exception type as the bare ``.values[0]`` lookup would raise,
        # but with a message that names the offending input.
        raise IndexError(
            f'no sensor_path mapped to sensor measure {sensor_hrf!r}')
    sensor = matches.values[0]

    client = AotClient()

    f = F('size', str(size_per_page))
    f &= ('sensor', sensor)
    f &= ('timestamp', 'ge', time_x_mins_ago(mins_ago))

    response = client.list_observations(filters=f)
    pages = unpack_response(response, page_limit=page_limit)
    obs_df = pd.DataFrame(pages)
    if obs_df.empty:
        # No records: there is no 'timestamp'/'location' column to clean, and
        # process_observations would fail with a KeyError.
        return obs_df

    return process_observations(obs_df)

In [45]:
df = query_aot('Carbon Monoxide')

fixed 1406 rows with positive lon value
removed 4752 rows with lat/lon at 0
removed 1406 rows with lat/lon outside Chicago region


In [46]:
df

Unnamed: 0,location,node_vsn,sensor_path,timestamp,uom,value,lon,lat
0,"{'type': 'Feature', 'geometry': {'type': 'Poin...",W00D,chemsense.co.concentration,2019-05-23 16:30:39-05:00,ppm,-4.54179,-87.612155,41.730395
3,"{'type': 'Feature', 'geometry': {'type': 'Poin...",W00D,chemsense.co.concentration,2019-05-23 16:30:14-05:00,ppm,-4.26785,-87.612155,41.730395
6,"{'type': 'Feature', 'geometry': {'type': 'Poin...",W00D,chemsense.co.concentration,2019-05-23 16:29:49-05:00,ppm,-4.31434,-87.612155,41.730395
9,"{'type': 'Feature', 'geometry': {'type': 'Poin...",W00D,chemsense.co.concentration,2019-05-23 16:29:24-05:00,ppm,-4.22537,-87.612155,41.730395
12,"{'type': 'Feature', 'geometry': {'type': 'Poin...",W00D,chemsense.co.concentration,2019-05-23 16:28:54-05:00,ppm,-4.05681,-87.612155,41.730395
15,"{'type': 'Feature', 'geometry': {'type': 'Poin...",W00D,chemsense.co.concentration,2019-05-23 16:28:29-05:00,ppm,-4.03826,-87.612155,41.730395
18,"{'type': 'Feature', 'geometry': {'type': 'Poin...",W00D,chemsense.co.concentration,2019-05-23 16:28:04-05:00,ppm,-4.39906,-87.612155,41.730395
21,"{'type': 'Feature', 'geometry': {'type': 'Poin...",W00D,chemsense.co.concentration,2019-05-23 16:27:39-05:00,ppm,-4.22505,-87.612155,41.730395
24,"{'type': 'Feature', 'geometry': {'type': 'Poin...",W00D,chemsense.co.concentration,2019-05-23 16:27:14-05:00,ppm,-4.52669,-87.612155,41.730395
27,"{'type': 'Feature', 'geometry': {'type': 'Poin...",W00D,chemsense.co.concentration,2019-05-23 16:26:49-05:00,ppm,-4.56574,-87.612155,41.730395


## Import data from AoT Archive

In [1]:
import pandas as pd

In [2]:
from app.aot import load_aot_archive_day, clean_aot_archive_obs, get_nodes

In [7]:
import requests
from bs4 import BeautifulSoup

In [None]:
# Download the AoT archive browser page and parse it for the tables it contains.
url = 'https://aot-file-browser.plenar.io/data-sets/chicago-complete'
# A timeout keeps the cell from hanging on an unresponsive server.
r = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'lxml')

In [14]:
node_table_div = soup.find(id="node-info")

In [27]:
pd.read_html(str(list(node_table_div.children)))[0]

Unnamed: 0,node_id,project_id,vsn,address,lat,lon,description,start_timestamp,end_timestamp,Unnamed: 9
0,001e0610ba46,AoT_Chicago,004,State St & Jackson Blvd Chicago IL,41.878377,-87.627678,AoT Chicago (S) [C],2017/10/09 00:00:00,,
1,001e0610ba3b,AoT_Chicago,006,18th St & Lake Shore Dr Chicago IL,41.858136,-87.616055,AoT Chicago (S),2017/08/08 00:00:00,,
2,001e0610ba8f,AoT_Chicago,00D,Cornell & 47th St Chicago IL,41.810342,-87.590228,AoT Chicago (S),2017/08/08 00:00:00,,
3,001e0610ba16,AoT_Chicago,010,Homan Ave & Roosevelt Rd Chicago IL,41.866349,-87.710543,AoT Chicago (S) [C],2018/07/18 00:00:00,,
4,001e0610ba8b,AoT_Chicago,018,Stony Island Ave & 63rd St Chicago IL,41.7806,-87.586456,AoT Chicago (S) [C],2018/02/26 00:00:00,,
5,001e0610ba18,AoT_Chicago,01D,Damen Ave & Cermak Chicago IL,41.852179,-87.675825,AoT Chicago (S),2017/12/15 00:00:00,,
6,001e0610bc10,AoT_Chicago,01F,State St & 87th Chicago IL,41.736314,-87.624179,AoT Chicago (S) [C],2018/02/22 00:00:00,,
7,001e0610bbf9,AoT_Chicago,020,Western Ave & 69th St Chicago IL,41.768319,-87.683396,AoT Chicago (S) [C],2018/02/13 00:00:00,,
8,001e0610bbff,AoT_Chicago,025,Western Ave & 18th St Chicago IL,41.857797,-87.685806,AoT Chicago (S),2017/12/15 00:00:00,,
9,001e0610ba15,AoT_Chicago,02A,Jeffrey Ave & 95th St Chicago IL,41.722457,-87.575350,AoT Chicago (S) [C],2018/02/22 00:00:00,,


In [11]:
soup.findAll(class_='table')

[<table class="table">
 <tbody>
 <tr>
 <th>Source</th>
 <td>
 <a href="http://www.mcs.anl.gov/research/projects/waggle/downloads/datasets/AoT_Chicago.complete.latest.tar" target="_blank">http://www.mcs.anl.gov/research/projects/waggle/downloads/datasets/AoT_Chicago.complete.latest.tar</a> </td>
 </tr>
 <tr>
 <th>Data Starts On</th>
 <td>14 Sep 2016, 00:00:00 UTC</td>
 </tr>
 <tr>
 <th>Data Ends On</th>
 <td>22 May 2019, 20:46:11 UTC</td>
 </tr>
 <tr>
 <th>Latest Data Tarball Created On</th>
 <td>22 May 2019, 20:46:11 UTC</td>
 </tr>
 </tbody>
 </table>, <table class="table">
 <thead>
 <tr>
 <th>Slice</th>
 <th>Link</th>
 <th>Created</th>
 <th>Expires</th>
 <th>Size</th>
 </tr>
 </thead>
 <tbody>
 <tr>
 <td>Recent</td>
 <td><a href="http://www.mcs.anl.gov/research/projects/waggle/downloads/datasets/AoT_Chicago.complete.recent.csv" target="_blank">http://www.mcs.anl.gov/research/projects/waggle/downloads/datasets/AoT_Chicago.complete.recent.csv</a></td>
 <td>Within the last 5 minutes</td

In [36]:
get_nodes()

Unnamed: 0,node_id,project_id,vsn,lat,lon
0,001e0610ba46,AoT_Chicago,004,41.878377,-87.627678
1,001e0610ba3b,AoT_Chicago,006,41.858136,-87.616055
2,001e0610ba8f,AoT_Chicago,00D,41.810342,-87.590228
3,001e0610ba16,AoT_Chicago,010,41.866349,-87.710543
4,001e0610ba8b,AoT_Chicago,018,41.780600,-87.586456
5,001e0610ba18,AoT_Chicago,01D,41.852179,-87.675825
6,001e0610bc10,AoT_Chicago,01F,41.736314,-87.624179
7,001e0610bbf9,AoT_Chicago,020,41.768319,-87.683396
8,001e0610bbff,AoT_Chicago,025,41.857797,-87.685806
9,001e0610ba15,AoT_Chicago,02A,41.722457,-87.575350


In [4]:
df = load_aot_archive_day(day='2019-05-14')
df = clean_aot_archive_obs(df)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

