<p>
    <img src="https://s3.amazonaws.com/iotanalytics-templates/Logo.png" style="float:left;width:65px">
    <h1 style="float:left;color:#1A5276;padding-left:15px;font-size:20px;">AWS IoT Analytics | Notebook</h1>
</p>


Demonstration of [AWS IoT Analytics](https://aws.amazon.com/iot-analytics/) Notebooks. Environmental sensor data was collected from an [Arduino Nano 33 BLE Sense](https://store.arduino.cc/usa/nano-33-ble-sense), using [REYAX RYLR896 LoRaWAN transceiver modules](https://www.amazon.com/RYLR896-Module-SX1276-Antenna-Command/dp/B07NB3BK5H). The notebook uses [pandas](https://pandas.pydata.org/) for data analysis and manipulation, and [matplotlib](https://matplotlib.org/) and [plotly](https://github.com/plotly/plotly.py) for visualization.

Author: [Gary A. Stafford](https://github.com/garystafford)

In [None]:
import sys
from io import StringIO

import boto3
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from matplotlib.dates import DateFormatter
from pandas.plotting import register_matplotlib_converters

When loading data from IoT Analytics datasets, the client should be initialized first:

In [None]:
# IoT Analytics service client; used below to look up the data set's content.
client = boto3.client(service_name='iotanalytics')

Now we can get the data location (URL) for the given dataset and start working with the data. (To call `get_dataset_content`, you need to grant IoT Analytics the corresponding IAM permission.)

In [None]:
# Notebook-wide settings; adjust these to your locale before running.
MY_TIMEZONE = 'US/Eastern'  # your local timezone name, or 'UTC'
DATETIME_FORMAT = '%y-%m-%d %H:%M'  # strftime pattern in your local date/time format

In [None]:
def date_parse(x):
    """Convert epoch-second timestamps to datetimes in ``MY_TIMEZONE``.

    Args:
        x: Value(s) accepted by ``pd.to_datetime`` and interpreted as
            seconds since the Unix epoch (``unit='s'``).

    Returns:
        The parsed value(s), read as UTC and then converted to the
        timezone named by the notebook constant ``MY_TIMEZONE``.
    """
    # `infer_datetime_format` is intentionally not passed: there is no string
    # format to infer when `unit` is given, and the flag is deprecated in
    # pandas 2.x.
    parsed = pd.to_datetime(x, unit='s', utc=True)
    return parsed.tz_convert(MY_TIMEZONE)

In [None]:
# Row limit for the CSV read: an integer, or None to read the whole data set.
nrows_read = None

# Look up the data set's content and take the data URI of its first entry.
dataset = "iot_analytics_data_set"
dataset_content = client.get_dataset_content(datasetName=dataset)
dataset_url = dataset_content['entries'][0]['dataURI']

# Read the CSV from that URI, indexed by the 'ts' column, with date_parse()
# supplied as the date parser.
# NOTE(review): parse_dates is not set here; presumably pandas applies
# date_parser to the index because a parser is supplied - confirm.
df = pd.read_csv(
    dataset_url,
    index_col=['ts'],
    date_parser=date_parse,
    infer_datetime_format=True,
    header=0,
    nrows=nrows_read,
    low_memory=False,
)

In [None]:
# Clean up the DataFrame: discard the '__dt' column and order rows by 'ts'.
# errors='ignore' keeps this cell re-runnable: once '__dt' has been dropped,
# running the cell again would otherwise raise KeyError.
df = (
    df.drop(columns='__dt', errors='ignore')
      .sort_values(by='ts', ascending=True)
)

In [None]:
# metadata attributes before transform
# The frame is indexed by 'ts', so a bare [0] is not a label lookup and depends
# on pandas' deprecated integer-position fallback; .iloc[0] asks for the first
# row by position explicitly.
df['metadata'].iloc[0]

In [None]:
# Transform the metadata attribute, from the device registry, into a valid
# JSON object by quoting keys and values. The substitutions are order-dependent
# and are applied one after another.
# NOTE: this cell is not idempotent - re-running it on already-transformed
# values inserts the quotes a second time.
metadata_fixes = [
    ('{', '{"'),
    ('=', '":"'),
    (', ', '","'),
    (':"{"', ':{"'),
    ('}",', '},'),
    ('}', '"}'),
]

# Assign the result back to the column instead of calling
# df['metadata'].replace(..., inplace=True): the in-place call mutates a
# selected column object, which pandas warns about and which does not update
# `df` under Copy-on-Write.
metadata_col = df['metadata']
for pattern, replacement in metadata_fixes:
    metadata_col = metadata_col.replace(pattern, replacement, regex=True)
df['metadata'] = metadata_col

In [None]:
# metadata attributes post transform
# .iloc[0] selects the first row by position; a bare [0] on the 'ts'-indexed
# column depends on pandas' deprecated integer-position fallback.
df['metadata'].iloc[0]

In [None]:
# metadata attributes as json
# The first row's metadata string is parsed into a pandas Series.
# - StringIO: passing a literal JSON string to pd.read_json is deprecated, so
#   the text is wrapped in a file-like object.
# - .iloc[0]: explicit positional access instead of the deprecated [0] fallback
#   on the 'ts' index.
metadata = pd.read_json(StringIO(df['metadata'].iloc[0]), orient='records', typ='series')
metadata

In [None]:
# Example: look up a single metadata attribute by its label.
print(metadata.loc['thingname'])

In [None]:
# Preview the last five rows of the frame.
df.tail(n=5)

In [None]:
# Initial readings were not reflective of the environment, so keep only rows
# whose 'msg_received' value lies inside this window (both bounds inclusive).
df = df.loc[
    df['msg_received'].between('2020-08-24T15:02:06.354+0000',
                               '2020-08-24T19:02:06.354+0000')
]

In [None]:
# Filter temperature/humidity outliers per device: keep readings strictly
# above the 1st percentile and strictly below the 99th percentile.
# Each bound is computed from the frame as it stands at that step, so the
# upper bound is taken after the lower-bound filter has been applied.
for column in ('temperature', 'humidity'):
    lower_bound = df.groupby('device_id')[column].transform(lambda s: s.quantile(.01))
    df = df.loc[df[column] > lower_bound]

    upper_bound = df.groupby('device_id')[column].transform(lambda s: s.quantile(.99))
    df = df.loc[df[column] < upper_bound]

In [None]:
# Summary of the filtered DataFrame: size, time span, and sensor value ranges.
print('DataFrame Stats')
print('-------------')
print('Record count: {:,}'.format(df['temperature'].count()))
print('DataFrame size (MB): {:,.2f}'.format(sys.getsizeof(df)/1024/1024))
print('-------------')
# Rows were sorted by 'ts' ascending earlier, so the first and last index
# entries bound the time range. The minimum is df.index[0]; the previous
# df.index[1] reported the second row instead.
print('Time range (min): {:%Y-%m-%d %H:%M:%S %Z}'.format(df.index[0]))
print('Time range (max): {:%Y-%m-%d %H:%M:%S %Z}'.format(df.index[-1]))
print('Temperature (min): {:.2f}{}'.format(df['temperature'].min(), '°F'))
print('Temperature (max): {:.2f}{}'.format(df['temperature'].max(), '°F'))
print('Humidity (min): {:.2f}{}'.format(df['humidity'].min(), '%'))
print('Humidity (max): {:.2f}{}'.format(df['humidity'].max(), '%'))
# NOTE(review): pressure is printed as kPa here, but the moving-average cell
# states it was already converted to inHg - confirm which unit is correct.
print('Barometric Pressure (min): {:.2f}{}'.format(df['pressure'].min(), ' kPa'))
print('Barometric Pressure (max): {:.2f}{}'.format(df['pressure'].max(), ' kPa'))

In [None]:
# Matplotlib date/time setup: use the notebook timezone, register the pandas
# converters, and build the tick formatter reused by the time-series plots.
plt.rcParams.update({'timezone': MY_TIMEZONE})
register_matplotlib_converters()
myFmt = DateFormatter(DATETIME_FORMAT)

## Scatter Plot using Matplotlib
Using [Matplotlib](https://matplotlib.org/): Visualization with Python

In [None]:
# Temperature vs. humidity, drawn as unconnected, semi-transparent markers.
_, ax = plt.subplots(nrows=1, ncols=1, figsize=(18, 9))
ax.plot(
    df['temperature'],
    df['humidity'],
    linestyle='',
    marker='o',
    markersize=10,
    alpha=.5,
    label='lora-iot-gateway-01',
)

# Titles and labels are set on the Axes directly rather than through pyplot.
ax.set_title('Temperature vs. Humidity')
ax.set_xlabel('Temperature (˚F)')
ax.set_ylabel('Humidity (%)')
ax.grid()
ax.margins(0.05)
ax.legend()
plt.show()

## Plotly vs. Matplotlib for Graphing
Graphs using [Plotly](https://plotly.com/python/) Python Open Source Graphing Library

In [None]:
# Drop the timezone from the index while keeping the local wall-clock times,
# so that Plotly won't convert the timestamps to UTC.
df.index = df.index.tz_localize(tz=None)

In [None]:
# Temperature vs. humidity scatter, colored per device, with an OLS trendline.
fig = px.scatter(
    data_frame=df,
    x='temperature',
    y='humidity',
    color='device',
    trendline='ols',
    render_mode='svg',
    hover_name='device',
    # Hide device_id in the hover box; show readings with two decimals.
    hover_data={
        'device_id': False,
        'temperature': ':.2f',
        'humidity': ':.2f',
    },
)

fig.update_layout(
    template='ggplot2',
    title='Temperature vs. Humidity',
    xaxis_title='Temperature (˚F)',
    yaxis_title='Humidity (%)',
)

fig.show()

## Moving Average (MA) Graph
### Temperature, Humidity, and Barometric Pressure
Smoothing data using the mean average of a 1 minute rolling window.  
1 minute == 12 data points @ 5-second intervals

In [None]:
# barometric pressure has already been converted from kPa to inHg

# 12-sample rolling means of temperature, humidity, and pressure, plotted
# against the time index on a single set of axes.
fig, ax = plt.subplots(1, 1, figsize=(18, 9))
mean_temp = df.temperature.rolling(window=12).mean()
mean_humid = df.humidity.rolling(window=12).mean()
mean_pressure = df.pressure.rolling(window=12).mean()

# Legend labels: spelling corrected ("Temperture" -> "Temperature",
# "Pressue" -> "Pressure").
ax.plot(mean_temp,
        label='Temperature (˚F)')
ax.plot(mean_humid,
        label='Humidity (%)')
ax.plot(mean_pressure,
        label='Pressure (inHg)')

# Slant the date tick labels and format them with the shared formatter.
fig.autofmt_xdate()
ax.xaxis.set_major_formatter(myFmt)
ax.grid()
ax.margins(0.05)
ax.legend()
plt.title('Temperature, Humidity, and Barometric Pressure over Time')
plt.ylabel('Sensor Readings')
plt.xlabel('Time')
plt.show()

## Moving Average (MA) Graph
### RGB Color and Ambient Light Intensity
Smoothing data using the mean average of a 1 minute rolling window.  
1 minute == 12 data points @ 5-second intervals

In [None]:
# 12-sample rolling means of the color channels and ambient light, drawn as
# one line per channel on a shared time axis.
fig, ax = plt.subplots(1, 1, figsize=(18, 9))

mean_red = df.red.rolling(window=12).mean()
mean_green = df.green.rolling(window=12).mean()
mean_blue = df.blue.rolling(window=12).mean()
mean_ambient = df.ambient.rolling(window=12).mean()

# (series, legend label, line color) for each trace, in drawing order.
rgb_traces = (
    (mean_red, 'Red', 'red'),
    (mean_green, 'Green', 'green'),
    (mean_blue, 'Blue', 'blue'),
    (mean_ambient, 'Ambient Light', 'darkgrey'),
)
for series, legend_label, line_color in rgb_traces:
    ax.plot(series, label=legend_label, color=line_color)

# Slant the date tick labels and format them with the shared formatter.
fig.autofmt_xdate()
ax.xaxis.set_major_formatter(myFmt)
ax.grid()
ax.margins(0.05)
ax.legend()
plt.title('RGB Color and Ambient Light Intensity over Time')
plt.ylabel('RGBA (0-255)')
plt.xlabel('Time')
plt.show()

## Plotly vs. Matplotlib for Graphing
Graphs using [Plotly](https://plotly.com/python/) Python Open Source Graphing Library

In [None]:
# 12-sample rolling means of the color channels and ambient light, rendered
# with Plotly as one line trace per channel.
mean_red = df.red.rolling(window=12).mean()
mean_green = df.green.rolling(window=12).mean()
mean_blue = df.blue.rolling(window=12).mean()
mean_ambient = df.ambient.rolling(window=12).mean()

# (trace name, series, line color) for each trace, in drawing order.
plotly_traces = (
    ('red', mean_red, "#ff0000"),
    ('green', mean_green, "#00ff00"),
    ('blue', mean_blue, "#0000ff"),
    ('ambient', mean_ambient, "#999999"),
)

fig = go.Figure()
for trace_name, series, hex_color in plotly_traces:
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=series,
            mode='lines',
            name=trace_name,
            line_color=hex_color,
        )
    )

fig.update_layout(
    template='ggplot2',
    title='RGB Color and Ambient Light Intensity over Time',
    xaxis_title='Time',
    yaxis_title='RGBA (0-255)',
)

fig.show()

<div style="height:60px;"><div style="height:7px;background-color:#20B3CD;width:100%;margin-top:20px;position:relative;"><img src="https://s3.amazonaws.com/iotanalytics-templates/Logo.png" style="height:50px;width:50px;margin-top:-20px;position:absolute;margin-left:42%;"></div></div>