# Batch anomaly detection with the Anomaly Detector API

### Use this Jupyter notebook to start visualizing anomalies as a batch with the Anomaly Detector API in Python.

This notebook shows you how to send a batch anomaly detection request and visualize the anomalies found throughout the example data set. The graph created at the end of this notebook will display the following:
* Anomalies found throughout the data set, highlighted.
* The expected values versus the values contained in the data set.
* Anomaly detection boundaries 


Install the following Python libraries.

- pandas
- numpy
- bokeh
- ipywidgets

In [None]:
# Select device for anomaly analysis

# Device identifier; it is inserted into the telemetry request URL built later in the notebook.
deviceId: str = "hvac_simulator"
# Number of days of telemetry to request for that device.
inferenceDays: int = 7

In [None]:
# Install required packages. uncomment to install

# ! pip3 install notebook
# ! pip3 install azure-ai-anomalydetector
# ! pip3 install azure-core
# ! pip3 install azure-storage-blob
# ! pip3 install python-dotenv
# ! pip3 install pandas
# ! pip3 install plotly
# ! pip3 install bokeh

In [None]:
from dotenv import load_dotenv
from pathlib import Path
import json
import numpy as np
import os
import pandas as pd
import requests
import tempfile
import warnings

from bokeh.models import ColumnDataSource
from bokeh.palettes import Blues4
from bokeh.plotting import figure,output_notebook, show
from dateutil import parser

# Suppress every Python warning for the rest of the session
# (presumably to keep cell output uncluttered — note this also hides deprecation notices).
warnings.filterwarnings('ignore')
# Configure Bokeh's output for notebook use so later show() calls target the notebook
# (NOTE(review): confirm against the Bokeh version in use).
output_notebook()

In [None]:
# Load environment variables
#
# Settings are read from a `.env` file in the current working directory (if
# present) and from the process environment.

env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)

inference_telemetry_endpoint_url = os.environ.get('inference_telemetry_endpoint_url')
inference_telemetry_endpoint_key = os.environ.get('inference_telemetry_endpoint_key')

# The detector URL is derived from the configured base endpoint, so fail with
# an explicit message when it is missing instead of the opaque
# "NoneType + str" TypeError the concatenation would otherwise produce.
anomaly_detector_base_url = os.environ.get('anomaly_detector_endpoint')
if not anomaly_detector_base_url:
    raise RuntimeError(
        "Environment variable 'anomaly_detector_endpoint' is not set; "
        "add it to the .env file or the process environment."
    )

# Strip any trailing slash so the joined URL never contains '//'.
anomaly_detector_endpoint = anomaly_detector_base_url.rstrip('/') + '/anomalydetector/v1.0/timeseries/entire/detect'
anomaly_detector_key = os.environ.get('anomaly_detector_key')

In [None]:
# Call the anomaly detector API

def detect(endpoint, apikey, request_data, timeout=60):
    """POST a detection request and return the decoded JSON response.

    Args:
        endpoint: Full URL of the detection endpoint.
        apikey: Subscription key, sent in the 'Ocp-Apim-Subscription-Key' header.
        request_data: JSON-serialisable request body.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout, requests would block indefinitely on an unresponsive
            server.

    Returns:
        The response body parsed from JSON.

    Raises:
        Exception: If the service answers with a status other than 200; the
            message carries the status code and the response text.
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    headers = {'Content-Type': 'application/json', 'Ocp-Apim-Subscription-Key': apikey}
    response = requests.post(
        endpoint,
        data=json.dumps(request_data),
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        # Keep the status code with the error so it is not lost in cell output.
        raise Exception(f"{response.status_code}: {response.text}")
    return json.loads(response.content.decode("utf-8"))

In [None]:
# build_figure sends the series to the anomaly detector at the requested
# sensitivity and plots the observed values, the expected values, the
# boundary band and the detected anomalies.

def build_figure(sample_data, sensitivity):
    """Run batch anomaly detection on ``sample_data`` and show the result.

    Args:
        sample_data: Request body for the detector. Its 'series' entry is a
            list of items that each have a 'timestamp' and a 'value' key.
        sensitivity: Sensitivity value sent with the request and shown in the
            plot title.

    Returns:
        None. The figure is displayed with ``show``.

    Raises:
        Exception: Propagated from ``detect`` when the service call fails.
        KeyError: If the response or the series items lack an expected key.
    """
    # Send a shallow copy so the caller's dict is not modified between calls.
    request_data = {**sample_data, 'sensitivity': sensitivity}
    result = detect(anomaly_detector_endpoint, anomaly_detector_key, request_data)

    series = sample_data['series']
    response = pd.DataFrame(data={
        'expectedValues': result['expectedValues'],
        'isAnomaly': result['isAnomaly'],
        'isNegativeAnomaly': result['isNegativeAnomaly'],
        'isPositiveAnomaly': result['isPositiveAnomaly'],
        'upperMargins': result['upperMargins'],
        'lowerMargins': result['lowerMargins'],
        'timestamp': [parser.parse(item['timestamp']) for item in series],
        'value': [item['value'] for item in series],
    })
    values = response['value']
    label = response['timestamp']
    upperband = response['expectedValues'] + response['upperMargins']
    lowerband = response['expectedValues'] - response['lowerMargins']

    # Plot only flagged points that also fall outside the boundary band.
    outside_band = (values > upperband) | (values < lowerband)
    anomaly_mask = response['isAnomaly'].astype(bool) & outside_band
    anomalies = values[anomaly_mask].tolist()
    anomaly_labels = label[anomaly_mask].tolist()

    p = figure(x_axis_type='datetime', title="Batch Anomaly Detection ({0} Sensitivity)".format(sensitivity), width=800, height=600)

    # The band is one closed polygon: the lower edge left-to-right, then the
    # upper edge right-to-left.
    band_x = np.append(label, label[::-1])
    band_y = np.append(lowerband, upperband[::-1])
    p.patch(band_x, band_y, color=Blues4[2], fill_alpha=0.5, line_width=1, legend_label='Boundary')
    p.line(label, values, legend_label='Value', color="#2222aa", line_width=1)
    p.line(label, response['expectedValues'], legend_label='ExpectedValue',  line_width=1, line_dash="dotdash", line_color='olivedrab')

    anom_source = ColumnDataSource(dict(x=anomaly_labels, y=anomalies))
    # scatter() draws circle markers by default; circle() with a screen-unit
    # size is deprecated in recent Bokeh releases.
    p.scatter('x', 'y', size=5, color='tomato', source=anom_source)
    p.legend.border_line_width = 1
    p.legend.background_fill_alpha  = 0.1
    show(p, notebook_handle=True)

## Visualizing anomalies throughout your data

The following cells call the Anomaly Detector API with two different example time series data sets, and different sensitivities for anomaly detection. Varying the sensitivity of the Anomaly Detector API can improve how well the response fits your data.

### Example 1: time series sampled at 10-minute intervals


In [None]:
# Get last X days of telemetry from Azure SQL via REST API

api_url = f"{inference_telemetry_endpoint_url}/{deviceId}/{inferenceDays}?code={inference_telemetry_endpoint_key}"

# Starts as an empty list so it is still defined (with length 0) when no
# telemetry comes back; on success it holds one record per reading.
ts_dict = []
# convert_dates=False stops pandas from converting date-like columns, so they
# are passed through as received.
df = pd.read_json(api_url, convert_dates=False)
if df.empty:
    print(f"No data found for device {deviceId} in the last {inferenceDays} days")
else:
    # Only the temperature reading is kept, renamed to "value". Missing
    # columns raise rather than being silently ignored.
    df = df.drop(columns=['humidity', 'prediction'], errors='raise')
    df = df.rename(columns={"temperature": "value"}, errors='raise')

    # Round-trip through JSON to get a list of plain dicts.
    ts_dict = json.loads(df.to_json(orient="records"))

    # A bare `df` expression inside an if-block is not echoed by Jupyter, so
    # print the frame explicitly.
    print(df)

In [None]:
# Build the request payload for anomaly detection.
# sample_data is left empty when fewer than 12 telemetry items are available.
sample_data = {}

item_count = len(ts_dict)
if item_count >= 12:
    # Series plus the sampling interval settings sent with the request.
    sample_data.update(
        series=ts_dict,
        granularity='minutely',
        customInterval=10,
    )
else:
    print(f"Climate data items: {item_count}")
    print("12 Climate data items required")
    print("Wait for more data to be generated then try again")

In [None]:
# 95 sensitivity (skipped when no request payload was built)

if sample_data:
    build_figure(sample_data, 95)

In [None]:
# 90 sensitivity (skipped when no request payload was built)

if sample_data:
    build_figure(sample_data, 90)

In [None]:
# 80 sensitivity (skipped when no request payload was built)

if sample_data:
    build_figure(sample_data, 80)