### Anomaly Detection using the Azure Cognitive Services Detector
This notbook illustrates the example described in our blog post about anomaly detectors. It is based on examples from the Azure Cog services anomaly detection blogs,  but with different data. 

To use the cognitive service, you need to go to the Azure portal and then to cognitive services.  There you can use the search bar to look for the “Anomaly Detector” (at the time of this writing it is still in “preview”).  You will need to create an instance and that will get you an API key and an endpoint for billing.   

You should go to this page to see what is currently required to launch the container.  Assuming you have docker installed on a machine (your laptop or in the cloud), you must first pull the container.

docker pull containerpreview.azurecr.io/microsoft/cognitive-services-anomaly-detector:latest

Next you will use ApiKey and billing endpoint to launch the container.   This command works:

docker run --rm -it -p 5000:5000 containerpreview.azurecr.io/microsoft/cognitive-services-anomaly-detector:latest Eula=accept Billing={ENDPOINT_URI} ApiKey={API_KEY}



In [1]:
import requests
import json
import requests
import json
import pandas as pd
import numpy as np
#from __future__ import print_function
import warnings
warnings.filterwarnings('ignore')

# Import library to display results
import matplotlib.pyplot as plt
%matplotlib inline 

In [2]:
# To start sending requests to the Anomaly Detector API, paste your subscription key you received after creating Anomaly Detector resource. 
subscription_key = 'your-subscription-key-goes-here' 

# You will point your anomaly detection work at the container running on your "edge" device.
# in this case we ran it on the same host as the notebook.

endpoint = 'http://localhost:5000/anomalydetector/v1.0/timeseries/entire/detect'

In [3]:
from bokeh.plotting import figure,output_notebook, show
from bokeh.palettes import Blues4
from bokeh.models import ColumnDataSource,Slider
import datetime
from bokeh.io import push_notebook
from dateutil import parser
from ipywidgets import interact, widgets, fixed
output_notebook()

In [4]:
def detect(endpoint, subscription_key, request_data):
    headers = {'Content-Type': 'application/json-patch+json', 'Ocp-Apim-Subscription-Key': subscription_key}
    response = requests.post(endpoint, data=json.dumps(request_data), headers=headers)
    if response.status_code == 200:
        return json.loads(response.content.decode("utf-8"))
    else:
        print(response.status_code)
        raise Exception(response.text)

In [83]:
def build_figure(sample_data, sensitivity):
    sample_data['sensitivity'] = sensitivity
    result = detect(endpoint, subscription_key, sample_data)
    columns = {'expectedValues': result['expectedValues'], 'isAnomaly': result['isAnomaly'], 'isNegativeAnomaly': result['isNegativeAnomaly'],
          'isPositiveAnomaly': result['isPositiveAnomaly'], 'upperMargins': result['upperMargins'], 'lowerMargins': result['lowerMargins'],
          'timestamp': [parser.parse(x['timestamp']) for x in sample_data['series']], 
          'value': [x['value'] for x in sample_data['series']]}
    response = pd.DataFrame(data=columns)
    values = response['value']
    label = response['timestamp']
    anomalies = []
    anomaly_labels = []
    index = 0
    anomaly_indexes = []
    p = figure(x_axis_type='datetime', title="Batch Anomaly Detection ({0} Sensitvity)".format(sensitivity), width=800, height=400)
    for anom in response['isAnomaly']:
        if anom == True and (values[index] > response.iloc[index]['expectedValues'] + response.iloc[index]['upperMargins'] or 
                         values[index] < response.iloc[index]['expectedValues'] - response.iloc[index]['lowerMargins']):
            anomalies.append(values[index])
            anomaly_labels.append(label[index])
            anomaly_indexes.append(index)
        index = index+1
    print(anomaly_indexes)
    upperband = response['expectedValues'] + response['upperMargins']
    lowerband = response['expectedValues'] -response['lowerMargins']
    band_x = np.append(label, label[::-1])
    band_y = np.append(lowerband, upperband[::-1])
    boundary = p.patch(band_x, band_y, color=Blues4[2], fill_alpha=0.5, line_width=1, legend='Boundary')
    p.line(label, values, legend='Value', color="#2222aa", line_width=1)
    p.line(label, response['expectedValues'], legend='ExpectedValue',  line_width=1, line_dash="dotdash", line_color='olivedrab')
    anom_source = ColumnDataSource(dict(x=anomaly_labels, y=anomalies))
    anoms = p.circle('x', 'y', size=5, color='tomato', source=anom_source)
    p.legend.border_line_width = 1
    p.legend.background_fill_alpha  = 0.1
    show(p, notebook_handle=True)

In [3]:
TIME_SERIES_DATA_PATH = './instrument.csv'
# this is a data stream captured from an SO2 sensor on an early version of the 
#Argonne-Chicago “Array-of-Things” edge device

In [7]:
def load_data_from_csv(filename):
	rdelim = '\n'
	cdelim = ','
	data  = pd.read_csv(filename)
	return data

In [8]:
data = load_data_from_csv(TIME_SERIES_DATA_PATH )
datasorted = data.sort_values(by='timestamp')  #[0:1000]

In [9]:
def extract_all(row):
    key  = row['sensor']+'_'+row['parameter']
    return {'timestamp': str(pd.to_datetime(row['timestamp']))[:-10], 'value': row['value']}

In [19]:
z = datasorted.loc[datasorted['parameter'] == 'so2']

In [20]:
w = z.apply(extract_all, axis=1)

In [21]:
series = []
prev = '99'
for x in w:
    if len(x['timestamp']) > 12 and x['timestamp'][12:] != prev:
        series.append(x)
        prev = x['timestamp'][12:]

In [34]:
sample_data = {'granularity': 'minutely','series':series[200:850]}

In [46]:
build_figure(sample_data, 80)

[402]


In [47]:
sample_data['sensitivity']= 99
result = detect(endpoint, subscription_key, sample_data)
columns = {'expectedValues': result['expectedValues'], 'isAnomaly': result['isAnomaly'], 'isNegativeAnomaly': result['isNegativeAnomaly'],
          'isPositiveAnomaly': result['isPositiveAnomaly'], 'upperMargins': result['upperMargins'], 'lowerMargins': result['lowerMargins'],
          'timestamp': [parser.parse(x['timestamp']) for x in sample_data['series']], 
          'value': [x['value'] for x in sample_data['series']]}
response = pd.DataFrame(data=columns)
values = response['value']
label = response['timestamp']
anomalies = []
anomaly_labels = []
index = 0
anomaly_indexes = []
for anom in response['isAnomaly']:
    if anom == True:
        anomalies.append(values[index])
        anomaly_labels.append(label[index])
        anomaly_indexes.append(index)
    index = index+1
print(anomaly_indexes)

[402]


In [48]:
for i in range(len(sample_data['series'])):
    if result['isAnomaly'][i]== True:
        print('got one at ', i)


got one at  402


### now do detect from window

In [50]:
nendpoint = 'http://localhost:5000/anomalydetector/v1.0/timeseries/last/detect'

## now do it with sliding window
Now we will simulate "real time" anomaly detection.   To use the algorithm, at every "time step" we send a window of the last "window_size" values, but use the last value detecion mode.

In [51]:
import time

In [52]:
window_size = 100
anoms = []
for i in range(200, len(series[:800])-window_size+1):
    sample_data = {'granularity': 'minutely','series':series[i: i+window_size],  "sensitivity": 80}
    result =  detect(nendpoint, subscription_key, sample_data)
    isanom = result["isAnomaly"]
    #print(result)
    if(isanom == True):
        print('got a bad one at ', i+window_size-1)
        anoms.append(i+window_size-3)
    #print(isanom)
    time.sleep(0.05)

got a bad one at  604
got a bad one at  650


In [53]:
anoms

[602, 648]

In [78]:
def show_windows(data, bad_ones, sensitivity):
    p = figure(x_axis_type='datetime', title="Batch Anomaly Detection ({0} Sensitvity)".format(sensitivity), width=800, height=400)
    columns = {'timestamp': [parser.parse(x['timestamp']) for x in data], 
          'value': [x['value'] for x in data]}
    response = pd.DataFrame(data=columns)
    #print(response)
    values = response['value']
    labels= response['timestamp']
    anomalies = []
    anomaly_indexes = bad_ones
    anomaly_labels = []
    for i in anomaly_indexes:
        anomalies.append(values[i])
        anomaly_labels.append(labels[i])
    #print(anomalies)
    p.line(labels, values, legend='Value', color="#2222aa", line_width=1)
    anom_source = ColumnDataSource(dict(x=anomaly_labels, y=anomalies))
    p.circle('x', 'y', size=5, color='tomato', source=anom_source)
    p.legend.border_line_width = 1
    p.legend.background_fill_alpha  = 0.1
    show(p, notebook_handle=True)    

In [57]:
show_windows(series[:800], anoms, 80)

### now do water geos
This is from the Global Summary of the Day (GSOD) weather from the National Oceanographic and Atmospheric Administration (NOAA) for 9,000 weather stations between 1929 and 2016.  In particular, we will look at a sensor that briefly failed and we will see how well the anomaly detectors spot the problem

In [61]:
SKAJIT = './skajit.csv'

In [62]:
data = load_data_from_csv(SKAJIT )
data['timestamp'] = pd.to_datetime(data['timestamp'])
print(len(data['timestamp']))

365


In [63]:
def extract_all2(row):
    return {'timestamp': str(row['timestamp']), 'value': row['temperature']}


In [64]:
data

Unnamed: 0,temperature,timestamp
0,37.4,2015-01-01
1,37.4,2015-01-02
2,39.2,2015-01-03
3,46.4,2015-01-04
4,51.8,2015-01-05
...,...,...
360,41.0,2015-12-27
361,41.0,2015-12-28
362,41.0,2015-12-29
363,37.4,2015-12-30


In [65]:
w = data.apply(extract_all2, axis=1)

In [66]:
series = []
for x in w:
    series.append(x)
sample_data = {'granularity': 'daily','series':series}

In [119]:
window_size = 15
anoms = []
for i in range( len(series)-window_size+1):
    sample = {'granularity': 'daily','series':series[i: i+window_size],  "sensitivity": 80}
    result =  detect(nendpoint, subscription_key, sample)
    isanom = result["isAnomaly"]
    #print(result)
    if(isanom == True):
        print('got a bad one at ', i+window_size-1)
        anoms.append(i+window_size-3)
    #print(isanom)
    time.sleep(0.01)

got a bad one at  116
got a bad one at  259
got a bad one at  260
got a bad one at  261
got a bad one at  268
got a bad one at  269
got a bad one at  334
got a bad one at  335
got a bad one at  336


We first plot the full data stream using the entire mode.

In [120]:
build_figure(sample_data, 80)

[259, 260, 261, 262, 263, 264, 265, 266, 267]


Now we show the graph for the values from the sliding window.

In [121]:
show_windows(series,anoms, 0)

     timestamp  value
0   2015-01-01   37.4
1   2015-01-02   37.4
2   2015-01-03   39.2
3   2015-01-04   46.4
4   2015-01-05   51.8
..         ...    ...
360 2015-12-27   41.0
361 2015-12-28   41.0
362 2015-12-29   41.0
363 2015-12-30   37.4
364 2015-12-31   32.0

[365 rows x 2 columns]
[53.6, 64.4, 82.4, 107.6, 111.2, 111.2, 41.0, 39.2, 55.4]


## the fake sine wave data

In [67]:
fakedata = data.copy()

In [68]:
pi = 3.14
for i in range(len(fakedata)):
    fakedata['temperature'][i] = 0.5*np.sin(pi*i/10)+4.0
    pi = 0.9995*pi
fakedata['temperature'][120] =3
fakedata['temperature'][300] =5 
fakedata['temperature'][121] =5
fakedata['temperature'][301] =2

In [69]:
w = fakedata.apply(extract_all2, axis=1)
series = []
for x in w:
    series.append(x)
sample_data = {'granularity': 'daily','series':series}

In [70]:
window_size = 18
anoms = []
for i in range( len(series)-window_size+1):
    sample = {'granularity': 'daily','series':series[i: i+window_size],  "sensitivity": 90}
    result =  detect(nendpoint, subscription_key, sample)
    isanom = result["isAnomaly"]
    #print(result)
    if(isanom == True):
        print('got a bad one at ', i+window_size-1)
        anoms.append(i+window_size-1)
    #print(isanom)
    time.sleep(0.01)

got a bad one at  120
got a bad one at  123
got a bad one at  124
got a bad one at  300
got a bad one at  301


In [79]:
show_windows(series,anoms, 0)

In [84]:
build_figure(sample_data, 90)

[301]


### the long flat


In [330]:
fakedata = data.copy() 

In [331]:
pi = 3.14
for i in range(len(fakedata)):
    fakedata['temperature'][i] = 0.5*np.sin(pi*i/10)+4.0
    #pi = 0.9995*pi
for i in range(200,260):
    fakedata['temperature'][i] = 4

In [332]:
w = fakedata.apply(extract_all2, axis=1)
series = []
for x in w:
    series.append(x)
sample_data = {'granularity': 'daily','series':series}

In [333]:
window_size = 18
anoms = []
for i in range( len(series)-window_size+1):
    sample = {'granularity': 'daily','series':series[i: i+window_size],  "sensitivity": 99}
    result =  detect(nendpoint, subscription_key, sample)
    isanom = result["isAnomaly"]
    #print(result)
    if(isanom == True):
        print('got a bad one at ', i+window_size-1)
        anoms.append(i+window_size-1)
    #print(isanom)
    time.sleep(0.01)

got a bad one at  261
got a bad one at  262
got a bad one at  263
got a bad one at  264
got a bad one at  265


In [335]:
show_windows(series,anoms, 0)

[4.134613837692779, 4.276762562182332, 4.3918470742184645, 4.468613425076003, 4.499554743838239]


In [336]:
build_figure(sample_data, 98)

[201, 202, 203, 204, 205, 206, 207, 208, 209, 211, 212, 213, 214, 215, 216, 217, 218, 219, 221, 222, 223, 224, 225, 226, 227, 228, 229, 231, 232, 233, 234, 235, 236, 237, 238, 239, 241, 242, 243, 244, 245, 246, 247, 248, 249, 251, 252, 253, 254, 255, 256, 257, 258, 259]
