# Azure Anomaly Detector - Air Passengers

In [None]:
#pip install azure-ai

In [None]:
#pip install azureml

In [None]:
#pip install azure-ai-ml

In [None]:
#pip install azure

In [None]:
#pip install --force-reinstall azure.ai.anomalydetector==3.0.0b1

In [None]:
#pip install --upgrade azure.ai.anomalydetector

In [None]:
import warnings
warnings.filterwarnings(action="ignore")

import requests
import json

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure

from datetime import datetime, timedelta

from sklearn.metrics import mean_squared_error
from math import sqrt

from azure.ai.anomalydetector import AnomalyDetectorClient
from azure.core.credentials import AzureKeyCredential
from azure.ai.anomalydetector.models import *

In [None]:
# Load the dataset: the CSV header row is replaced by the names below, the
# first column is parsed as dates and used as the index.
real = pd.read_csv(
    'data/AirPassengers.csv',
    header=0,
    parse_dates=[0],
    names=['Month', 'Passengers'],
    index_col=0,
)

# Plot the raw series. The x label was mis-encoded ("A침os"); restored to "Años".
ax = real.plot(figsize=(18, 6), title='Custom Time Series')
ax.set_xlabel("Años")
ax.set_ylabel("# Pasajeros");

In [None]:
def splitSet(df, TEST_SIZE):
    """Split ``df`` into consecutive train, validation and test slices.

    The last ``TEST_SIZE`` fraction of ``df`` becomes the test set. The
    remaining rows are split again with the same fraction: their tail is
    the validation set and their head is the train set. Order is preserved.

    Args:
        df: Ordered, sliceable data supporting ``len`` and ``[a:b]`` slicing.
        TEST_SIZE: Fraction held out at each of the two cuts.

    Returns:
        Tuple ``(train, valid, test)``.
    """
    keep_ratio = 1 - TEST_SIZE

    # First cut: hold out the tail of the full data as the test set.
    test_start = int(len(df) * keep_ratio)
    train_full, test = df[:test_start], df[test_start:]

    # Second cut: the same ratio applied to what is left.
    valid_start = int(len(train_full) * keep_ratio)
    train, valid = train_full[:valid_start], train_full[valid_start:]

    return train, valid, test

In [None]:
# Split the dataset
TEST_SIZE = 0.24
train, valid, test = splitSet(real, TEST_SIZE)

# Only train and test are used from here on: fold validation back into train
train = pd.concat([train, valid], axis=0)

# Visualization (title was mis-encoded as "avi칩n"; restored to "avión")
ax = real.plot(figsize=(18, 6), title='Pasajeros de avión')

# Timestamps marking the start of the data and the end of each range
init__limit = real.index[0]
train_limit = real.index[len(train) - 1]
test__limit = real.index[len(train) + len(test) - 1]

# Dashed lines at the end of the train and test ranges
ax.axvline(train_limit, linestyle='--', color='blue')
ax.axvline(test__limit, linestyle='--', color='red')

# Shaded background: blue for train, red for test
ax.axvspan(init__limit, train_limit, alpha=0.1, color='blue')
ax.axvspan(train_limit, test__limit, alpha=0.1, color='red')

# Range labels, drawn at a fixed y position of 610
ax.text(real.index[int((len(train) - 1) * 0.5)], 610, 'Train set', fontsize=18)
ax.text(real.index[int(len(train) + (len(test) - 1) * 0.3)], 610, 'Test set', fontsize=18)

ax.grid(linestyle='--')

ax.set_xlabel("Fecha")
ax.set_ylabel("Pasajeros");

In [None]:
# Empty results table with one column for the model name and one for its RMSE.
airPassMetrics = pd.DataFrame(
    columns=['Model', 'RMSE'],
)

## Azure Anomaly Detector

In [None]:
# Detection settings.
GRANULARITY = 'monthly'
SENSITIVITY = 90
CUSTOM_INTERVAL = 1

# Pattern passed to pd.to_datetime when the series index is converted.
FORMAT_TIME = '%Y-%m-%d'

# Key and endpoint handed to AnomalyDetectorClient.
# NOTE(review): the 'XXX' values look like placeholders - fill in before running.
SUBSCRIPTION_KEY = 'XXX'
ANOMALY_DETECTOR_ENDPOINT = 'https://XXX.cognitiveservices.azure.com/'

In [None]:
# Build the result frame from the raw series, dropping duplicated timestamps.
# De-duplication is done on the index, not on the passenger count: different
# months that happen to share a value are valid points and must be kept.
# .copy() makes the later column assignments write to an independent frame.
dt_detect = real[~real.index.duplicated(keep='first')].copy()

# Make the index UTC-aware and order it by timestamp (the API expects the
# points in chronological order).
dt_detect.index = pd.to_datetime(dt_detect.index, format=FORMAT_TIME, utc=True)
dt_detect = dt_detect.sort_index()

# To work with the Anomaly API, at least 12 points of data are needed.
if len(dt_detect) >= 12:

    # Frame with the two fields sent to the API: timestamp and value
    anom = pd.DataFrame()
    anom['ds'] = dt_detect.index
    anom['value'] = dt_detect['Passengers'].values

    # Client to work with the API
    client = AnomalyDetectorClient(ANOMALY_DETECTOR_ENDPOINT, AzureKeyCredential(SUBSCRIPTION_KEY))

    # One TimeSeriesPoint per row
    series = [
        TimeSeriesPoint(timestamp=timestamp, value=value)
        for timestamp, value in zip(anom['ds'], anom['value'])
    ]

    # Entire-series request. The data has one point per month, so the
    # granularity is MONTHLY (matching GRANULARITY = 'monthly'), not DAILY.
    request = UnivariateDetectionOptions(
        series=series,
        granularity=TimeGranularity.MONTHLY,
        sensitivity=SENSITIVITY,
    )

    # Change-point detection takes its own options type.
    change_point_request = UnivariateChangePointDetectionOptions(
        series=series,
        granularity=TimeGranularity.MONTHLY,
    )

    try:
        change_point_response = client.detect_univariate_change_point(change_point_request)
        anomaly_response = client.detect_univariate_entire_series(request)
    except Exception as e:
        # Print the structured service error when the exception carries one,
        # otherwise the exception itself. Then re-raise: without a response
        # the result columns below cannot be filled.
        error = getattr(e, 'error', None)
        if error is not None:
            print(
                'Error code: {}'.format(getattr(error, 'code', None)),
                'Error message: {}'.format(getattr(error, 'message', None)),
            )
        else:
            print(e)
        raise
    else:
        # Save the anomaly information on the dataset
        dt_detect['expectedValues'] = anomaly_response.expected_values
        dt_detect['upperMargins'] = anomaly_response.upper_margins
        dt_detect['lowerMargins'] = anomaly_response.lower_margins
        dt_detect['isAnomaly'] = anomaly_response.is_anomaly
        dt_detect['isPositiveAnomaly'] = anomaly_response.is_positive_anomaly
        dt_detect['isNegativeAnomaly'] = anomaly_response.is_negative_anomaly
        dt_detect['period'] = anomaly_response.period

In [None]:
# Preview the first rows of dt_detect.
dt_detect.head()

In [None]:
# Model
MODEL = 'Azure Anomaly Detector'

# Split the detector output with the same TEST_SIZE used for the raw series
train_pred, valid_pred, test_pred = splitSet(dt_detect, TEST_SIZE)
train_pred = pd.concat([train_pred, valid_pred], axis=0)

# RMSE on the test set (the value column is named 'Passengers' when loaded)
real_values = test_pred['Passengers']
pred_values = test_pred['expectedValues']
rmse = sqrt(mean_squared_error(real_values, pred_values))

# Series to plot; the boundaries are expected value +/- the returned margins
real_plot = real['Passengers']
pred_plot = dt_detect['expectedValues']
up_margin = dt_detect['expectedValues'] + dt_detect['upperMargins']
lowmargin = dt_detect['expectedValues'] - dt_detect['lowerMargins']

# Rows flagged as anomalies
anomalies = dt_detect[dt_detect['isAnomaly'].astype(bool)]

# Visualization
# NOTE(review): dt_detect's index is UTC-aware while `real` and the *_limit
# markers use the original index - confirm they line up on the same axis.
figure(figsize=(18, 6))

real_plot.plot(label='Real Value')
pred_plot.plot(label='Predictions', color='red')
plt.suptitle(MODEL + ' - RMSE: ' + str(np.round(rmse, 3)), fontsize=20)
plt.title('Sensitivity: ' + str(SENSITIVITY) + ' -> #Anomalies: ' + str(len(anomalies)), fontsize=18)

plt.axvline(train_limit, linestyle='--', color='blue')
plt.axvline(test__limit, linestyle='--', color='red')

plt.axvspan(init__limit, train_limit, alpha=0.1, color='blue')
plt.axvspan(train_limit, test__limit, alpha=0.1, color='red')

up_margin.plot(label='Up Boundary', linestyle='--', color='cyan')
lowmargin.plot(label='Low Boundary', linestyle='--', color='cyan')

plt.fill_between(up_margin.index, up_margin, lowmargin, alpha=0.2, color='c')

# Dotted marker per anomaly. ymax is a fraction of the axes height, derived
# from the point's value relative to the series maximum, minus a 0.05 offset.
peak = max(real_plot)
for line, passengers in anomalies['Passengers'].items():
    YMAX = (passengers / peak) - 0.05
    plt.axvline(line, linestyle=':', color='green', ymax=YMAX)

plt.legend()
plt.grid()