In [8]:
import os
import time
from datetime import datetime, timezone
import requests
import json
import pandas as pd
import numpy as np

from azure.ai.anomalydetector import AnomalyDetectorClient
from azure.ai.anomalydetector.models import ModelInfo
from azure.ai.anomalydetector.models import ModelStatus
from azure.core.credentials import AzureKeyCredential



class MultivariateSample:

    def __init__(self, subscription_key, anomaly_detector_endpoint, data_source=None):
        self.sub_key = subscription_key
        self.end_point = anomaly_detector_endpoint
        # Create an Anomaly Detector client

        # <client>
        self.ad_client = AnomalyDetectorClient(AzureKeyCredential(self.sub_key), self.end_point)
        # </client>

        self.data_source = data_source

    def train(self, start_time, end_time):
        # Number of models available now
        model_list = list(self.ad_client.list_multivariate_model(skip=0, top=10000))
        print("{:d} available models before training.".format(len(model_list)))

        # Use sample data to train the model
        print("Training new model...(it may take a few minutes)")
        data_feed = ModelInfo(start_time=start_time, end_time=end_time, source=self.data_source)
        response_header = \
            self.ad_client.train_multivariate_model(data_feed, cls=lambda *args: [args[i] for i in range(len(args))])[-1]
        trained_model_id = response_header['Location'].split("/")[-1]

        # Wait until the model is ready. It usually takes several minutes
        model_status = None

        while model_status != ModelStatus.READY and model_status != ModelStatus.FAILED:
            model_info = self.ad_client.get_multivariate_model(trained_model_id).model_info
            model_status = model_info.status
            time.sleep(10)

        if model_status == ModelStatus.FAILED:
            print("Creating model failed.")
            print("Errors:")
            if model_info.errors:
                for error in model_info.errors:
                    print("Error code: {}. Message: {}".format(error.code, error.message))
            else:
                print("None")
            return None

        if model_status == ModelStatus.READY:
            # Model list after training
            new_model_list = list(self.ad_client.list_multivariate_model(skip=0, top=10000))

            print("Done.\n--------------------")
            print("{:d} available models after training.".format(len(new_model_list)))

            # Return the latest model id
            return trained_model_id


    def detect(self, model_id, start_time, end_time):
        # Detect anomaly in the same data source (but a different interval)
        try:
            detection_req = DetectionRequest(source=self.data_source, start_time=start_time, end_time=end_time)
            response_header = self.ad_client.detect_anomaly(model_id, detection_req,
                                                            cls=lambda *args: [args[i] for i in range(len(args))])[-1]
            result_id = response_header['Location'].split("/")[-1]

            # Get results (may need a few seconds)
            r = self.ad_client.get_detection_result(result_id)
            print("Get detection result...(it may take a few seconds)")

            while r.summary.status != DetectionStatus.READY and r.summary.status != DetectionStatus.FAILED:
                r = self.ad_client.get_detection_result(result_id)
                time.sleep(1)

            if r.summary.status == DetectionStatus.FAILED:
                print("Detection failed.")
                print("Errors:")
                if r.summary.errors:
                    for error in r.summary.errors:
                        print("Error code: {}. Message: {}".format(error.code, error.message))
                else:
                    print("None")
                return None

        except HttpResponseError as e:
            print('Error code: {}'.format(e.error.code), 'Error message: {}'.format(e.error.message))
        except Exception as e:
            raise e

        return r

    def export_model(self, model_id, model_path="model.zip"):

        # Export the model
        model_stream_generator = self.ad_client.export_model(model_id)
        with open(model_path, "wb") as f_obj:
            while True:
                try:
                    f_obj.write(next(model_stream_generator))
                except StopIteration:
                    break
                except Exception as e:
                    raise e

    def delete_model(self, model_id):

        # Delete the mdoel
        self.ad_client.delete_multivariate_model(model_id)
        model_list_after_delete = list(self.ad_client.list_multivariate_model(skip=0, top=10000))
        print("{:d} available models after deletion.".format(len(model_list_after_delete)))


if __name__ == '__main__':
    
    # SUBSCRIPTION_KEY = os.environ["ANOMALY_DETECTOR_KEY"]
    SUBSCRIPTION_KEY = "ANOMALY_DETECTOR_KEY"
    # ANOMALY_DETECTOR_ENDPOINT = os.environ["ANOMALY_DETECTOR_ENDPOINT"]
    ANOMALY_DETECTOR_ENDPOINT = "ANOMALY_DETECTOR_ENDPOINT"

    # *****************************
    # Use your own data source here
    # *****************************
    data_source = "https://badsboys.blob.core.windows.net/trainingdataanomaly/TrainingData.zip?sp=r&st=2022-06-18T19:16:49Z&se=2022-06-19T03:16:49Z&spr=https&sv=2021-06-08&sr=b&sig=G5O7PzsQLslM%2FIVknMJz1IDZBqWqXJo7WUuWmLNZPB4%3D"
    # Create a new sample and client
    sample = MultivariateSample(SUBSCRIPTION_KEY, ANOMALY_DETECTOR_ENDPOINT, data_source)

    # Train a new model
    model_id = sample.train(datetime(2022, 6, 3, 13, 11, 10, tzinfo=timezone.utc),
                            datetime(2022, 6, 3, 14, 41, 53, tzinfo=timezone.utc))
    assert model_id is not None

    print("Model ID:\t", model_id)

0 available models before training.
Training new model...(it may take a few minutes)
Done.
--------------------
1 available models after training.
Model ID:	 41a68a3a-ef3e-11ec-9182-4e01098ecd37


In [9]:
ENDPOINT = "badsboysanomalydetection.cognitiveservices.azure.com/anomalydetector/v1.1-preview.1"
HEADERS = {
    "Ocp-Apim-Subscription-Key": "8ba5b3a75b5d4ee79cf7217d26ca6a38"
}

In [10]:

API_MODEL_INFERENCE = "https://{endpoint}/multivariate/models/{model_id}/detect"
API_RESULTS = "https://{endpoint}/multivariate/results/{result_id}"
SOURCE_BLOB_SAS = data_source

In [43]:
data = {
    'source': SOURCE_BLOB_SAS,
    'startTime': '2022-06-03T13:17:54Z', 
    'endTime': '2022-06-03T13:51:06Z', 

    # 'startTime': '2022-06-03T13:43:51Z', 
    # 'endTime': '2022-06-03T14:15:23Z', 
}

res = requests.post(API_MODEL_INFERENCE.format(endpoint=ENDPOINT, model_id=model_id), 
                    data=json.dumps(data), headers=HEADERS)
assert res.status_code == 201, f"Error occured. Error message: {res.content}"

result_id = res.headers['location'].split("/")[-1]

print(res.content)

print(f"result id = {result_id}")

b'"Success"\n'
result id = 1242349c-ef48-11ec-95dd-da0565d36a5d


In [44]:
res = requests.get(API_RESULTS.format(endpoint=ENDPOINT, result_id=result_id), headers=HEADERS)
assert res.status_code == 200, f"Error occured. Error message: {res.content}"

status = None
while status != "READY"and status != "FAILED":
    res = requests.get(API_RESULTS.format(endpoint=ENDPOINT, result_id=result_id), headers=HEADERS)
    assert res.status_code == 200, f"Error occured. Error message: {res.content}"

    result = res.json()
    status = result['summary']['status']

    time.sleep(60)

if status == "FAILED":
    print("Detection failed.")
    print("Errors:")
    if res.summary.errors:
        for error in res.summary.errors:
            print("Error code: {}. Message: {}".format(error.code, error.message))
    else:
        print("None")

print('Done')

Done


In [45]:
columns = {'timestamp':[result['results'][x]['timestamp'] for x in range(len(result['results']))],
'isAnomaly':[result['results'][x]['value']['isAnomaly'] for x in range(len(result['results']))]
}

In [46]:
response = pd.DataFrame(data=columns)
response.head()

Unnamed: 0,timestamp,isAnomaly
0,2022-06-03T13:17:54Z,False
1,2022-06-03T13:17:54Z,False
2,2022-06-03T13:17:54Z,False
3,2022-06-03T13:17:54Z,False
4,2022-06-03T13:17:54Z,False


In [47]:
response.dropna(inplace=True)
response.head()

Unnamed: 0,timestamp,isAnomaly
0,2022-06-03T13:17:54Z,False
1,2022-06-03T13:17:54Z,False
2,2022-06-03T13:17:54Z,False
3,2022-06-03T13:17:54Z,False
4,2022-06-03T13:17:54Z,False


In [48]:
table = pd.pivot_table(response[['timestamp', 'isAnomaly']], values = ['isAnomaly'], index=['timestamp'],
                    columns=['isAnomaly'], aggfunc=np.count_nonzero)
table.head()

isAnomaly,False,True
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-06-03T13:17:54Z,10.0,
2022-06-03T13:17:55Z,10.0,
2022-06-03T13:17:56Z,10.0,
2022-06-03T13:17:57Z,10.0,
2022-06-03T13:17:58Z,10.0,


In [49]:
table.fillna(0, inplace=True)

In [50]:
table['Anomaly'] = table[True]>table[False]

In [51]:
table

isAnomaly,False,True,Anomaly
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-06-03T13:17:54Z,10.0,0.0,False
2022-06-03T13:17:55Z,10.0,0.0,False
2022-06-03T13:17:56Z,10.0,0.0,False
2022-06-03T13:17:57Z,10.0,0.0,False
2022-06-03T13:17:58Z,10.0,0.0,False
...,...,...,...
2022-06-03T13:51:01Z,10.0,0.0,False
2022-06-03T13:51:02Z,8.0,4.0,False
2022-06-03T13:51:03Z,7.0,6.0,False
2022-06-03T13:51:04Z,10.0,0.0,False


In [53]:
table_3x = pd.concat([table]*3)
table_3x.sort_values(by=['timestamp'], inplace=True)
table_3x

isAnomaly,False,True,Anomaly
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-06-03T13:17:54Z,10.0,0.0,False
2022-06-03T13:17:54Z,10.0,0.0,False
2022-06-03T13:17:54Z,10.0,0.0,False
2022-06-03T13:17:55Z,10.0,0.0,False
2022-06-03T13:17:55Z,10.0,0.0,False
...,...,...,...
2022-06-03T13:51:04Z,10.0,0.0,False
2022-06-03T13:51:04Z,10.0,0.0,False
2022-06-03T13:51:05Z,10.0,0.0,False
2022-06-03T13:51:05Z,10.0,0.0,False


In [54]:
table_3x.reset_index(inplace=True)

In [55]:
anomaly = table_3x[table_3x['Anomaly']==True]
# ts = anomaly['timestamp']

In [56]:
anomaly

isAnomaly,timestamp,False,True,Anomaly
48,2022-06-03T13:18:10Z,3.0,14.0,True
49,2022-06-03T13:18:10Z,3.0,14.0,True
50,2022-06-03T13:18:10Z,3.0,14.0,True
489,2022-06-03T13:20:37Z,5.0,10.0,True
490,2022-06-03T13:20:37Z,5.0,10.0,True
...,...,...,...,...
5731,2022-06-03T13:49:44Z,6.0,8.0,True
5732,2022-06-03T13:49:44Z,6.0,8.0,True
5868,2022-06-03T13:50:30Z,5.0,10.0,True
5869,2022-06-03T13:50:30Z,5.0,10.0,True


In [57]:
anomaly.to_csv('anomaly2.csv')

In [59]:
pos = pd.read_csv(
    'pos.csv', 
)
pos

Unnamed: 0,timestamp,latitude,longitude
0,2022-06-03T13:10:53Z,13.79474,100.65597
1,2022-06-03T13:11:02Z,13.79474,100.65597
2,2022-06-03T13:11:10Z,13.79474,100.65597
3,2022-06-03T13:11:10Z,13.79476,100.65597
4,2022-06-03T13:11:10Z,13.79477,100.65629
...,...,...,...
16147,2022-06-03T17:10:40Z,13.80371,100.66728
16148,2022-06-03T17:10:41Z,13.80371,100.66728
16149,2022-06-03T17:10:42Z,13.80371,100.66728
16150,2022-06-03T17:10:43Z,13.80371,100.66728


In [64]:
pos_ano = table[table['Anomaly']==True].merge(pos, how='left', on='timestamp')

In [67]:
pos_ano.drop_duplicates(inplace=True)
pos_ano

Unnamed: 0,timestamp,False,True,Anomaly,latitude,longitude
0,2022-06-03T13:18:10Z,3.0,14.0,True,13.8013,100.64848
2,2022-06-03T13:20:37Z,5.0,10.0,True,13.81249,100.64656
4,2022-06-03T13:21:27Z,6.0,8.0,True,13.81607,100.64131
6,2022-06-03T13:23:20Z,6.0,8.0,True,13.82103,100.63314
8,2022-06-03T13:23:27Z,5.0,10.0,True,13.82121,100.6329
10,2022-06-03T13:23:28Z,6.0,8.0,True,13.82126,100.63285
12,2022-06-03T13:26:59Z,6.0,8.0,True,13.82491,100.62885
14,2022-06-03T13:27:00Z,6.0,8.0,True,13.82493,100.62883
16,2022-06-03T13:27:05Z,6.0,8.0,True,13.8251,100.62866
18,2022-06-03T13:27:07Z,6.0,8.0,True,13.82519,100.62857


In [71]:
img_class = pd.read_csv(
    'image_classification.csv', 
    skiprows=1, 
    names=['timestamp','image_class','image_name']
)
img_class

Unnamed: 0,timestamp,image_class,image_name
0,2022-06-03T13:18:10Z,longitudinal,https://roadanomalydetect.blob.core.windows.ne...
1,2022-06-03T13:20:37Z,pothole,https://roadanomalydetect.blob.core.windows.ne...
2,2022-06-03T13:21:27Z,lateral,https://roadanomalydetect.blob.core.windows.ne...
3,2022-06-03T13:23:20Z,repaired,https://roadanomalydetect.blob.core.windows.ne...
4,2022-06-03T13:23:27Z,pothole,https://roadanomalydetect.blob.core.windows.ne...
5,2022-06-03T13:23:28Z,pothole,https://roadanomalydetect.blob.core.windows.ne...
6,2022-06-03T13:26:59Z,pothole,https://roadanomalydetect.blob.core.windows.ne...
7,2022-06-03T13:27:00Z,pothole,https://roadanomalydetect.blob.core.windows.ne...
8,2022-06-03T13:27:05Z,longitudinal,https://roadanomalydetect.blob.core.windows.ne...
9,2022-06-03T13:27:07Z,longitudinal,https://roadanomalydetect.blob.core.windows.ne...


In [72]:
df_final = pos_ano.merge(img_class, how='inner', on='timestamp')
df_final

Unnamed: 0,timestamp,False,True,Anomaly,latitude,longitude,image_class,image_name
0,2022-06-03T13:18:10Z,3.0,14.0,True,13.8013,100.64848,longitudinal,https://roadanomalydetect.blob.core.windows.ne...
1,2022-06-03T13:20:37Z,5.0,10.0,True,13.81249,100.64656,pothole,https://roadanomalydetect.blob.core.windows.ne...
2,2022-06-03T13:21:27Z,6.0,8.0,True,13.81607,100.64131,lateral,https://roadanomalydetect.blob.core.windows.ne...
3,2022-06-03T13:23:20Z,6.0,8.0,True,13.82103,100.63314,repaired,https://roadanomalydetect.blob.core.windows.ne...
4,2022-06-03T13:23:27Z,5.0,10.0,True,13.82121,100.6329,pothole,https://roadanomalydetect.blob.core.windows.ne...
5,2022-06-03T13:23:28Z,6.0,8.0,True,13.82126,100.63285,pothole,https://roadanomalydetect.blob.core.windows.ne...
6,2022-06-03T13:26:59Z,6.0,8.0,True,13.82491,100.62885,pothole,https://roadanomalydetect.blob.core.windows.ne...
7,2022-06-03T13:27:00Z,6.0,8.0,True,13.82493,100.62883,pothole,https://roadanomalydetect.blob.core.windows.ne...
8,2022-06-03T13:27:05Z,6.0,8.0,True,13.8251,100.62866,longitudinal,https://roadanomalydetect.blob.core.windows.ne...
9,2022-06-03T13:27:07Z,6.0,8.0,True,13.82519,100.62857,longitudinal,https://roadanomalydetect.blob.core.windows.ne...


In [74]:
df_final[['timestamp','latitude', 'longitude', 'image_class','image_name']].to_csv('anomaly_final.csv', index=False)