In [None]:
# Third-party packages this notebook imports: the Azure Blob client, an HTTP
# client, a .env loader and the Bokeh plotting stack.
# NOTE(review): versions are unpinned, so a fresh install may pull releases
# newer than the ones this notebook was originally run against.
%pip install azure-storage-blob
%pip install requests 
%pip install python-dotenv
%pip install bokeh markupsafe

In [68]:
from pathlib import Path
from dotenv import load_dotenv

# Point at the env file sitting in the current working directory and load its
# variables; the call is left as the last expression so its result is shown.
env_path = Path('.', 'set-env-local.env')
load_dotenv(dotenv_path=env_path)

True

In [65]:
import shutil

def create_prepped_dataset(input_dataset, output_file_path, node_id=None,
                           value_columns=('MI_1', 'MI_2', 'MI_3', 'MI_4')):
    """Write one ``timestamp,value`` CSV per variable for a single node and zip them.

    Args:
        input_dataset: Path of the source CSV. It must contain the columns
            ``NODE_ID`` and ``Time`` plus every name in ``value_columns``.
        output_file_path: Directory that receives ``<column>.csv`` for each
            value column. The directory is created when missing and is then
            archived to ``output_file_path + ".zip"``.
        node_id: Value of ``NODE_ID`` to keep. When omitted, the notebook-level
            ``test_node_id`` variable is used.
        value_columns: Names of the columns exported as separate series.

    Returns:
        None. The CSV files and the zip archive are the result.
    """
    if node_id is None:
        # Keep working for existing callers that set the notebook global.
        node_id = test_node_id

    os.makedirs(output_file_path, exist_ok=True)
    data_df = pd.read_csv(input_dataset, sep=",", header=0)
    node_df = data_df[data_df['NODE_ID'] == node_id]

    # Keep only the text before the first "+" of the Time column. Computing it
    # as a standalone Series avoids assigning into a filtered slice.
    timestamps = node_df['Time'].str.split("+", n=1).str[0]

    for column in value_columns:
        variable_df = pd.DataFrame({'timestamp': timestamps, 'value': node_df[column]})
        # os.path.join puts the file inside the directory on every platform.
        variable_df.to_csv(os.path.join(output_file_path, column + ".csv"),
                           header=True, index=False)

    shutil.make_archive(output_file_path, 'zip', output_file_path)

In [70]:
import uuid
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__
from azure.storage.blob import ResourceTypes, AccountSasPermissions, generate_account_sas
from datetime import datetime, timedelta

import os  # imported here so this cell also runs on a fresh kernel

# The storage connection string is read from the process environment.
connect_str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING')
if not connect_str:
    # Fail with an actionable message instead of an opaque error from the SDK.
    raise RuntimeError(
        "AZURE_STORAGE_CONNECTION_STRING is not set; define it in the environment "
        "or in the .env file before running this cell."
    )
# Create the BlobServiceClient object which will be used to create a container client
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
def upload_dataset(input_file_name):
    """Upload a local file into a newly created blob container.

    The container is named with a random UUID and the blob keeps the name
    given in ``input_file_name``. Returns the URL of the uploaded blob.
    """
    # A fresh UUID gives every upload its own container.
    fresh_container = str(uuid.uuid4())
    blob_service_client.create_container(fresh_container)

    target_blob = blob_service_client.get_blob_client(
        container=fresh_container,
        blob=input_file_name,
    )
    with open(input_file_name, "rb") as payload:
        target_blob.upload_blob(payload)

    return target_blob.url

def get_sas_token(expiry_hours=1):
    """Generate a read-only, object-level account SAS token for the storage account.

    Args:
        expiry_hours: Lifetime of the token in hours, counted from now.
            Defaults to 1, the value previously hard-coded.

    Returns:
        The SAS token string produced by ``generate_account_sas``.
    """
    from datetime import timezone  # local import: the cell only imports datetime/timedelta

    # Use an explicit UTC-aware time; datetime.utcnow() is deprecated and
    # returns a naive value.
    expires_at = datetime.now(timezone.utc) + timedelta(hours=expiry_hours)
    sas_token = generate_account_sas(
        blob_service_client.account_name,
        account_key=blob_service_client.credential.account_key,
        resource_types=ResourceTypes(object=True),
        permission=AccountSasPermissions(read=True),
        expiry=expires_at,
    )
    return sas_token


In [None]:
import pandas as pd
import os

# Node id whose rows are used for training the MVAD model; fill it in before running.
test_node_id = ''
training_dataset_filename = test_node_id + '_training_dataset_type_1'
# os.path.join keeps the dataset path valid on non-Windows hosts as well.
create_prepped_dataset(os.path.join("Datasets-2", "Dataset_Type_1.csv"), training_dataset_filename)
# Blob URL plus a SAS token, so the archive can be read without further credentials.
SOURCE_BLOB_SAS = upload_dataset(training_dataset_filename + ".zip") + "?" + get_sas_token()

In [73]:
import requests
import json

In [74]:
# Service host and subscription key both come from the environment; either is
# None when its variable is not defined.
ENDPOINT = os.getenv('ENDPOINT')
HEADERS = {"Ocp-Apim-Subscription-Key": os.getenv('OCP_APIM_SUBCRIPTION_KEY')}

In [75]:
# URL templates for the multivariate API. ``{endpoint}`` is filled from
# ENDPOINT, ``{model_id}`` / ``{result_id}`` from earlier responses.
_MODELS_ROUTE = "https://{endpoint}/multivariate/models"
_SINGLE_MODEL_ROUTE = _MODELS_ROUTE + "/{model_id}"

API_MODEL = _MODELS_ROUTE
API_MODEL_STATUS = _SINGLE_MODEL_ROUTE
API_MODEL_INFERENCE = _SINGLE_MODEL_ROUTE + "/detect"
API_RESULTS = "https://{endpoint}/multivariate/results/{result_id}"
API_EXPORT = _SINGLE_MODEL_ROUTE + "/export"
API_DELETE = _SINGLE_MODEL_ROUTE
 


In [None]:
# Query the models collection and show the raw response body; anything other
# than HTTP 200 aborts the cell.
res = requests.get(
    API_MODEL.format(endpoint=ENDPOINT),
    headers=HEADERS,
)
assert res.status_code == 200, f"Error occured. Error message: {res.content}"
print(res.content)

In [77]:
# Training request body: window size, alignment policy, data location and the
# time range to train on.
SLIDING_WINDOW = 500
data = {
    'slidingWindow': SLIDING_WINDOW,
    'alignPolicy': {'alignMode': 'Inner', 'fillNAMethod': 'Linear', 'paddingValue': 0},
    'source': SOURCE_BLOB_SAS,
    'startTime': '2021-06-09 00:00:00',
    'endTime': '2021-07-09 03:00:00',
    'displayName': 'datasetrequest',
}

# Submit the training job; HTTP 201 is the only status accepted here.
res = requests.post(
    API_MODEL.format(endpoint=ENDPOINT),
    data=json.dumps(data),
    headers=HEADERS,
)
assert res.status_code == 201, f"Error occured. Error message: {res.content}"
print(res.content)

# The model id is whatever follows the last "/" of the Location header.
location = res.headers['Location']
print(location)
model_id = location[location.rindex('/') + 1:]
print(model_id)

b'"Success"\n'
https://anildwaanomalydetector2.cognitiveservices.azure.com:443/anomalydetector/v1.1-preview/multivariate/models/e6fe9770-d265-11ec-a8e9-be621daea75b
e6fe9770-d265-11ec-a8e9-be621daea75b


In [91]:
# Fetch the model record and print both the full payload and its status field.
res = requests.get(
    API_MODEL_STATUS.format(endpoint=ENDPOINT, model_id=model_id),
    headers=HEADERS,
)
assert res.status_code == 200, f"Error occured. Error message: {res.content}"
res_content = json.loads(res.content)
print(json.dumps(res_content))
print(res_content['modelInfo']['status'])

{"modelId": "e6fe9770-d265-11ec-a8e9-be621daea75b", "createdTime": "2022-05-13T02:39:25Z", "lastUpdatedTime": "2022-05-13T02:40:06Z", "modelInfo": {"source": "https://anildwablobstorage.blob.core.windows.net/1d4312e2-ee47-443a-8ac8-57d91a5b2c48/LANNH402A21_training_dataset_type_1.zip?se=2022-05-13T03%3A38%3A08Z&sp=r&sv=2021-06-08&ss=b&srt=o&sig=b4xM9wfFSKzsLpB6Y7MdUDNCzCHROjR3slmTj2W5kk8%3D", "startTime": "2021-06-09T00:00:00Z", "endTime": "2021-07-09T03:00:00Z", "displayName": "datasetrequest", "slidingWindow": 500, "alignPolicy": {"alignMode": "Inner", "fillNAMethod": "Linear", "paddingValue": 0}, "status": "READY", "errors": [], "diagnosticsInfo": {"modelState": {"epochIds": [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], "trainLosses": [0.8735912963747978, 0.47482868656516075, 0.38847823813557625, 0.35556162148714066, 0.31685252115130424, 0.3195141330361366, 0.3125189319252968, 0.306859590113163, 0.3108766935765743, 0.2949559595435858], "validationLosses": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

In [None]:
# Node id whose rows are used for inferencing; fill it in before running.
test_node_id = ''
inference_dataset_filename = test_node_id + '_inference_dataset_type_1'
# os.path.join keeps the dataset path valid on non-Windows hosts as well.
create_prepped_dataset(os.path.join("Datasets-2", "Dataset_Type_1.csv"), inference_dataset_filename)
# Blob URL plus a SAS token, so the archive can be read without further credentials.
INFERENCE_SOURCE_BLOB_SAS = upload_dataset(inference_dataset_filename + ".zip") + "?" + get_sas_token()

In [93]:
# Detection request body: where the inference data lives and which time range
# to score.
data = {
    'source': INFERENCE_SOURCE_BLOB_SAS,
    'startTime': '2021-06-09 00:00:00',
    'endTime': '2021-07-09 03:00:00',
}

# Start detection against the trained model; HTTP 201 is the only status accepted here.
res = requests.post(
    API_MODEL_INFERENCE.format(endpoint=ENDPOINT, model_id=model_id),
    data=json.dumps(data),
    headers=HEADERS,
)
assert res.status_code == 201, f"Error occured. Error message: {res.content}"
print(res.content)

# The result id is the last path segment of the location header.
result_id = res.headers['location'].rsplit("/", 1)[-1]
print(f"result id = {result_id}")

b'"Success"\n'
result id = 4f2327a8-d266-11ec-a8e9-be621daea75b


In [None]:
# Retrieve the detection result for result_id and show the raw response body;
# anything other than HTTP 200 aborts the cell.
res = requests.get(
    API_RESULTS.format(endpoint=ENDPOINT, result_id=result_id),
    headers=HEADERS,
)
assert res.status_code == 200, f"Error occured. Error message: {res.content}"
print(res.content)

In [98]:
from bokeh.io import output_file, show, output_notebook, save
from bokeh.layouts import gridplot
from bokeh.plotting import figure
from matplotlib import pyplot
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.palettes import Dark2_5 as palette
import pandas as pd
import numpy as np
import os
import itertools  
import shutil
import uuid
import zipfile
from urllib.request import urlretrieve
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Configure Bokeh so that show() renders inside the notebook.
output_notebook()

def unzip_file(zip_src, dst_dir):
    """Extract every member of the archive ``zip_src`` into ``dst_dir``.

    When ``zip_src`` is not a zip archive nothing is extracted and
    'This is not zip' is printed instead.

    Args:
        zip_src: Path of the archive to read.
        dst_dir: Directory that receives the extracted members.
    """
    if not zipfile.is_zipfile(zip_src):
        print('This is not zip')
        return

    # The context manager closes the archive handle even if extraction fails.
    with zipfile.ZipFile(zip_src, 'r') as fz:
        print(fz)
        fz.extractall(dst_dir)
        
def load_data(local_data_path, start, end):
    """Load every CSV from a zipped dataset as a time-indexed, windowed DataFrame.

    The archive is extracted into a scratch directory that is removed again
    before returning, also when reading fails. Each ``<name>.csv`` must have
    ``timestamp`` and ``value`` columns; ``value`` is renamed to ``<name>``.

    Args:
        local_data_path: Path of the zip archive on disk.
        start: Inclusive lower bound for the ``timestamp`` column.
        end: Inclusive upper bound for the ``timestamp`` column.
            NOTE: both bounds are compared against the raw CSV text, so they
            must use the same textual format as the ``timestamp`` column.

    Returns:
        A list with one DataFrame per CSV (ordered by file name), indexed by
        the parsed ``timestamp`` and holding a single column named after the file.
    """
    import tempfile  # local import: only this helper needs a scratch directory

    frames = []
    with tempfile.TemporaryDirectory() as work_dir:
        unzip_file(local_data_path, work_dir)
        # Sorted so the order of the returned frames is deterministic.
        for file in sorted(os.listdir(work_dir)):
            if not file.endswith('.csv'):
                continue
            # os.path.join instead of a hard-coded backslash keeps this portable.
            frame = pd.read_csv(os.path.join(work_dir, file))
            var = file[:-len('.csv')]
            in_window = (frame['timestamp'] >= start) & (frame['timestamp'] <= end)
            frame = frame.loc[in_window].rename(columns={'value': var})
            # assign() builds a new frame, so no write goes into a filtered slice.
            frame = frame.assign(timestamp=pd.to_datetime(frame['timestamp'])).set_index('timestamp')
            frames.append(frame)
    return frames


def plot_lines_multi(x, y, p, color, name, t_str="hover,save,pan,box_zoom,reset,wheel_zoom", t_loc='above'):
    """Add one labelled line to figure ``p`` and configure the figure's legend.

    ``x`` and ``y`` are the line coordinates, ``color`` its colour and ``name``
    its legend label. ``t_str`` and ``t_loc`` are accepted but not used here.
    """
    line_style = {'color': color, 'legend_label': name}
    p.line(x, y, **line_style)

    # The legend is touched only after the line exists.
    legend = p.legend
    legend.location = "top_left"
    legend.click_policy = "hide"

def draw(data_source, local_data_path, result_id, sensitivity, start, end):
    """Plot the input series together with the detection result and report the top anomaly.

    Args:
        data_source: URL of the zipped dataset; it is downloaded to ``local_data_path``.
        local_data_path: Local path the archive is written to and then loaded from.
        result_id: Id of the detection result fetched from the results API.
        sensitivity: Points flagged as anomalous whose severity is
            ``<= 1 - sensitivity`` are un-flagged before plotting.
        start: Inclusive lower time bound handed to ``load_data``.
        end: Inclusive upper time bound handed to ``load_data``.

    Returns:
        ``(series, raw_result, top_anomaly)``. ``top_anomaly`` is the
        ``%Y-%m-%dT%H:%M:%SZ`` timestamp of the flagged point with the highest
        raw score, or ``None`` when no point remains flagged. When the result
        status is not ``'READY'`` the function prints a notice and returns ``None``.
    """
    urlretrieve(data_source, local_data_path)
    print(local_data_path, result_id, sensitivity, start, end)
    series = load_data(local_data_path, start, end)

    # One figure per input variable, cycling through the palette colours.
    p_list = []
    for var, color in zip(series, itertools.cycle(palette)):
        name = var.columns.values[0]
        p_value = figure(background_fill_color="#fafafa", x_axis_type="datetime")
        plot_lines_multi(var.index, var[name], p_value, color, name)
        p_list.append(p_value)

    res = requests.get(API_RESULTS.format(endpoint=ENDPOINT, result_id=result_id), headers=HEADERS)
    # Same status check as the other API calls in this notebook.
    assert res.status_code == 200, f"Error occured. Error message: {res.content}"
    raw_result = json.loads(res.content)
    if raw_result['summary']['status'] != 'READY':
        print("result not ready")
        return

    # Keep only the entries that actually carry an anomaly verdict.
    scored = [item for item in raw_result['results']
              if 'value' in item and 'isAnomaly' in item['value']]
    result = pd.DataFrame({
        'Timestamp': [item['timestamp'] for item in scored],
        'isAnomaly': [item['value']['isAnomaly'] for item in scored],
        'RawScore': [item['value']['score'] for item in scored],
        'Severity': [item['value']['severity'] for item in scored],
    })
    result['Timestamp'] = pd.to_datetime(result['Timestamp'])
    # Un-flag anomalies that are not severe enough for the requested sensitivity.
    result.loc[(result.Severity <= (1 - sensitivity)) & (result.isAnomaly == True), 'isAnomaly'] = False
    result = result.set_index('Timestamp')[['isAnomaly', 'RawScore', 'Severity']].dropna()

    for col, color in zip(result.columns, ['red', 'blue', 'black']):
        p = figure(background_fill_color="#fafafa", x_axis_type="datetime")
        p.line(result.index, result[col], color=color, alpha=0.8, legend_label=col)
        p.legend.location = "top_left"
        p.legend.click_policy = "hide"
        p_list.append(p)
    # NOTE(review): `plot_height` is kept as written; confirm it is still
    # accepted by the installed Bokeh version.
    grid = gridplot([[x] for x in p_list], sizing_mode='scale_width', plot_height=50)
    show(grid)

    result = result.sort_values(by=['RawScore'], ascending=False)
    flagged = result[result['isAnomaly'].astype(bool)]
    if flagged.empty:
        # Previously this case crashed with IndexError on the empty list.
        print("No anomaly left at sensitivity {0}".format(sensitivity))
        top_anomaly = None
    else:
        top_anomaly = flagged.index[0].strftime('%Y-%m-%dT%H:%M:%SZ')
        print("Top Anomaly Timestamp is : {0}".format(top_anomaly))
    return series, raw_result, top_anomaly




def show_contribution(local_data_path, raw_result, anomaly_timestamp, start, end, top_n=4):
    """Plot the variables that contributed most to one anomaly, plus an anomaly marker.

    Args:
        local_data_path: Path of the zipped dataset, loaded through ``load_data``.
        raw_result: Parsed detection result containing a ``'results'`` list.
        anomaly_timestamp: Timestamp string of the anomaly, exactly as it
            appears in ``raw_result['results'][i]['timestamp']``.
        start: Time bound handed to ``load_data``; also the exclusive lower
            bound of the plotted window.
        end: Time bound handed to ``load_data``; also the inclusive upper
            bound of the plotted window.
        top_n: Number of highest-scoring contributors to plot. Defaults to 4,
            the value previously hard-coded.

    Raises:
        IndexError: If no result entry with contributors matches ``anomaly_timestamp``.
    """
    def _naive_utc(value):
        # Parse through pandas and drop any timezone after converting to UTC;
        # np.datetime64 is deprecated for timezone-suffixed strings such as "...Z".
        stamp = pd.Timestamp(value)
        return stamp.tz_convert(None) if stamp.tzinfo is not None else stamp

    matches = [
        entry for entry in raw_result['results']
        if 'contributors' in entry.get('value', {}) and entry['timestamp'] == anomaly_timestamp
    ]
    if not matches:
        raise IndexError(
            "no result entry with contributors for timestamp {0!r}".format(anomaly_timestamp)
        )

    ranked = sorted(matches[0]['value']['contributors'],
                    key=lambda item: item['contributionScore'], reverse=True)
    top_variables = [item['variable'] for item in ranked[:top_n]]

    series = load_data(local_data_path, start, end)
    frames_by_name = {frame.columns[0]: frame for frame in series}
    window_start, window_end = _naive_utc(start), _naive_utc(end)
    sorted_series = []
    for variable in top_variables:
        frame = frames_by_name.get(variable)
        if frame is None:
            # Contributor without a matching CSV in the archive: nothing to plot.
            continue
        sorted_series.append(frame[(frame.index <= window_end) & (frame.index > window_start)])

    p_list = []
    for var, color in zip(sorted_series, itertools.cycle(palette)):
        name = var.columns.values[0]
        p_value = figure(background_fill_color="#fafafa", x_axis_type="datetime")
        plot_lines_multi(var.index, var[name], p_value, color, name)
        p_list.append(p_value)

    # Marker series: 0 everywhere, 1 at the anomaly timestamp.
    anomalies = pd.DataFrame({'timestamp': series[0].index, 'value': [0] * len(series[0].index)})
    anomalies = anomalies.set_index('timestamp')
    anomalies.loc[anomalies.index == _naive_utc(anomaly_timestamp), 'value'] = 1
    p_value = figure(background_fill_color="#fafafa", x_axis_type="datetime")
    plot_lines_multi(anomalies.index, anomalies.value, p_value, 'red', 'Anomaly')
    p_list.append(p_value)

    # NOTE(review): `plot_height` is kept as written; confirm it is still
    # accepted by the installed Bokeh version.
    grid = gridplot([[x] for x in p_list], sizing_mode='scale_width', plot_height=50)
    show(grid)

In [99]:

# draw() downloads data_source to local_data_path and plots it next to the
# inference result, so the URL must be the inference archive. Using the
# training URL here would overwrite the local inference zip with training data.
data_source = INFERENCE_SOURCE_BLOB_SAS
local_data_path = inference_dataset_filename + ".zip"
severity = 0.5  # handed to draw() as its `sensitivity` argument
start_date = "2021-06-09 00:00:00"
end_date = "2021-07-09 03:00:00"


series, raw_result, top_anomaly = draw(data_source, local_data_path, result_id, severity, start_date, end_date)

BAN11618E31_inference_dataset_type_1.zip 4f2327a8-d266-11ec-a8e9-be621daea75b 0.5 2021-06-09 00:00:00 2021-07-09 03:00:00
<zipfile.ZipFile filename='BAN11618E31_inference_dataset_type_1.zip' mode='r'>




Top Anomaly Timestamp is : 2021-06-27T17:00:00Z
