In [1]:
import sys
# Put the project directories (relative to the notebook's working directory)
# at the front of the import path so the `apps.*` packages imported in the
# following cells can be resolved.
sys.path.insert(0, '../src/')
sys.path.insert(0, '../')

import django
# Django must be initialised before any model module is imported below.
# NOTE(review): presumably relies on DJANGO_SETTINGS_MODULE being set in the
# kernel's environment — confirm.
django.setup()

In [2]:
from importlib import reload

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import ipywidgets as widgets
from IPython.display import display, clear_output
from ipywidgets import interact, interact_manual, Layout

In [3]:
import datetime

from apps.processing.ala.models import SamplingFeature, Observation
from apps.processing.ala.util import util

from importlib import import_module
from django.conf import settings
from apps.common.models import Process, Property

import pytz
import time
from psycopg2.extras import DateTimeTZRange
from django.utils.dateparse import parse_datetime
from dateutil.parser import parse
# from datetime import datetime
from functools import partial

# from luminol.anomaly_detector import AnomalyDetector
import luminol.anomaly_detector as lad

from apps.utils.time import UTC_P0100
from apps.common.util.util import generate_intervals
# from apps.ad.anomaly_detection import *
import apps.ad.anomaly_detection as ad

from apps.mc.api.views import get_observations, parse_date_range, get_empty_slots, get_not_null_ranges, get_feature_nn_from_list

from apps.processing.ala.models import SamplingFeature, Observation
from apps.common.models import Process, Property, TimeSlots
from psycopg2.extras import DateTimeTZRange

In [4]:
# Default query parameters; the helper functions defined in the following
# cells use these module-level names as their default argument values.
topic = "drought"
prop = "air_temperature"
# Default phenomenon-time window, parsed into pandas Timestamps.
phenomenon_date_from = pd.to_datetime("2019-01-01")
phenomenon_date_to = pd.to_datetime("2019-03-30")
# Model class whose observations are analysed.
provider_model = Observation
# Querysets over the models that `provider_model` references through its
# `feature_of_interest` and `time_slots` fields.
features = provider_model._meta.get_field('feature_of_interest').remote_field.model.objects.all()
timeslots = provider_model._meta.get_field('time_slots').remote_field.model.objects.all()

In [5]:
def get_observations_func(
    feature = features[0],
    timeslot = timeslots[0],
    phenomenon_date_from = phenomenon_date_from,
    phenomenon_date_to = phenomenon_date_to,
    topic = topic,
    prop = prop
):
    """Prepare an observation loader for one feature, property and time-slot setup.

    Args:
        feature: Feature of interest whose observations are requested.
        timeslot: Time-slots object used to aggregate the observations.
        phenomenon_date_from: Start of the requested window; converted with
            ``str()`` before being handed to ``parse_date_range``.
        phenomenon_date_to: End of the requested window; converted the same way.
        topic: Key into ``settings.APPLICATION_MC.TOPICS``.
        prop: Property ``name_id``; also a key of the topic's ``properties``.

    Returns:
        A 3-tuple ``(get_func, pt_range_z, feature_time_slots)`` where
        ``get_func`` is ``get_observations`` with the time slots, property,
        provider model, feature, process and time-slots object already bound,
        ``pt_range_z`` is the requested range with ``UTC_P0100`` set as tzinfo
        on both bounds, and ``feature_time_slots`` is the result of
        ``get_empty_slots`` for the feature's not-null data range.

    Raises:
        ValueError: If ``topic`` is not configured.
        KeyError: If ``prop`` or the observation provider is missing from the
            topic configuration.

    Note:
        The default ``feature``/``timeslot`` values are evaluated once, when
        this cell is executed. The provider model is always the module-level
        ``provider_model``.
    """
    topic_config = settings.APPLICATION_MC.TOPICS.get(topic)
    if topic_config is None:
        # Fail with a clear message instead of "'NoneType' is not subscriptable".
        raise ValueError(f"Unknown topic: {topic!r}")

    prop_config = topic_config['properties'][prop]

    # Dotted path of the provider model; it is the key used in the topic
    # configuration's 'observation_providers' mapping.
    observation_provider_name = f"{provider_model.__module__}.{provider_model.__name__}"
    process_name_id = prop_config['observation_providers'][observation_provider_name]["process"]
    process = Process.objects.get(name_id=process_name_id)

    prop_item = Property.objects.get(name_id=prop)

    # Only the range is needed; the two day values returned alongside are unused.
    pt_range, _day_from, _day_to = parse_date_range(str(phenomenon_date_from), str(phenomenon_date_to))

    # replace() sets the tzinfo on both bounds without shifting the wall-clock time.
    pt_range_z = DateTimeTZRange(
        pt_range.lower.replace(tzinfo=UTC_P0100),
        pt_range.upper.replace(tzinfo=UTC_P0100)
    )

    nn_feature_ranges = get_not_null_ranges(
        features=[feature],
        props=[prop],
        topic_config=topic_config,
        observation_provider_name=observation_provider_name,
        provider_model=provider_model,
        pt_range_z=pt_range_z,
        time_slots=timeslot,
    )

    data_range = get_feature_nn_from_list(
        nn_feature_ranges,
        feature,
        prop_item.id,
        process.id
    )

    feature_time_slots = get_empty_slots(timeslot, data_range)

    get_func = partial(
        get_observations,
        feature_time_slots,
        prop_item,
        provider_model,
        feature,
        process,
        timeslot,
    )

    return get_func, pt_range_z, feature_time_slots

In [6]:
def detect_anomalies(
    phenomenon_date_from = phenomenon_date_from,
    phenomenon_date_to = phenomenon_date_to,
    detector_method='bitmap_mod',
    use_baseline=True,
    shift=True,
    extend_range=True,
    detector_params=None,
    topic = topic,
    prop = prop,
    provider_model = provider_model,
    feature = features[0],
    timeslot = timeslots[0]
):
    """Run one anomaly detector over the observations of a feature.

    Args:
        phenomenon_date_from: Start of the analysed window.
        phenomenon_date_to: End of the analysed window.
        detector_method: Detector name forwarded to ``ad.get_timeseries``.
        use_baseline: Forwarded to ``ad.get_timeseries``.
        shift: Forwarded to ``ad.get_timeseries``.
        extend_range: Forwarded to ``ad.get_timeseries``.
        detector_params: Detector settings forwarded to ``ad.get_timeseries``.
            ``None`` (the default) selects precision 6, lag and future window
            size 96 and chunk size 2. Pass ``{}`` to forward no settings.
        topic: Topic key forwarded to ``get_observations_func``.
        prop: Property name forwarded to ``get_observations_func``.
        provider_model: Accepted but currently not forwarded;
            ``get_observations_func`` reads the module-level ``provider_model``.
        feature: Feature of interest to analyse.
        timeslot: Time-slots object used for aggregation.

    Returns:
        The mapping returned by ``ad.get_timeseries`` with an additional
        ``"feature_time_slots"`` entry holding the time slots of the feature.
    """
    if detector_params is None:
        # Build the defaults per call: a dict literal in the signature would be
        # one object shared by every call, so a mutation by any callee would
        # leak into later calls.
        detector_params = {
            "precision": 6,
            "lag_window_size": 96,
            "future_window_size": 96,
            "chunk_size": 2
        }

    get_func, pt_range_z, feature_time_slots = get_observations_func(
        feature,
        timeslot,
        phenomenon_date_from,
        phenomenon_date_to,
        topic,
        prop
    )

    anoms = ad.get_timeseries(
        phenomenon_time_range=pt_range_z,
        num_time_slots=len(feature_time_slots),
        get_observations=get_func,
        detector_method=detector_method,
        detector_params=detector_params,
        shift=shift,
        use_baseline=use_baseline,
        extend_range=extend_range,
    )

    # Keep the slots with the result so the plotting code can label the x axis.
    anoms["feature_time_slots"] = feature_time_slots

    return anoms

In [7]:
def highlight(indices, alpha, color, ax):
    """Shade a one-unit-wide vertical band centred on each given x position.

    Args:
        indices: Sequence of numeric x positions to mark.
        alpha: Opacity used for every band.
        color: Face colour used for every band.
        ax: Object providing ``axvspan`` (a matplotlib axes) to draw on.
    """
    half_width = 0.5
    for centre in indices:
        ax.axvspan(
            centre - half_width,
            centre + half_width,
            facecolor=color,
            edgecolor='none',
            alpha=alpha,
        )

In [8]:
# Matplotlib colour codes assigned to detectors by their position; blue is
# not in the list because `plot` uses it for the measured values.
colors = ['r', 'g', 'c', 'm', 'y', 'k']
# Module-level list; no function visible in this notebook writes to it.
results = []

# Turn matplotlib's interactive mode off; figures are shown explicitly with
# display() by the widget callback further down.
plt.ioff()

def plot(detectors, hlt_detector):
    """Draw the measured values together with the anomaly rates of each detector.

    Args:
        detectors: Mapping of detector name to its result mapping. Every result
            must provide ``"property_anomaly_rates"``; the highlighted one also
            ``"property_anomaly_percentiles"``; the first one also
            ``"property_values"`` and ``"feature_time_slots"``.
        hlt_detector: Key of ``detectors`` whose anomalies are shaded in the
            background.

    Returns:
        The matplotlib figure that was created.

    Raises:
        KeyError: If ``hlt_detector`` is not a key of ``detectors``.
        IndexError: If ``detectors`` is empty.
    """
    detector_names = list(detectors)
    fig, ax1 = plt.subplots(figsize=(20,7))

    # Background shading: one layer per percentile threshold of the highlighted
    # detector, so slots above several thresholds end up darker.
    highlighted = detectors[hlt_detector]
    rates = pd.DataFrame({
        'anomalies': highlighted["property_anomaly_rates"]
    })
    thresholds = highlighted["property_anomaly_percentiles"]
    # Cycle through the palette so more detectors than colours cannot raise.
    hlt_color = colors[detector_names.index(hlt_detector) % len(colors)]
    for percentile in thresholds.keys():
        above_threshold = rates[rates['anomalies'] > thresholds[percentile]].index
        highlight(above_threshold, percentile*0.0025, hlt_color, ax1)

    # Values and x labels are taken from the first detector's result.
    first_result = detectors[detector_names[0]]

    # NOTE: the "%-d"/"%-m" (no zero padding) directives are platform-specific
    # strftime extensions and are not available on every OS.
    values_frame = pd.DataFrame({
        'values': [float(n) for n in first_result["property_values"]]
    }, index=[n.lower.strftime("%-d.%-m.%Y") for n in first_result["feature_time_slots"]])

    values_line = values_frame['values'].plot.line(ax=ax1, color='b')
    ax1.set_ylabel('values', color='b')
    ax1.tick_params('y', colors='b')

    lns = [values_line.get_lines()[0]]

    # Every detector gets its own twin y axis, so each rate curve is drawn on
    # its own scale.
    for position, name in enumerate(detector_names):
        line_color = colors[position % len(colors)]
        rates_frame = pd.DataFrame({
            'anomalies': detectors[name]["property_anomaly_rates"]
        })

        ax2 = ax1.twinx()
        anomalies_line = rates_frame['anomalies'].plot.line(ax=ax2, color=line_color, label=name)
        lns.append(anomalies_line.get_lines()[0])
        ax2.tick_params('y', colors=line_color)

    # One combined legend for the lines living on the different axes.
    labs = [ln.get_label() for ln in lns]
    ax1.legend(lns, labs, loc=1)

    return fig

In [9]:
# Human-readable detector label -> detector method name. The method names are
# the strings handed to `detect_anomalies` as `detector_method` by the `val_*`
# wrappers in a later cell. The commented entries are currently disabled.
detectors = {
    "Bitmap mod": "bitmap_mod",
    "Bitmap mod shift": "bitmap_mod_shift",
    "LinkedIn bitmap": "bitmap_detector",
    "Default": "default_detector",
    "Derivative": "derivative_detector",
    "Exponential average": "exp_avg_detector",
#     "Absolute threshold": "absolute_threshold",
#     "Diff Percent": "diff_percent_threshold",
#     "Sign test": "sign_test",
}

In [10]:
# Options for the "Aggregate to" dropdown: the label is str(timeslot) with
# underscores replaced by spaces, the value is the time-slots object itself.
timeslots_list = {str(timeslot).replace("_", " "): timeslot for timeslot in timeslots}

In [11]:
def val_bitmap_mod(feature, timeslot, start_date, end_date, detector_params):
    """Run the "bitmap_mod" detector with shifting disabled and the given detector parameters."""
    return detect_anomalies(
        phenomenon_date_from=start_date,
        phenomenon_date_to=end_date,
        detector_method="bitmap_mod",
        feature=feature,
        timeslot=timeslot,
        detector_params=detector_params,
        shift=False,
    )
def val_bitmap_mod_shift(feature, timeslot, start_date, end_date, detector_params):
    """Run the "bitmap_mod_shift" detector with the given detector parameters."""
    return detect_anomalies(
        phenomenon_date_from=start_date,
        phenomenon_date_to=end_date,
        detector_method="bitmap_mod_shift",
        feature=feature,
        timeslot=timeslot,
        detector_params=detector_params,
    )
def val_bitmap_detector(feature, timeslot, start_date, end_date, detector_params):
    """Run the "bitmap_detector" detector with the given detector parameters."""
    return detect_anomalies(
        phenomenon_date_from=start_date,
        phenomenon_date_to=end_date,
        detector_method="bitmap_detector",
        feature=feature,
        timeslot=timeslot,
        detector_params=detector_params,
    )
def val_default_detector(feature, timeslot, start_date, end_date, detector_params):
    """Run the "default_detector" detector without baseline, range extension or shift.

    ``detector_params`` is accepted for signature parity with the other
    wrappers but ignored; an empty dict is forwarded instead.
    """
    return detect_anomalies(
        phenomenon_date_from=start_date,
        phenomenon_date_to=end_date,
        detector_method="default_detector",
        feature=feature,
        timeslot=timeslot,
        detector_params={},
        use_baseline=False,
        extend_range=False,
        shift=False,
    )
def val_derivative_detector(feature, timeslot, start_date, end_date, detector_params):
    """Run the "derivative_detector" detector without baseline, range extension or shift.

    ``detector_params`` is accepted for signature parity with the other
    wrappers but ignored; an empty dict is forwarded instead.
    """
    return detect_anomalies(
        phenomenon_date_from=start_date,
        phenomenon_date_to=end_date,
        detector_method="derivative_detector",
        feature=feature,
        timeslot=timeslot,
        detector_params={},
        use_baseline=False,
        extend_range=False,
        shift=False,
    )
def val_exp_avg_detector(feature, timeslot, start_date, end_date, detector_params):
    """Run the "exp_avg_detector" detector without baseline, range extension or shift.

    ``detector_params`` is accepted for signature parity with the other
    wrappers but ignored; an empty dict is forwarded instead.
    """
    return detect_anomalies(
        phenomenon_date_from=start_date,
        phenomenon_date_to=end_date,
        detector_method="exp_avg_detector",
        feature=feature,
        timeslot=timeslot,
        detector_params={},
        use_baseline=False,
        extend_range=False,
        shift=False,
    )
def val_absolute_threshold(feature, timeslot, start_date, end_date, detector_params):
    """Run the "absolute_threshold" detector without baseline, range extension or shift.

    The window is taken from ``start_date``/``end_date``, like every other
    ``val_*`` wrapper, not from module-level date globals.
    ``detector_params`` is accepted for signature parity but ignored; an empty
    dict is forwarded instead.
    """
    return detect_anomalies(start_date, end_date, "absolute_threshold", feature=feature, timeslot=timeslot, detector_params={}, use_baseline=False, extend_range=False, shift=False)
def val_diff_percent_threshold(feature, timeslot, start_date, end_date, detector_params):
    """Run the "diff_percent_threshold" detector without baseline, range extension or shift.

    The window is taken from ``start_date``/``end_date``, like every other
    ``val_*`` wrapper, not from module-level date globals.
    ``detector_params`` is accepted for signature parity but ignored; an empty
    dict is forwarded instead.
    """
    return detect_anomalies(start_date, end_date, "diff_percent_threshold", feature=feature, timeslot=timeslot, detector_params={}, use_baseline=False, extend_range=False, shift=False)
def val_sign_test(feature, timeslot, start_date, end_date, detector_params):
    """Run the "sign_test" detector without baseline, range extension or shift.

    The window is taken from ``start_date``/``end_date``, like every other
    ``val_*`` wrapper, not from module-level date globals.
    ``detector_params`` is accepted for signature parity but ignored; an empty
    dict is forwarded instead.
    """
    return detect_anomalies(start_date, end_date, "sign_test", feature=feature, timeslot=timeslot, detector_params={}, use_baseline=False, extend_range=False, shift=False)

def plot_anomalies(feature, timeslot, start_date, end_date, precision, window_size, chunk_size, hlt_detector, bitmap_mod, bitmap_mod_shift, bitmap_detector, default_detector, derivative_detector, exp_avg_detector):
    """Run every selected detector and plot the results in one figure.

    Args:
        feature: Feature of interest to analyse.
        timeslot: Time-slots object used for aggregation.
        start_date: Start of the analysed window.
        end_date: End of the analysed window.
        precision: Detector parameter ``"precision"``.
        window_size: Used for both ``"lag_window_size"`` and
            ``"future_window_size"``.
        chunk_size: Detector parameter ``"chunk_size"``.
        hlt_detector: Method name of the detector to highlight. If it is not
            among the selected detectors, the first selected one is used.
        bitmap_mod, bitmap_mod_shift, bitmap_detector, default_detector,
        derivative_detector, exp_avg_detector: Truthy to run that detector.

    Returns:
        The figure produced by ``plot``, or ``None`` when no detector is
        selected.
    """
    requested = {
        "bitmap_mod": bitmap_mod,
        "bitmap_mod_shift": bitmap_mod_shift,
        "bitmap_detector": bitmap_detector,
        "default_detector": default_detector,
        "derivative_detector": derivative_detector,
        "exp_avg_detector": exp_avg_detector,
    }
    selected = [method for method, enabled in requested.items() if enabled]
    if not selected:
        # Nothing to run, hence nothing to draw.
        return None

    runners = {
        "bitmap_mod": val_bitmap_mod,
        "bitmap_mod_shift": val_bitmap_mod_shift,
        "bitmap_detector": val_bitmap_detector,
        "default_detector": val_default_detector,
        "derivative_detector": val_derivative_detector,
        "exp_avg_detector": val_exp_avg_detector,
    }

    args = [feature, timeslot, start_date, end_date, {
        "precision": precision,
        "lag_window_size": window_size,
        "future_window_size": window_size,
        "chunk_size": chunk_size
    }]

    # Results keep the fixed detector order above; `plot` assigns colours by
    # that position.
    anomalies = {method: runners[method](*args) for method in selected}

    if hlt_detector not in anomalies:
        # The requested highlight was not run (e.g. its checkbox was cleared):
        # fall back to the first detector that was.
        hlt_detector = selected[0]

    # A single selected detector is plotted as well.
    return plot(anomalies, hlt_detector)

In [12]:
# Initial values of the start/end date pickers created further down in this cell.
t_from = "2019-01-01"
t_to = "2019-03-30"

def hlt_detectors():
    """Return the label -> method-name mapping of the detectors whose checkbox is ticked.

    The result is used as the options of the "Highlight" dropdown.
    """
    checkbox_choices = (
        (bitmap_mod_widget, "Bitmap mod", "bitmap_mod"),
        (bitmap_mod_shift_widget, "Bitmap mod shift", "bitmap_mod_shift"),
        (bitmap_detector_widget, "LinkedIn bitmap", "bitmap_detector"),
        (default_detector_widget, "Default", "default_detector"),
        (derivative_detector_widget, "Derivative", "derivative_detector"),
        (exp_avg_detector_widget, "Exponential average", "exp_avg_detector"),
    )
    return {
        label: method
        for checkbox, label, method in checkbox_choices
        if checkbox.value
    }

def update_hlt_detectors(*args):
    """Observer callback: refresh the "Highlight" dropdown options from the ticked checkboxes.

    The positional arguments supplied by the observer are ignored.
    """
    ticked = hlt_detectors()
    hlt_detector_widget.options = ticked

# One checkbox per detector. Each one triggers `update_hlt_detectors` when its
# value changes so the "Highlight" dropdown only offers ticked detectors.
# The three bitmap detectors are ticked initially; the others are not.
bitmap_mod_widget = widgets.Checkbox(value=True,description="Bitmap mod")
bitmap_mod_widget.observe(update_hlt_detectors, "value")
bitmap_mod_shift_widget = widgets.Checkbox(value=True,description="Bitmap mod shift")
bitmap_mod_shift_widget.observe(update_hlt_detectors, "value")
bitmap_detector_widget = widgets.Checkbox(value=True,description="LinkedIn bitmap")
bitmap_detector_widget.observe(update_hlt_detectors, "value")
default_detector_widget = widgets.Checkbox(value=False,description="Default")
default_detector_widget.observe(update_hlt_detectors, "value")
derivative_detector_widget = widgets.Checkbox(value=False,description="Derivative")
derivative_detector_widget.observe(update_hlt_detectors, "value")
exp_avg_detector_widget = widgets.Checkbox(value=False,description="Exponential average")
exp_avg_detector_widget.observe(update_hlt_detectors, "value")
# Disabled detectors (kept for reference):
# absolute_threshold_widget = widgets.Checkbox(value=False,description="Absolute threshold")
# absolute_threshold_widget.observe(update_hlt_detectors, "value")
# diff_percent_threshold_widget = widgets.Checkbox(value=False,description="Diff Percent")
# diff_percent_threshold_widget.observe(update_hlt_detectors, "value")
# sign_test_widget = widgets.Checkbox(value=False,description="Sign test")
# sign_test_widget.observe(update_hlt_detectors, "value")

# Lay the detector checkboxes out in two columns; the third, commented-out
# column belongs to the disabled detectors.
detector_widgets = widgets.HBox([
    widgets.VBox([
        bitmap_mod_widget,
        bitmap_mod_shift_widget,
        bitmap_detector_widget,
    ]),
    widgets.VBox([
        default_detector_widget,
        derivative_detector_widget,
        exp_avg_detector_widget,
    ]),
#     widgets.VBox([
        # absolute_threshold_widget,
        # diff_percent_threshold_widget,
        # sign_test_widget
#     ])
])

# Detector whose anomalies are shaded in the plot background. The options are
# limited to the ticked detectors; "bitmap_mod" is selected initially.
hlt_detector_widget = widgets.Dropdown(options=hlt_detectors(), value=detectors["Bitmap mod"], description="Highlight")

# Detector parameters; the initial values match the defaults of `detect_anomalies`.
precision_widget = widgets.IntSlider(value=6, min=2, max=16, step=1, description="Precision")
window_size_widget = widgets.BoundedIntText(value=96, min=4, max=256, step=1, description="Window size")
chunk_size_widget = widgets.IntSlider(value=2, min=2, max=16, step=1, description="Chunk size")

# Station (feature of interest) and aggregation (time slots) selectors.
feature_widget = widgets.Dropdown(options=features, value=features[0], description="Station")
timeslot_widget = widgets.Dropdown(options=timeslots_list, description="Aggregate to")

# Analysed window; the pickers start at the t_from / t_to dates.
start_date_widget = widgets.DatePicker(value=pd.to_datetime(t_from).date(), description="Start date")
end_date_widget = widgets.DatePicker(value=pd.to_datetime(t_to).date(), description="End date")

date_widgets = widgets.HBox([
    start_date_widget,
    end_date_widget
])

# Group the inputs into three collapsible sections: general settings,
# detector parameters and detector selection.
# ui = widgets.Tab(children=[
widget_accordion = widgets.Accordion(children=[
    widgets.VBox([
        widgets.HBox([feature_widget, timeslot_widget]),
        date_widgets
    ]),
    widgets.VBox([
        precision_widget,
        window_size_widget,
        chunk_size_widget
    ]),
    widgets.VBox([
        detector_widgets,
        hlt_detector_widget
    ])
],
# layout=Layout(
#     height='300px',
#     display='flex',
#     align_items='center',
#     justify_content='center'
# )
)

# Section titles, in the order of the children above.
widget_accordion.set_title(0, "General")
widget_accordion.set_title(1, "Detector parameters")
widget_accordion.set_title(2, "Detectors used")
# Start with the "General" section selected.
widget_accordion.selected_index = 0

# Output area that receives the rendered figure.
out = widgets.Output()

# Button that triggers a new plot; its handler is attached after `click` is defined.
plot_button = widgets.Button(
    description="Plot"
)

def click(b):
    """Button handler: run the selected detectors and show the plot in `out`.

    Args:
        b: The clicked button as passed by ``on_click``; unused, so ``None``
            may be passed to trigger a plot programmatically.
    """
    fig = plot_anomalies(
        bitmap_mod=bitmap_mod_widget.value,
        bitmap_mod_shift=bitmap_mod_shift_widget.value,
        bitmap_detector=bitmap_detector_widget.value,
        default_detector=default_detector_widget.value,
        derivative_detector=derivative_detector_widget.value,
        exp_avg_detector=exp_avg_detector_widget.value,
        feature=feature_widget.value,
        timeslot=timeslot_widget.value,
        start_date=start_date_widget.value,
        end_date=end_date_widget.value,
        precision=precision_widget.value,
        window_size=window_size_widget.value,
        chunk_size=chunk_size_widget.value,
        hlt_detector=hlt_detector_widget.value
    )

    with out:
        # wait=True postpones clearing until the new output arrives, which
        # avoids flicker between two plots.
        clear_output(wait=True)
        if fig is None:
            # plot_anomalies returned no figure; say so instead of showing "None".
            print("No plot was produced for the current detector selection.")
        else:
            display(fig)
            # The figure has been rendered into the output area. Close it so
            # pyplot does not keep one more open figure per click.
            plt.close(fig)

# Re-plot whenever the button is pressed.
plot_button.on_click(click)

# Complete UI: input sections, the plot button and the output area stacked vertically.
ui = widgets.VBox([
    widget_accordion,
    plot_button,
    out
])

def init():
    """Show the widget UI and render the first plot from the initial widget values."""
    display(ui)
    # Same code path as pressing "Plot"; the handler does not use its argument.
    click(b=None)

In [13]:
# Display the interactive UI and draw the plot for the initial widget values.
init()

VBox(children=(Accordion(children=(VBox(children=(HBox(children=(Dropdown(description='Station', options=(<Sam…