# Imports

In [2]:
import sys
import dill as pickle
from math import ceil
import pandas as pd
# Put the parent directory on the module search path (slot 1) unless it is
# already listed there.
module_path = '..'
if sys.path.count(module_path) == 0:
    sys.path.insert(1, module_path)

# Raise the pandas display limits so wide/long frames are shown in the notebook.
pd.options.display.max_columns = 200
pd.options.display.max_rows = 300

In [3]:
import logging
import os
import warnings

import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Switch to the PGF backend: every figure in this notebook is saved as a .pgf
# file. PGF is a non-GUI backend, so plt.show() cannot open a window.
matplotlib.use("pgf")
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
})

# temporarily remove deprecation warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

logging.basicConfig(format='%(asctime)s: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
logger = logging.getLogger("TimeSeries")
logger.setLevel(logging.INFO)

# Directory that receives every saved figure. Create it up front so the
# savefig calls below do not fail with FileNotFoundError on a fresh checkout.
output_dir = 'beth_output'
os.makedirs(output_dir, exist_ok=True)

# Dataset

In [4]:
# Load the pickled data set and the pickled outlier model from _data/BETH.
# NOTE(review): unpickling executes code embedded in the file - only load
# pickles that come from a trusted source.
data_path = "_data/BETH/data.pkl"
model_path = "_data/BETH/model.pkl"

with open(data_path, "rb") as data_file:
    df = pickle.load(data_file)

with open(model_path, "rb") as model_file:
    outlier_model = pickle.load(model_file)

In [5]:
height = 4
width = 5


def plot_save_mp(outlier_model, file_name, s_index=0, e_index=-1, sci_index=False):
    """Plot the model's matrix-profile statistics with detections and save the figure.

    Draws ``outlier_model.max_val``, ``outlier_model.max_mean`` and
    ``outlier_model.max_std_dev`` as dashed lines, adds a red vertical line for
    each entry of ``outlier_model.anomalies`` that lies in the plotted x-range,
    and writes the figure to ``file_name``.

    The figure size is read from the module-level ``width`` and ``height`` at
    call time (they are not parameters of this function).

    Parameters
    ----------
    outlier_model
        Object exposing ``max_val``, ``max_mean``, ``max_std_dev``,
        ``anomalies`` and ``ts_size``.
    file_name
        Destination path handed to ``Figure.savefig``.
    s_index, e_index
        When ``e_index > 0`` the x-axis is ``range(s_index, e_index)`` and the
        series are sliced with both bounds shifted back by
        ``outlier_model.ts_size``. Otherwise the series are sliced
        ``[s_index:e_index]`` directly and the x-axis is
        ``range(0, len(max_val) - 1)``, which matches the slice length only
        for the defaults ``s_index=0, e_index=-1``.
    sci_index
        If true, the y-axis uses scientific notation with exponent 3.
    """
    fig = plt.figure()
    if e_index > 0:  # Adjust for full dataset
        s_index_mp = s_index - outlier_model.ts_size
        e_index_mp = e_index - outlier_model.ts_size
        local_range = range(s_index, e_index)
    else:
        s_index_mp = s_index
        e_index_mp = e_index
        # A plain range plots the same x values as a list and keeps the
        # membership test in the anomaly loop cheap.
        local_range = range(0, len(outlier_model.max_val) - 1)

    profile_series = (
        (outlier_model.max_val, 'b--', 'max'),
        (outlier_model.max_mean, 'y--', r'$\mu$'),
        (outlier_model.max_std_dev, 'g--', r'$\sigma$'),
    )
    for values, line_format, legend_label in profile_series:
        plt.plot(local_range, values[s_index_mp:e_index_mp], line_format, label=legend_label)

    if sci_index:
        plt.ticklabel_format(axis="y", style="sci", scilimits=(3, 3))
    plt.xlabel('Time (s)')
    plt.ylabel('Matrix Profile Values')

    # Only the first detection line carries a legend label, so the legend
    # shows a single 'detect' entry.
    # NOTE(review): the range check uses the unshifted anomaly index, while in
    # the e_index < 0 branch the line is drawn at i - ts_size - confirm that
    # this is the intended filtering.
    first_detect = True
    for i in outlier_model.anomalies:
        if i in local_range:
            if e_index < 0:
                i = i - outlier_model.ts_size  # Adjust for full dataset
            if first_detect:
                plt.axvline(x=i, color='r', linestyle='-', label='detect')
                first_detect = False
            else:
                plt.axvline(x=i, color='r', linestyle='-')

    plt.legend(loc=0)

    # Size and save through the figure handle *before* showing. On GUI/inline
    # backends plt.show() can leave pyplot without this figure as the current
    # one, so a later plt.savefig() would write an empty figure.
    fig.set_size_inches(w=width, h=height)
    fig.savefig(file_name)
    plt.show()

In [6]:
def plot_save_freq(data, file_name, s_index=0, e_index=-1, sci_index=False, height=4, width=5):
    """Plot ``data[s_index:e_index]`` as a line and save the figure to ``file_name``.

    Parameters
    ----------
    data
        Sliceable sequence of values to plot.
    file_name
        Destination path handed to ``Figure.savefig``.
    s_index, e_index
        Slice bounds applied to ``data``. With the default ``e_index=-1`` the
        final sample is excluded from the plot.
    sci_index
        If true, the y-axis uses scientific notation with exponent 3.
    height, width
        Figure size in inches.

    The y-axis label is always 'User ID', whatever ``data`` contains.
    """
    fig = plt.figure()
    plt.plot(data[s_index:e_index])
    plt.xlabel('Time (s)')
    plt.ylabel('User ID')
    if sci_index:
        plt.ticklabel_format(axis="y", style="sci", scilimits=(3, 3))

    # Size and save through the figure handle *before* showing. On GUI/inline
    # backends plt.show() can leave pyplot without this figure as the current
    # one, so a later plt.savefig() would write an empty figure.
    fig.set_size_inches(w=width, h=height)
    fig.savefig(file_name)
    plt.show()

In [7]:
# Column of df that is plotted as the base signal below; the key is also
# embedded in the names of the saved figure files.
outlier_key = "userId"
data_f_c = df[outlier_key]

In [8]:
# Figure size in inches for the base-signal plot.
height, width = 4, 6

# Plot the selected column and save it as a PGF file in output_dir.
plot_save_freq(
    data_f_c,
    f'{output_dir}/base_sig_{outlier_key}_all.pgf',
    sci_index=False,
    height=height,
    width=width,
)

  plt.show()


In [9]:
# plot_save_mp takes its figure size from the module-level width/height, so
# they are set here right before the call.
height, width = 4, 6

# Plot the matrix-profile statistics of the loaded model and save them as PGF.
plot_save_mp(
    outlier_model,
    f'{output_dir}/mp_hist_{outlier_key}.pgf',
    sci_index=True,
)

  plt.show()


In [10]:
# Results plot: the "evil" column with one red vertical line per detection.
fig = plt.figure()
data_test = df.copy()
plt.plot(data_test["evil"])
logger.info(outlier_model.anomalies)
for i in outlier_model.anomalies:
    plt.axvline(x=i, color='r')
plt.xlabel(r'time')
plt.ylabel(r'evil')

# Size and save through the figure handle *before* showing. On GUI/inline
# backends plt.show() can leave pyplot without this figure as the current one,
# so a later plt.savefig() would write an empty figure.
fig.set_size_inches(w=width, h=height)
fig.savefig(f'{output_dir}/outlier_result_evil_{outlier_key}.pgf')

plt.show()

04/22/2022 12:53:37 PM: [11000, 213090, 237204, 485354, 787618, 816542]
  plt.show()


In [11]:
# Results plot: the "sus" column with one red vertical line per detection.
fig = plt.figure()
data_test = df.copy()
plt.plot(data_test["sus"])
logger.info(outlier_model.anomalies)
for i in outlier_model.anomalies:
    plt.axvline(x=i, color='r')
plt.xlabel(r'time')
plt.ylabel(r'sus')

# Size and save through the figure handle *before* showing. On GUI/inline
# backends plt.show() can leave pyplot without this figure as the current one,
# so a later plt.savefig() would write an empty figure.
fig.set_size_inches(w=width, h=height)
fig.savefig(f'{output_dir}/outlier_result_sus_{outlier_key}.pgf')

plt.show()

04/22/2022 12:53:37 PM: [11000, 213090, 237204, 485354, 787618, 816542]
  plt.show()


In [17]:
# adapted from https://stackoverflow.com/questions/44951911/plot-a-binary-timeline-in-matplotlib?answertab=modifieddesc#tab-top
width = 5.5
height = 2.5


def _fault_bars(frame, fault_key):
    """Return ``(bars_no_fault, bars_fault)`` for ``broken_barh`` from a 0/1 column.

    A row is an event when its ``fault_key`` value differs from the previous
    row (the first row always qualifies because ``shift()`` yields NaN there).
    Events with value 1 start a fault span, events with value 0 end one.
    Every bar is an ``(xmin, xwidth)`` tuple computed from index labels, so
    the index must support subtraction.

    NOTE(review): the pairing of the k-th fault start with the (k+1)-th
    zero-event assumes the column begins with 0 - confirm for new data.
    """
    value_changed = frame[fault_key].shift() != frame[fault_key]
    events = frame[value_changed]

    fault_start = events.loc[events[fault_key] == 1].index.tolist()
    fault_end = events.loc[events[fault_key] == 0].index.tolist()

    # Skip the first zero-event: it opens the initial no-fault span rather
    # than closing a fault.
    times_faults = list(zip(fault_start, fault_end[1:]))
    # The last index label closes the final no-fault span.
    fault_start.append(frame.index[-1])
    times_no_faults = list(zip(fault_end, fault_start))

    bars_no_fault = [(begin, end - begin) for begin, end in times_no_faults]
    bars_fault = [(begin, end - begin) for begin, end in times_faults]
    return bars_no_fault, bars_fault


bar_green, bar_red = _fault_bars(df, "evil")
bar_green2, bar_red2 = _fault_bars(df, "sus")

# Replace everything before the 5000th suspicious span by a single red bar
# that starts at x=0 and ends where that span begins.
# NOTE(review): 5000 is hard-coded for this data set (presumably to avoid
# drawing thousands of tiny bars) - confirm; this raises IndexError when there
# are fewer spans.
merged_spans = 5000
red_tuple = (0, bar_red2[merged_spans][0])
bar_green2 = bar_green2[merged_spans + 1:]
bar_red2 = bar_red2[merged_spans:]
bar_red2.insert(0, red_tuple)

fig, ax = plt.subplots(figsize=(width, height))

# Upper band (y from 0.25 to 1.25): "sus"; lower band (y from -1 to 0): "evil".
ax.broken_barh(bar_green2, (0.25, 1), color="lightgreen")
ax.broken_barh(bar_red2, (0.25, 1), color="lightsalmon")
ax.broken_barh(bar_green, (-1, 1), color="lightgreen")
ax.broken_barh(bar_red, (-1, 1), color="lightsalmon")

# Detections span both bands.
ax.vlines(
    outlier_model.anomalies,
    ymin=-1,
    ymax=1.25,
    linewidth=2,
    colors='r',
    linestyle='--', label='detection')

# Dashed centre line of the lower ("evil") band
timeline_y = -0.5
ax.axhline(
    timeline_y,
    linewidth=1,
    linestyle='--',
    color='black')

# Dashed centre line of the upper ("sus") band
feature_line_y = 0.75
ax.axhline(
    feature_line_y,
    linewidth=1,
    linestyle='--',
    c='black')

ax.legend()
# format axes
ax.margins(0)
ax.set_yticks([feature_line_y, timeline_y], labels=['Suspicious', 'Harmful'])
# Remove ticks on y-axis
ax.tick_params(left=False)
ax.ticklabel_format(axis="x", style="sci", scilimits=(0, 0))
ax.set_xlabel('Time (s)')

plt.yticks(rotation=45)
plt.tight_layout()

# Size and save through the figure handle *before* showing. On GUI/inline
# backends plt.show() can leave pyplot without this figure as the current one,
# so a later plt.savefig() would write an empty figure.
fig.set_size_inches(w=width, h=height)
fig.savefig(f'{output_dir}/outlier_result_{outlier_key}.pgf')

plt.show()

  plt.show()
