# Common Functions

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def read_dataaset(path, anomaly_points, time_col='timestamp', anomaly_col='anomaly'):
    """Load a CSV time series and attach a 0/1 anomaly label column.

    Args:
        path: Location of the CSV file to read.
        anomaly_points: Iterable of ``(start, end)`` pairs.  Rows whose
            ``time_col`` value lies inside a pair (both ends inclusive)
            are labelled 1.
        time_col: Name of the column converted with ``pd.to_datetime``.
        anomaly_col: Name of the label column that is created (or
            overwritten) on the returned frame.

    Returns:
        The loaded DataFrame with ``time_col`` parsed as datetimes and
        ``anomaly_col`` set to 1 inside any window and 0 elsewhere.
    """
    frame = pd.read_csv(path, low_memory=False)
    frame[time_col] = pd.to_datetime(frame[time_col])

    # Everything is normal until a window says otherwise.
    frame[anomaly_col] = 0
    for window in anomaly_points:
        window_start, window_end = window
        # ``between`` is inclusive on both ends by default.
        inside = frame[time_col].between(window_start, window_end)
        frame.loc[inside, anomaly_col] = 1

    return frame


def plot_anomaly(title, df, label='anomaly', plot_item='value', time_col='timestamp'):
    """Plot ``df[plot_item]`` against the index, coloured by ``df[label]``.

    The frame is cut into consecutive runs of identical ``label`` values
    and each run is drawn as its own line: blue when the run's label
    equals 0, red otherwise.  Every run also includes the first row of the
    following run so neighbouring lines join without a gap.  The figure is
    saved to ``./plot/<title>.png`` and then closed.

    Args:
        title: Figure title; also the output file name (``.png`` is added).
        df: Frame holding the ``label`` and ``plot_item`` columns.
        label: Column with the per-row label.
        plot_item: Column whose values go on the y axis.
        time_col: Kept for interface compatibility; currently unused
            because the x axis is the frame's index.
    """
    labels = df[label].to_numpy()
    n_rows = len(labels)

    # Positions where the label differs from the row before it.
    change_points = np.flatnonzero(labels[1:] != labels[:-1]) + 1

    # Run k covers positions bounds[k] .. bounds[k + 1].  The closing bound
    # ``n_rows`` makes sure the trailing run is drawn too; without it a
    # frame whose label never changes would produce an empty plot.
    bounds = [0, *change_points.tolist(), n_rows] if n_rows else []

    fig = plt.figure()
    fig.set_figwidth(20)
    plt.title(title)
    for first, stop in zip(bounds, bounds[1:]):
        # Positional slicing keeps this independent of the index labels;
        # ``stop + 1`` pulls in the first row of the next run (iloc clamps
        # at the end of the frame for the final run).
        section = df.iloc[first:stop + 1]
        color = 'blue' if labels[first] == 0 else 'red'
        plt.plot(section.index, section[plot_item], color=color)

    # savefig does not create missing directories.
    os.makedirs('./plot', exist_ok=True)
    plt.savefig('./plot/{}.png'.format(title))
    plt.close(fig)


def read_plot(path, f, anomaly_points):
    """Load one labelled series, save its anomaly plot, and return the frame.

    Args:
        path: Root directory; the CSV is read from ``<path>/data/<f>``.
        f: File path relative to ``<path>/data``.  Its base name is
            printed and used as the plot title.
        anomaly_points: ``(start, end)`` windows forwarded to
            ``read_dataaset`` for labelling.

    Returns:
        The labelled DataFrame produced by ``read_dataaset``.
    """
    csv_path = '{}/data/{}'.format(path, f)
    labelled = read_dataaset(csv_path, anomaly_points)

    name = os.path.basename(f)
    print(name)
    plot_anomaly(name, labelled)

    return labelled

# Plot

In [2]:
# JSON is needed to read the anomaly-window labels.
import json

# Root directory that holds both the label file and the data/ folder.
f_path = '/nas.dbms/mahendra.data/Documents/annotation/NAB'

# Load the mapping from data file name to its list of anomaly windows.
labels_file = f_path + '/labels/combined_windows.json'
with open(labels_file) as json_file:
    anomalies = json.loads(json_file.read())

# Label and plot every series; ``raw`` ends up holding the last frame read.
for key in anomalies:
    value = anomalies[key]
    raw = read_plot(f_path, key, value)

art_daily_no_noise.csv
art_daily_perfect_square_wave.csv
art_daily_small_noise.csv
art_flatline.csv
art_noisy.csv
art_daily_flatmiddle.csv
art_daily_jumpsdown.csv
art_daily_jumpsup.csv
art_daily_nojump.csv
art_increase_spike_density.csv
art_load_balancer_spikes.csv
ec2_cpu_utilization_24ae8d.csv
ec2_cpu_utilization_53ea38.csv
ec2_cpu_utilization_5f5533.csv
ec2_cpu_utilization_77c1ca.csv
ec2_cpu_utilization_825cc2.csv
ec2_cpu_utilization_ac20cd.csv
ec2_cpu_utilization_c6585a.csv
ec2_cpu_utilization_fe7f93.csv
ec2_disk_write_bytes_1ef3de.csv
ec2_disk_write_bytes_c0d644.csv
ec2_network_in_257a54.csv
ec2_network_in_5abac7.csv
elb_request_count_8c0756.csv
grok_asg_anomaly.csv
iio_us-east-1_i-a2eb1cd9_NetworkIn.csv
rds_cpu_utilization_cc0c53.csv
rds_cpu_utilization_e47b3b.csv
exchange-2_cpc_results.csv
exchange-2_cpm_results.csv
exchange-3_cpc_results.csv
exchange-3_cpm_results.csv
exchange-4_cpc_results.csv
exchange-4_cpm_results.csv
ambient_temperature_system_failure.csv
cpu_utilization_as