In [2]:
import pandas as pd
import numpy as np
import os 
os.chdir("./MTAD-GAT/result/")
import json
from datetime import datetime
import plotly as py
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.express as px
import cufflinks as cf
cf.go_offline()

In [15]:
class Plotter:
    """Plot MTAD-GAT train/test result CSVs: per-feature fits and global anomaly scores.

    The two result frames must provide the columns this class reads:
    ``For_<i>``, ``Rec_<i>`` and ``Act_<i>`` for every plotted feature ``i``,
    plus ``Score_Global``, ``Threshold_Global`` and ``Pred_Global``.
    ``Label_Global`` (plotly views) / ``A_True_Global`` (matplotlib view) are
    only read when ``labels_available`` is set to True by the caller.
    """

    # Root folder that receives one sub-folder of per-feature PNGs per result set.
    DEFAULT_SAVE_ROOT = "/camin1/inyoung/MTAD-GAT/Visualization/"

    def __init__(self, train_name, test_name):
        """Load both result files and add a ``timestamp`` column to each.

        Args:
            train_name: Path of the train-result CSV. The text before its first
                underscore is also used as the output sub-folder name.
            test_name: Path of the test-result CSV.
        """
        self.train_output = None
        self.test_output = None
        self.labels_available = False
        self.pred_cols = None
        self.train_name = train_name
        self.test_name = test_name
        self._load_results()
        # The row index doubles as the x axis of every plot.
        self.train_output["timestamp"] = self.train_output.index
        self.test_output["timestamp"] = self.test_output.index

    def _load_results(self):
        """Read the train and test CSVs into ``train_output`` / ``test_output``."""
        train_output = pd.read_csv(self.train_name)
        # Train rows get an all-zero true-anomaly column.
        train_output["A_True_Global"] = 0
        test_output = pd.read_csv(self.test_name)

        self.train_output = train_output
        self.test_output = test_output

    def create_shapes(self, ranges, sequence_type, _min, _max, plot_values, is_test=True, xref=None, yref=None):
        """Build plotly rectangle shapes that shade the given index ranges.

        Args:
            ranges: Iterable of ``[start, end]`` x positions to shade.
            sequence_type: Kind of sequence ("true", "pred", ...). Kept for
                interface compatibility; every kind is drawn in crimson.
            _min: Lower y bound of each rectangle.
            _max: Upper y bound; when None, ``max(plot_values["errors"])`` is used.
            plot_values: Mapping with an ``"errors"`` entry; only read when
                ``_max`` is None.
            is_test: Unused, kept for interface compatibility.
            xref: Optional plotly x-axis reference added to every shape.
            yref: Optional plotly y-axis reference, added together with ``xref``.

        Returns:
            List of shape dicts suitable for a plotly layout's ``shapes`` entry.
        """
        if _max is None:
            _max = max(plot_values["errors"])

        color = "crimson"
        # Widen each rectangle so that very short sequences stay visible.
        w = 2

        shapes = []
        for r in ranges:
            shape = {
                "type": "rect",
                "x0": r[0] - w,
                "y0": _min,
                "x1": r[1] + w,
                "y1": _max,
                "fillcolor": color,
                "opacity": 0.08,
                "line": {
                    "width": 0,
                },
            }
            if xref is not None:
                shape["xref"] = xref
                shape["yref"] = yref
            shapes.append(shape)
        return shapes

    @staticmethod
    def get_anomaly_sequences(values):
        """Locate the runs of anomalous points in a 0/1 indicator sequence.

        Args:
            values: Array-like of 0/1 flags (1 = anomalous).

        Returns:
            List of ``[first_index, last_index]`` pairs (both inclusive), one per
            contiguous run of 1s. Empty input yields an empty list.
        """
        # Plain lists would compare as a whole (`list != list` is one bool), so
        # always work on an ndarray.
        values = np.asarray(values)
        if values.size == 0:
            return []

        # Positions where the flag flips; these alternate run starts and run ends.
        splits = np.where(values[1:] != values[:-1])[0] + 1
        if values[0] == 1:
            # The sequence opens inside a run, so index 0 is the first start.
            splits = np.insert(splits, 0, 0)

        a_seqs = []
        for i in range(0, len(splits) - 1, 2):
            a_seqs.append([splits[i], splits[i + 1] - 1])
        if len(splits) % 2 == 1:
            # An unmatched start means the last run extends to the end.
            a_seqs.append([splits[-1], len(values) - 1])
        return a_seqs

    def plot_feature(self, feature, plot_train=False, plot_errors=True, plot_feature_anom=True,
                     start=None, end=None, save_root=None, show=False):
        """Save the forecast / reconstruction / true-value plot of one feature as PNG.

        The test figure is written to ``<save_root><name>/feature_<feature>.png``
        where ``name`` is the train file name up to its first underscore. When
        ``plot_train`` is set, the train figure goes to ``feature_<feature>_train.png``
        so it no longer overwrites the test figure. The target folder is created
        when missing.

        Args:
            feature: Non-negative feature index ``i`` (columns ``For_i``,
                ``Rec_i``, ``Act_i``).
            plot_train: Also plot the train data.
            plot_errors: Build the anomaly-score figure; it is only rendered
                when ``show`` is True.
            plot_feature_anom: Shade predicted anomalies and, in the error
                figure, draw the threshold line.
            start: Optional first test row (positional) to plot.
            end: Optional positional end row (exclusive) of the test data.
            save_root: Root output folder; defaults to ``DEFAULT_SAVE_ROOT``.
            show: Additionally render the figures inline with plotly.

        Raises:
            Exception: If the feature is negative or has no ``For_<feature>`` column.
        """
        if save_root is None:
            save_root = self.DEFAULT_SAVE_ROOT

        test_copy = self.test_output.copy()

        if start is not None and end is not None:
            assert start < end
        if start is not None:
            test_copy = test_copy.iloc[start:, :]
        if end is not None:
            start = 0 if start is None else start
            # The frame was already cut at `start`, so `end` is relative to it.
            test_copy = test_copy.iloc[: end - start, :]

        plot_data = [test_copy]
        if plot_train:
            plot_data.append(self.train_output.copy())

        i = feature
        for nr, data_copy in enumerate(plot_data):
            is_test = nr == 0

            if feature < 0 or f"For_{feature}" not in data_copy.columns:
                raise Exception(f"Channel {feature} not present in data.")

            plot_values = {
                "timestamp": data_copy["timestamp"].values,
                "y_forecast": data_copy[f"For_{i}"].values,
                "y_recon": data_copy[f"Rec_{i}"].values,
                "y_true": data_copy[f"Act_{i}"].values,
                "errors": data_copy["Score_Global"].values,
                # Plain array like the other entries, so no index alignment
                # happens when the frames below are assembled.
                "threshold": data_copy["Threshold_Global"].values,
            }

            pred_sequences = self.get_anomaly_sequences(data_copy["Pred_Global"].values)
            if is_test and start is not None:
                # Sequences are positions inside the slice; shift them back to
                # the timestamps used on the x axis.
                pred_sequences = [[s + start, e + start] for [s, e] in pred_sequences]

            # Pad outwards by 10 %. A bare `1.1 * value` would move the bound
            # inwards for a positive minimum or a negative maximum.
            true_low = plot_values["y_true"].min()
            true_high = plot_values["y_true"].max()
            y_min = true_low - 0.1 * abs(true_low)
            y_max = true_high + 0.1 * abs(true_high)
            e_max = 1.5 * plot_values["errors"].max()

            y_df = pd.DataFrame(
                {
                    "timestamp": plot_values["timestamp"].reshape(-1,),
                    "y_forecast": plot_values["y_forecast"].reshape(-1,),
                    "y_recon": plot_values["y_recon"].reshape(-1,),
                    "y_true": plot_values["y_true"].reshape(-1,),
                }
            )

            data_type = "Test data" if is_test else "Train data"
            feature_label = self.pred_cols[i] if self.pred_cols is not None else ''
            y_layout = {
                "title": f"{data_type} | Forecast & reconstruction vs true value for {feature_label} ",
                "showlegend": True,
                "height": 400,
                "width": 1300,
            }
            if plot_feature_anom:
                y_layout["shapes"] = self.create_shapes(
                    pred_sequences, "predicted", y_min, y_max, plot_values, is_test=is_test
                )

            lines = [
                go.Scatter(
                    x=y_df["timestamp"],
                    y=y_df["y_true"],
                    name="y_true",
                    line=dict(color="darkgray", width=1.5)),
                go.Scatter(
                    x=y_df["timestamp"],
                    y=y_df["y_forecast"],
                    name="y_forecast",
                    line=dict(color="darkslategray", width=1.5)),
                go.Scatter(
                    x=y_df["timestamp"],
                    y=y_df["y_recon"],
                    name="y_recon",
                    line=dict(color="skyblue", width=1.5)),
            ]

            fig = go.Figure(data=lines, layout=y_layout)

            name = self.train_name.split("_")[0]
            save_dir = save_root + name
            # write_image does not create missing folders.
            os.makedirs(save_dir, exist_ok=True)
            # Train figures get their own file so they do not replace the test figure.
            suffix = "" if is_test else "_train"
            fig.write_image(save_dir + "/feature_" + str(i) + suffix + ".png")
            if show:
                py.offline.iplot(fig)

            if plot_errors and show:
                e_df = pd.DataFrame(
                    {
                        "timestamp": plot_values["timestamp"],
                        "e_s": plot_values["errors"].reshape(-1,),
                        "threshold": plot_values["threshold"],
                    }
                )
                e_layout = {
                    "title": f"{data_type} | Error for {feature_label}",
                    "height": 400,
                    "width": 1300,
                }
                e_lines = [
                    go.Scatter(
                        x=e_df["timestamp"],
                        y=e_df["e_s"],
                        name="Error",
                        line=dict(color="crimson", width=1))]
                if plot_feature_anom:
                    e_layout["shapes"] = self.create_shapes(
                        pred_sequences, "predicted", 0, e_max, plot_values, is_test=is_test
                    )
                    e_lines.append(
                        go.Scatter(
                            x=e_df["timestamp"],
                            y=e_df["threshold"],
                            name="Threshold",
                            line=dict(color="black", width=1, dash="dash")))
                py.offline.iplot(go.Figure(data=e_lines, layout=e_layout))

    def plot_global_predictions(self, type="test"):
        """Show anomaly scores, threshold and predicted anomalies with matplotlib.

        Args:
            type: "test" plots the test results; any other value plots the
                train results.
        """
        if type == "test":
            data_copy = self.test_output.copy()
        else:
            data_copy = self.train_output.copy()

        fig, axs = plt.subplots(
            3,
            figsize=(30, 10),
            sharex=True,
        )
        axs[0].plot(data_copy["Score_Global"], c="r", label="anomaly scores")
        axs[0].plot(data_copy["Threshold_Global"], linestyle="dashed", c="black", label="threshold")
        axs[1].plot(data_copy["Pred_Global"], label="predicted anomalies", c="orange")
        if self.labels_available and type == "test":
            # NOTE(review): the plotly views read "Label_Global" for the same
            # purpose - confirm which column the test CSV really provides.
            axs[2].plot(
                data_copy["A_True_Global"],
                label="actual anomalies",
            )
        # Clip the score axis so a few extreme scores do not flatten the rest.
        axs[0].set_ylim([0, 5 * np.mean(data_copy["Threshold_Global"].values)])
        fig.legend(prop={"size": 20})
        plt.show()
        # Release the figure; this method is typically called once per result set.
        plt.close(fig)

    def _select_output(self, type):
        """Return ``(copy of the requested result frame, is_test)`` for "train"/"test"."""
        if type == "train":
            return self.train_output.copy(), False
        if type == "test":
            return self.test_output.copy(), True
        raise ValueError(f"type must be 'train' or 'test', got {type!r}")

    def _global_anomaly_shapes(self, data_copy, is_test):
        """Return ``(shapes, threshold, y_max)`` shared by the plotly global views."""
        threshold = data_copy["Threshold_Global"].values
        y_min = -0.1
        y_max = 5 * np.mean(threshold)

        pred_sequences = self.get_anomaly_sequences(data_copy["Pred_Global"].values)
        shapes = self.create_shapes(pred_sequences, "pred", y_min, y_max, None, is_test=is_test)
        if self.labels_available and is_test:
            true_sequences = self.get_anomaly_sequences(data_copy["Label_Global"].values)
            shapes.extend(
                self.create_shapes(true_sequences, "true", y_min, y_max, None, is_test=is_test)
            )
        return shapes, threshold, y_max

    def plotly_global_predictions(self, type="test"):
        """Render the global anomaly score and threshold with shaded anomalies.

        Args:
            type: "train" or "test".

        Raises:
            ValueError: If ``type`` is neither "train" nor "test".
        """
        data_copy, is_test = self._select_output(type)
        shapes, threshold, y_max = self._global_anomaly_shapes(data_copy, is_test)
        tot_anomaly_scores = data_copy["Score_Global"].values

        layout = {
            # All shaded ranges share one colour (see create_shapes), so the
            # title no longer promises a blue/red/purple legend.
            "title": f"{type} set | Total error, predicted anomalies "
                     f"(and true anomalies if available) shaded in crimson",
            "shapes": shapes,
            "yaxis": dict(range=[0, y_max]),
            "height": 400,
            "width": 1500
        }

        fig = go.Figure(
            data=[go.Scatter(x=data_copy["timestamp"], y=tot_anomaly_scores, name='Error', line=dict(width=1, color="darkred")),
                  go.Scatter(x=data_copy["timestamp"], y=threshold, name='Threshold', line=dict(color="black", width=1, dash="dash"))],
            layout=layout,
        )
        py.offline.iplot(fig)

    def plotly_global_predictions2(self, feature, type="test"):
        """Render one feature's true values with the global threshold and anomalies.

        Args:
            feature: Feature index ``i``; the ``Act_<i>`` column is plotted.
            type: "train" or "test".

        Raises:
            ValueError: If ``type`` is neither "train" nor "test".
        """
        data_copy, is_test = self._select_output(type)
        shapes, threshold, _ = self._global_anomaly_shapes(data_copy, is_test)

        i = feature
        layout = {
            "title": "Node" + str(i),
            "shapes": shapes,
            "yaxis": dict(range=[0, 1]),
            "height": 400,
            "width": 2000
        }

        fig = go.Figure(
            data=[go.Scatter(x=data_copy["timestamp"], y=data_copy[f"Act_{i}"].values, name="y_true", line=dict(color="rgb(0, 204, 150, 0.5)", width=1)),
                  go.Scatter(x=data_copy["timestamp"], y=threshold, name='Threshold', line=dict(color="black", width=1, dash="dash"))],
            layout=layout,
        )
        py.offline.iplot(fig)

In [16]:
def order_result_files(names):
    """Order result file names so each train file is directly followed by its test file.

    Files are grouped by their name with the ``trainresult`` / ``testresult``
    token neutralised; inside a group the train file comes first. Names without
    either token are simply sorted alphabetically.

    NOTE(review): assumes the ``<name>_..._trainresult.csv`` /
    ``<name>_..._testresult.csv`` naming hinted at in ``Plotter._load_results``
    - confirm against the real result folder.
    """
    def pairing_key(fname):
        stem = fname.replace("trainresult", "result").replace("testresult", "result")
        return (stem, "testresult" in fname, fname)

    return sorted(names, key=pairing_key)


# os.listdir() returns entries in arbitrary order, but the cells below read the
# list as [train, test, train, test, ...], so the order is made explicit here.
list_ = order_result_files(os.listdir())

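    Create the save folders: one output folder per result set, named after the text before the first underscore of the train file name.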

In [None]:
# Create one output folder per result set (every second entry of list_ is a train file).
for i in range(0, len(list_), 2):
    name = list_[i].split("_")[0]
    # Use the same root (capital "V") that Plotter.plot_feature writes its PNGs
    # to; on a case-sensitive file system ".../visualization/" is a different
    # folder. makedirs(exist_ok=True) also creates missing parents and keeps
    # the cell re-runnable.
    os.makedirs("/camin1/inyoung/MTAD-GAT/Visualization/" + name, exist_ok=True)

In [None]:
# Save the per-feature plots of every (train, test) result pair.
for i in range(0,len(list_),2):
    print("###################################" + list_[i].split("_")[0] + "###################################")
    train_name = list_[i]; test_name = list_[i+1]
    plotter = Plotter(train_name, test_name)
    # Plot at most 200 features, but stop at the first one the test results do
    # not contain: plot_feature raises for a missing "For_<j>" column, which
    # would otherwise abort all remaining result sets.
    for j in range(200):
        if f"For_{j}" not in plotter.test_output.columns:
            break
        plotter.plot_feature(feature=j)

In [None]:
# Interactive global anomaly-score view for every (train, test) result pair.
rule = "###################################"
for pair_start in range(0, len(list_), 2):
    print(rule + list_[pair_start].split("_")[0] + rule)
    train_name = list_[pair_start]
    test_name = list_[pair_start + 1]
    plotter = Plotter(train_name, test_name)
    plotter.plotly_global_predictions()

In [None]:
# Static (matplotlib) global anomaly-score view for every (train, test) result pair.
rule = "###################################"
for pair_start in range(0, len(list_), 2):
    print(rule + list_[pair_start].split("_")[0] + rule)
    train_name = list_[pair_start]
    test_name = list_[pair_start + 1]
    plotter = Plotter(train_name, test_name)
    plotter.plot_global_predictions()