In [1]:
import sys, os
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from collections import defaultdict
from tqdm import tqdm
import numpy as np
import json
from tqdm import tqdm

In [2]:
%load_ext autoreload
%autoreload 2

## Load Feature List

In [3]:
os.environ['SM_MODEL_DIR'] = '../../HetGNN/code/model_save'
os.environ['SM_CHANNEL_TRAIN'] = '../../HetGNN/ProcessedData_rw_top10/feature_list'

In [4]:
sys.path.append("../../HetGNN/code/")

import torch
import data_generator
from args import read_args
import tools
from config import relations, selected_relations, selected_idx

In [5]:
from sklearn.manifold import TSNE
import torch

In [6]:
args = read_args()
args.data_path = '../../HetGNN/ProcessedData_rw_top10'
args

Namespace(batch_s=6540, checkpoint='', cuda=0, data_path='../../HetGNN/ProcessedData_rw_top10', embed_d=77, lr=0.0001, mini_batch_s=654, model_path='../../HetGNN/code/model_save', out_embed_d=128, preprocess=0, random_seed=10, save_model_freq=5, train='../../HetGNN/ProcessedData_rw_top10/feature_list', train_iter_n=150, train_test_label=0, walk_L=30, walk_n=10, window=5)

In [7]:
# selected_relations = [relations[i] for i in selected_idx]
selected_relations

['0_0',
 '0_1',
 '0_2',
 '0_3',
 '0_4',
 '0_5',
 '0_6',
 '0_7',
 '1_0',
 '1_1',
 '1_2',
 '1_3',
 '1_4',
 '1_5',
 '1_6',
 '1_7',
 '2_0',
 '2_1',
 '2_2',
 '2_3',
 '2_4',
 '2_5',
 '2_6',
 '2_7',
 '3_0',
 '3_1',
 '3_2',
 '3_3',
 '3_4',
 '3_5',
 '3_6',
 '3_7',
 '4_0',
 '4_1',
 '4_2',
 '4_3',
 '4_4',
 '4_5',
 '4_6',
 '4_7',
 '5_0',
 '5_1',
 '5_2',
 '5_3',
 '5_4',
 '5_5',
 '5_6',
 '5_7',
 '6_0',
 '6_1',
 '6_2',
 '6_3',
 '6_4',
 '6_5',
 '6_6',
 '6_7',
 '7_2',
 '7_4',
 '7_5']

In [8]:
# def sample_viz_feature(graph_node_feature_df, gid_list=None, sample_size=10, relation_idx=None):
    
#     if gid_list is None:
#         sampled_benign_gid = graph_node_feature_df[graph_node_feature_df['trace_bool'] == True].sample(sample_size)['trace_id'].values
#         sampled_error_gid = graph_node_feature_df[graph_node_feature_df['trace_bool'] == False].sample(sample_size)['trace_id'].values
#         gid_list = np.concatenate([sampled_benign_gid, sampled_error_gid])
    
#     sample_feature_df = graph_node_feature_df[graph_node_feature_df['gid'].isin(gid_list)]
    
#     tsne_ = TSNE(n_components=2, init='random')
#     tsne_embeddings = tsne_.fit_transform(
#         sample_feature_df[feature_cols].values
#     )

#     sample_feature_df['tsne_x'] = tsne_embeddings[:,0]
#     sample_feature_df['tsne_y'] = tsne_embeddings[:,1]
    
#     fig = px.scatter(sample_feature_df, x='tsne_x', y='tsne_y', color='trace_bool', title=f'Relation Idx: {relation_idx} Node Feature Viz')
#     fig.write_image(f'images/relation_{relation_idx}_node_viz.png')
#     fig.show()
    
#     return sample_feature_df
       
# sample_viz_feature(graph_node_feature_df, gid_list=sample_gid)
        

# Model Analysis

In [9]:
def load_feature_list(feature_list_path, select_relations=[]):
    feature_list = []
    feature_index = []
    
    if len(select_relations) > 0:
        relations = select_relations

    for r in relations:
        f_path = f'{feature_list_path}/feature_list/feature_list_{r}.pt'
        idx_path = f'{feature_list_path}/feature_list/feature_index_{r}.pt'

        print(f'Read relation feature list {f_path} ..')

        feature_ = torch.load(f_path)
        index_ = torch.load(idx_path)

        graph_index = defaultdict(list)
        for i, gid in enumerate(index_):
            graph_index[gid].append(i)


        print(feature_.size())

        feature_list.append(feature_)
        feature_index.append(graph_index)
    return feature_list, feature_index

In [10]:
def plot_train_results(model_root_dir, model_save_freq=5):
    train_loss = pd.read_csv(f'{model_root_dir}/train_loss.txt', names=['loss'])
    train_loss = train_loss.reset_index().rename(columns={'index': 'epoch'})

    eval_result = pd.read_csv(f'{model_root_dir}/eval_metrics.txt', sep=' ', names=['AUC', 'AP'])
    eval_result = eval_result.reset_index().rename(columns={'index': 'epoch'})
    eval_result['epoch'] = eval_result['epoch'] * model_save_freq

    eval_result['avg_metric'] = (eval_result['AUC'] + eval_result['AP'])/2


    # train_loss, eval_result

    fig = make_subplots(specs=[[{"secondary_y": True}]])

    for metric in eval_result.columns[1:]:
        fig.add_trace(
            go.Scatter(x=eval_result['epoch'], y=eval_result[metric],
                       mode='lines',
                       name=metric),
            secondary_y=False
        )

    fig.add_trace(
        go.Scatter(x=train_loss['epoch'], y=train_loss['loss'],
                    mode='lines',
                    name='loss'),
        secondary_y=True,
    )
    fig.show()
    
    return train_loss, eval_result, fig

## Top 10 Neighbours + Full Feature from Feature List

In [43]:
model_root_dir = "/home/ec2-user/SageMaker/repo/DeepTraLog/HetGNN/model_save_top10/"
feature_list_root_dir = '/home/ec2-user/SageMaker/repo/DeepTraLog/HetGNN/ProcessedData_rw_top10'

In [44]:
train_loss_t10, eval_result_t10, fig_t10 = plot_train_results(model_root_dir)

In [15]:
# feature_list, feature_index = load_feature_list(feature_list_root_dir, selected_relations)

In [16]:
model_idx = 45
model_path = f'{model_root_dir}/HetGNN_{model_idx}.pt'

In [17]:
# model = tools.HetAgg(args, feature_list, feature_index)

# # load fixed Center
# svdd_center = torch.load('../HetGNN/model_save_norm_rw_node_attr/HetGNN_SVDD_Center.pt')
# model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
# model.eval()

In [18]:
# torch.save(model, f'{model_root_dir}/direct_full_model_{model_idx}.pt')
model = torch.load(f'{model_root_dir}/direct_full_model_{model_idx}.pt')
model

HetAgg(
  (fc_neigh_agg_layers): ModuleList(
    (0): Linear(in_features=77, out_features=128, bias=True)
    (1): Linear(in_features=77, out_features=128, bias=True)
    (2): Linear(in_features=77, out_features=128, bias=True)
    (3): Linear(in_features=77, out_features=128, bias=True)
    (4): Linear(in_features=77, out_features=128, bias=True)
    (5): Linear(in_features=77, out_features=128, bias=True)
    (6): Linear(in_features=77, out_features=128, bias=True)
    (7): Linear(in_features=77, out_features=128, bias=True)
    (8): Linear(in_features=77, out_features=128, bias=True)
    (9): Linear(in_features=77, out_features=128, bias=True)
    (10): Linear(in_features=77, out_features=128, bias=True)
    (11): Linear(in_features=77, out_features=128, bias=True)
    (12): Linear(in_features=77, out_features=128, bias=True)
    (13): Linear(in_features=77, out_features=128, bias=True)
    (14): Linear(in_features=77, out_features=128, bias=True)
    (15): Linear(in_features=77, ou

In [45]:
model.svdd_center = torch.load(f'{model_root_dir}/HetGNN_SVDD_Center.pt', map_location=torch.device('cpu'))

In [19]:
import numpy as np
from sklearn.manifold import TSNE

In [20]:
trace_info = pd.read_csv('../../HetGNN/ProcessedData/trace_info.csv')
trace_info

Unnamed: 0,trace_id,trace_bool,error_trace_type,process_idx
0,0,True,normal,0
1,1,True,normal,0
2,2,True,normal,0
3,3,True,normal,0
4,4,True,normal,0
...,...,...,...,...
132480,132480,False,F04-02,8
132481,132481,False,F04-02,8
132482,132482,False,F04-02,8
132483,132483,False,F04-02,8


### Train Embeddings

In [86]:
def tsne_vis(gids, name, color='trace_bool', export=False, output_dir='images/model_out'):
    outputs = model(gids)
    print(f'Model Output Shape: {outputs.shape}')
    print('calc TSNE Embedding ..')
    tsne_ = TSNE(n_components=2, init='random')
    tsne_embeddings = tsne_.fit_transform(outputs.detach().numpy())
    
    results = pd.DataFrame(tsne_embeddings, columns=['tsne_x', 'tsne_y'])
    results['trace_id'] = gids
    
    results = results.merge(trace_info, on='trace_id', how='inner')
    
    fig = px.scatter(results, x='tsne_x', y='tsne_y', color=color, title='Data Embeddings')
    fig.show()

    if export:
        fig.write_image(f"{output_dir}/{name}_embedding.png")
    
    return results

In [22]:
with open(f'{model_root_dir}/model_gid_list_train.txt', 'r') as fin:
    train_gids = [int(i) for i in fin.read().strip().split(' ')]

with open(f'{model_root_dir}/model_gid_list_eval.txt', 'r') as fin:
    eval_gids = [int(i) for i in fin.read().strip().split(' ')]

with open(f'{model_root_dir}/model_gid_list_test.txt', 'r') as fin:
    test_gids = [int(i) for i in fin.read().strip().split(' ')]

In [23]:
unique_error_types = trace_info[trace_info.trace_id.isin(eval_gids)].error_trace_type.unique()
unique_error_types

array(['normal', 'F02-06', 'F07-04', 'F07-05', 'F04-06', 'F06-05',
       'F01-03', 'F05-01', 'F12-01', 'F01-02', 'F05-03', 'F06-04',
       'F05-02', 'F14-03', 'F10-02', 'F03-07', 'F03-08', 'F13-03',
       'F07-02', 'F02-04', 'F11-01', 'F04-01', 'F08-01', 'F10-01',
       'F10-03', 'F09-01', 'F09-03', 'F02-01', 'F02-03', 'F02-02',
       'F04-03', 'F13-05', 'F07-01', 'F08-04', 'F14-02', 'F08-02',
       'F12-03', 'F03-03', 'F11-02', 'F04-07', 'F04-08', 'F06-01',
       'F06-02', 'F06-03', 'F13-04', 'F13-01', 'F01-01', 'F08-05',
       'F01-04', 'F01-05', 'F11-05', 'F11-04', 'F11-03', 'F08-03',
       'F03-01', 'F05-05', 'F07-03', 'F09-02', 'F03-04', 'F04-05',
       'F03-06', 'F03-05', 'F12-05', 'F05-04', 'F02-05', 'F03-02',
       'F14-01', 'F13-02', 'F12-02', 'F04-04', 'F04-02'], dtype=object)

In [24]:
feature_index = model.feature_index
feaute_list = model.feature_list

In [25]:
# select_gids = []
# for gid in eval_gids:
#     print(f'############# gid: {gid} #############')
    
#     list_len = 0
#     for i in range(len(feature_index)):
#         print(f'idx {i}: {feature_index[i][gid]}')
#         list_len += len(feature_index[i][gid])
#     if list_len > 0:
#         select_gids.append(gid)
        

### Overall View

In [89]:
tsne_vis(eval_gids, name='eval')

Model Output Shape: torch.Size([33542, 128])
calc TSNE Embedding ..


Unnamed: 0,tsne_x,tsne_y,trace_id,trace_bool,error_trace_type,process_idx
0,16.648544,-14.910937,21592,True,normal,1
1,-16.010185,-47.793129,126584,False,F11-04,8
2,9.108427,-16.439528,80003,True,normal,5
3,-10.842209,37.329807,3106,True,normal,0
4,-75.372215,27.012068,110891,False,F04-07,7
...,...,...,...,...,...,...
33537,-40.297176,50.743423,132052,True,normal,8
33538,-0.803019,-21.101320,8649,False,F01-04,0
33539,56.236805,-7.191509,57541,False,F13-02,3
33540,-20.073795,-20.076468,84197,True,normal,5


In [26]:
eval_output = model(eval_gids)
eval_output

tensor([[0.5254, 0.5696, 0.5401,  ..., 0.5018, 0.5197, 0.4888],
        [0.5254, 0.5696, 0.5401,  ..., 0.5018, 0.5197, 0.4889],
        [0.5254, 0.5696, 0.5401,  ..., 0.5018, 0.5197, 0.4888],
        ...,
        [0.5254, 0.5696, 0.5401,  ..., 0.5018, 0.5197, 0.4889],
        [0.5254, 0.5696, 0.5401,  ..., 0.5018, 0.5197, 0.4889],
        [0.5258, 0.5709, 0.5409,  ..., 0.5017, 0.5199, 0.4883]],
       grad_fn=<SigmoidBackward0>)

In [27]:
eval_df = pd.DataFrame(eval_output.detach().numpy(), columns=[f't{i}' for i in range(128)])
eval_df['trace_id'] = eval_gids
eval_df = eval_df.merge(trace_info, on='trace_id', how='inner')
eval_df

Unnamed: 0,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,...,t122,t123,t124,t125,t126,t127,trace_id,trace_bool,error_trace_type,process_idx
0,0.525433,0.569617,0.540100,0.526803,0.530608,0.524563,0.525649,0.525477,0.515480,0.525254,...,0.534524,0.533239,0.505165,0.501828,0.519733,0.488845,21592,True,normal,1
1,0.525430,0.569610,0.540116,0.526799,0.530607,0.524591,0.525661,0.525477,0.515474,0.525257,...,0.534532,0.533230,0.505159,0.501832,0.519711,0.488850,126584,False,F11-04,8
2,0.525432,0.569616,0.540100,0.526796,0.530612,0.524564,0.525652,0.525475,0.515477,0.525255,...,0.534524,0.533236,0.505161,0.501831,0.519733,0.488850,80003,True,normal,5
3,0.525577,0.570677,0.540690,0.527123,0.530843,0.524694,0.525749,0.525687,0.515409,0.525421,...,0.534742,0.533545,0.504862,0.501379,0.519622,0.488178,3106,True,normal,0
4,0.523124,0.556718,0.533805,0.524192,0.527343,0.523205,0.522959,0.525413,0.517336,0.523221,...,0.530028,0.529936,0.508305,0.507110,0.518273,0.496389,110891,False,F04-07,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33537,0.524852,0.566108,0.538749,0.526341,0.529822,0.524290,0.525507,0.524794,0.515646,0.524549,...,0.533516,0.532111,0.506067,0.503081,0.519532,0.490920,132052,True,normal,8
33538,0.525437,0.569617,0.540113,0.526802,0.530591,0.524601,0.525655,0.525475,0.515480,0.525265,...,0.534533,0.533227,0.505172,0.501833,0.519711,0.488846,8649,False,F01-04,0
33539,0.525437,0.569599,0.540097,0.526793,0.530599,0.524555,0.525656,0.525472,0.515486,0.525256,...,0.534526,0.533236,0.505167,0.501823,0.519735,0.488878,57541,False,F13-02,3
33540,0.525425,0.569607,0.540091,0.526801,0.530593,0.524559,0.525650,0.525471,0.515490,0.525262,...,0.534526,0.533236,0.505164,0.501827,0.519735,0.488868,84197,True,normal,5


In [28]:
eval_df.groupby(
    [f't{i}' for i in range(128)]).count().sort_values(by='trace_id', ascending=False).reset_index()

Unnamed: 0,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,...,t122,t123,t124,t125,t126,t127,trace_id,trace_bool,error_trace_type,process_idx
0,0.523899,0.561488,0.536626,0.525553,0.528746,0.523446,0.524632,0.524321,0.515925,0.523767,...,0.531859,0.530798,0.507621,0.505172,0.519293,0.494150,450,450,450,450
1,0.524832,0.566131,0.538609,0.526158,0.529555,0.524238,0.525422,0.524837,0.515744,0.524631,...,0.533626,0.532105,0.506096,0.503320,0.519400,0.491100,360,360,360,360
2,0.523899,0.561488,0.536626,0.525553,0.528746,0.523446,0.524632,0.524321,0.515925,0.523767,...,0.531859,0.530798,0.507621,0.505172,0.519293,0.494150,349,349,349,349
3,0.524835,0.566139,0.538611,0.526160,0.529556,0.524235,0.525424,0.524839,0.515745,0.524632,...,0.533627,0.532108,0.506095,0.503322,0.519399,0.491097,214,214,214,214
4,0.523045,0.558384,0.534605,0.524065,0.527390,0.523268,0.523775,0.525467,0.516683,0.523804,...,0.530234,0.529756,0.507992,0.505874,0.518540,0.496474,103,103,103,103
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28863,0.525428,0.569606,0.540094,0.526806,0.530597,0.524560,0.525659,0.525470,0.515505,0.525262,...,0.534530,0.533238,0.505141,0.501832,0.519730,0.488855,1,1,1,1
28864,0.525428,0.569605,0.540094,0.526801,0.530598,0.524559,0.525652,0.525472,0.515487,0.525260,...,0.534528,0.533235,0.505163,0.501829,0.519734,0.488866,1,1,1,1
28865,0.525428,0.569605,0.540093,0.526808,0.530597,0.524560,0.525657,0.525467,0.515506,0.525262,...,0.534530,0.533235,0.505142,0.501831,0.519731,0.488854,1,1,1,1
28866,0.525428,0.569605,0.540092,0.526805,0.530599,0.524562,0.525657,0.525472,0.515502,0.525262,...,0.534533,0.533236,0.505143,0.501830,0.519730,0.488860,1,1,1,1


In [69]:
target = pd.DataFrame(eval_df.groupby(
    [f't{i}' for i in range(128)]).count().sort_values(by='trace_id', ascending=False).reset_index().loc[7]
).T.drop(['trace_id', 'trace_bool','error_trace_type','process_idx'], axis=1)

target

Unnamed: 0,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,...,t118,t119,t120,t121,t122,t123,t124,t125,t126,t127
7,0.523045,0.558384,0.534605,0.524065,0.52739,0.523268,0.523775,0.525467,0.516683,0.523804,...,0.490113,0.505106,0.512296,0.531172,0.530234,0.529756,0.507992,0.505874,0.51854,0.496474


In [29]:
sample_df_list = []
for error_type in unique_error_types:
    sample_ = eval_df[eval_df.error_trace_type == error_type].sample(5)
    sample_df_list.append(sample_)

sample_df = pd.concat(sample_df_list)
sample_df

Unnamed: 0,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,...,t122,t123,t124,t125,t126,t127,trace_id,trace_bool,error_trace_type,process_idx
16503,0.522589,0.551613,0.532469,0.524686,0.526079,0.522341,0.523061,0.523464,0.517281,0.521951,...,0.527976,0.527203,0.511361,0.508635,0.518925,0.501105,21548,True,normal,1
25214,0.525426,0.569608,0.540112,0.526799,0.530606,0.524586,0.525653,0.525473,0.515477,0.525259,...,0.534530,0.533235,0.505164,0.501834,0.519710,0.488858,58980,True,normal,4
32570,0.525246,0.568906,0.540209,0.527371,0.530696,0.524129,0.525447,0.525238,0.515122,0.525084,...,0.534611,0.533288,0.505352,0.501669,0.519474,0.488543,12178,True,normal,0
22172,0.525427,0.569606,0.540090,0.526798,0.530598,0.524558,0.525659,0.525474,0.515490,0.525258,...,0.534528,0.533236,0.505165,0.501834,0.519732,0.488867,106323,True,normal,7
24849,0.525426,0.569606,0.540089,0.526801,0.530592,0.524557,0.525650,0.525469,0.515491,0.525261,...,0.534524,0.533236,0.505164,0.501828,0.519734,0.488867,10771,True,normal,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26014,0.524424,0.562737,0.535945,0.524051,0.528352,0.524022,0.523875,0.525357,0.516981,0.524490,...,0.532238,0.532134,0.506935,0.505561,0.518798,0.492988,132471,False,F04-02,8
20566,0.524425,0.562737,0.535946,0.524047,0.528350,0.524019,0.523871,0.525363,0.516982,0.524487,...,0.532237,0.532135,0.506936,0.505560,0.518802,0.493004,14692,False,F04-02,0
26830,0.524425,0.562737,0.535946,0.524047,0.528350,0.524019,0.523871,0.525363,0.516982,0.524487,...,0.532237,0.532135,0.506936,0.505560,0.518802,0.493004,58867,False,F04-02,3
2083,0.524425,0.562737,0.535946,0.524047,0.528350,0.524019,0.523871,0.525363,0.516982,0.524487,...,0.532237,0.532135,0.506936,0.505560,0.518802,0.493004,14691,False,F04-02,0


In [30]:
print('calc TSNE Embedding ..')
tsne_ = TSNE(n_components=2, init='random')
tsne_embeddings = tsne_.fit_transform(
    sample_df[[f't{i}' for i in range(128)]].values
)

sample_df['tsne_x'] = tsne_embeddings[:,0]
sample_df['tsne_y'] = tsne_embeddings[:,1]

sample_df

calc TSNE Embedding ..


Unnamed: 0,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,...,t124,t125,t126,t127,trace_id,trace_bool,error_trace_type,process_idx,tsne_x,tsne_y
16503,0.522589,0.551613,0.532469,0.524686,0.526079,0.522341,0.523061,0.523464,0.517281,0.521951,...,0.511361,0.508635,0.518925,0.501105,21548,True,normal,1,-0.988254,-37.797302
25214,0.525426,0.569608,0.540112,0.526799,0.530606,0.524586,0.525653,0.525473,0.515477,0.525259,...,0.505164,0.501834,0.519710,0.488858,58980,True,normal,4,1.704527,19.991997
32570,0.525246,0.568906,0.540209,0.527371,0.530696,0.524129,0.525447,0.525238,0.515122,0.525084,...,0.505352,0.501669,0.519474,0.488543,12178,True,normal,0,7.023870,0.606537
22172,0.525427,0.569606,0.540090,0.526798,0.530598,0.524558,0.525659,0.525474,0.515490,0.525258,...,0.505165,0.501834,0.519732,0.488867,106323,True,normal,7,-0.082415,3.228133
24849,0.525426,0.569606,0.540089,0.526801,0.530592,0.524557,0.525650,0.525469,0.515491,0.525261,...,0.505164,0.501828,0.519734,0.488867,10771,True,normal,0,-1.032334,3.621895
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26014,0.524424,0.562737,0.535945,0.524051,0.528352,0.524022,0.523875,0.525357,0.516981,0.524490,...,0.506935,0.505561,0.518798,0.492988,132471,False,F04-02,8,-6.394208,-22.308435
20566,0.524425,0.562737,0.535946,0.524047,0.528350,0.524019,0.523871,0.525363,0.516982,0.524487,...,0.506936,0.505560,0.518802,0.493004,14692,False,F04-02,0,-6.631757,-21.938320
26830,0.524425,0.562737,0.535946,0.524047,0.528350,0.524019,0.523871,0.525363,0.516982,0.524487,...,0.506936,0.505560,0.518802,0.493004,58867,False,F04-02,3,-6.372866,-22.307997
2083,0.524425,0.562737,0.535946,0.524047,0.528350,0.524019,0.523871,0.525363,0.516982,0.524487,...,0.506936,0.505560,0.518802,0.493004,14691,False,F04-02,0,-6.631793,-21.938391


In [32]:
px.scatter(sample_df, x='tsne_x', y='tsne_y', color='error_trace_type',
#            text='trace_id',
           title='Sampled Trace Embeddings')

### F08-05

In [33]:
error_type = 'F08-05'

In [34]:
sample_error_df = eval_df[eval_df.error_trace_type.isin([error_type, 'normal'])].reset_index(drop=True)
sample_error_df

Unnamed: 0,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,...,t122,t123,t124,t125,t126,t127,trace_id,trace_bool,error_trace_type,process_idx
0,0.525433,0.569617,0.540100,0.526803,0.530608,0.524563,0.525649,0.525477,0.515480,0.525254,...,0.534524,0.533239,0.505165,0.501828,0.519733,0.488845,21592,True,normal,1
1,0.525432,0.569616,0.540100,0.526796,0.530612,0.524564,0.525652,0.525475,0.515477,0.525255,...,0.534524,0.533236,0.505161,0.501831,0.519733,0.488850,80003,True,normal,5
2,0.525577,0.570677,0.540690,0.527123,0.530843,0.524694,0.525749,0.525687,0.515409,0.525421,...,0.534742,0.533545,0.504862,0.501379,0.519622,0.488178,3106,True,normal,0
3,0.525245,0.568902,0.540220,0.527345,0.530689,0.524129,0.525449,0.525236,0.515116,0.525049,...,0.534605,0.533312,0.505371,0.501672,0.519513,0.488556,57055,True,normal,3
4,0.525425,0.569606,0.540092,0.526801,0.530596,0.524558,0.525650,0.525471,0.515491,0.525261,...,0.534526,0.533238,0.505163,0.501829,0.519736,0.488868,76153,True,normal,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21986,0.525878,0.569585,0.539878,0.526567,0.530764,0.524395,0.525320,0.525864,0.515197,0.525181,...,0.534252,0.533117,0.505057,0.501711,0.519721,0.488824,77717,True,normal,5
21987,0.525433,0.569612,0.540111,0.526802,0.530604,0.524583,0.525662,0.525476,0.515473,0.525259,...,0.534528,0.533232,0.505156,0.501835,0.519718,0.488850,77171,True,normal,5
21988,0.524852,0.566108,0.538749,0.526341,0.529822,0.524290,0.525507,0.524794,0.515646,0.524549,...,0.533516,0.532111,0.506067,0.503081,0.519532,0.490920,132052,True,normal,8
21989,0.525425,0.569607,0.540091,0.526801,0.530593,0.524559,0.525650,0.525471,0.515490,0.525262,...,0.534526,0.533236,0.505164,0.501827,0.519735,0.488868,84197,True,normal,5


In [46]:
scores = model.predict_score(sample_error_df.trace_id)
sample_error_df['score'] = scores.detach().numpy()

In [None]:
tsne_ = TSNE(n_components=2, init='random')
tsne_embeddings = tsne_.fit_transform(
    sample_error_df[[f't{i}' for i in range(128)]].values
)

sample_error_df['tsne_x'] = tsne_embeddings[:,0]
sample_error_df['tsne_y'] = tsne_embeddings[:,1]

In [47]:
sample_error_df

Unnamed: 0,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,...,t125,t126,t127,trace_id,trace_bool,error_trace_type,process_idx,tsne_x,tsne_y,score
0,0.525433,0.569617,0.540100,0.526803,0.530608,0.524563,0.525649,0.525477,0.515480,0.525254,...,0.501828,0.519733,0.488845,21592,True,normal,1,-51.262302,1.991487,7.307577e-09
1,0.525432,0.569616,0.540100,0.526796,0.530612,0.524564,0.525652,0.525475,0.515477,0.525255,...,0.501831,0.519733,0.488850,80003,True,normal,5,-60.488605,6.873729,7.275107e-09
2,0.525577,0.570677,0.540690,0.527123,0.530843,0.524694,0.525749,0.525687,0.515409,0.525421,...,0.501379,0.519622,0.488178,3106,True,normal,0,42.610344,35.893169,2.330538e-07
3,0.525245,0.568902,0.540220,0.527345,0.530689,0.524129,0.525449,0.525236,0.515116,0.525049,...,0.501672,0.519513,0.488556,57055,True,normal,3,22.710846,-60.807308,7.345420e-08
4,0.525425,0.569606,0.540092,0.526801,0.530596,0.524558,0.525650,0.525471,0.515491,0.525261,...,0.501829,0.519736,0.488868,76153,True,normal,5,-13.642004,-4.320409,6.994610e-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21986,0.525878,0.569585,0.539878,0.526567,0.530764,0.524395,0.525320,0.525864,0.515197,0.525181,...,0.501711,0.519721,0.488824,77717,True,normal,5,21.127193,56.600887,5.226712e-08
21987,0.525433,0.569612,0.540111,0.526802,0.530604,0.524583,0.525662,0.525476,0.515473,0.525259,...,0.501835,0.519718,0.488850,77171,True,normal,5,-9.407398,44.942032,7.593430e-09
21988,0.524852,0.566108,0.538749,0.526341,0.529822,0.524290,0.525507,0.524794,0.515646,0.524549,...,0.503081,0.519532,0.490920,132052,True,normal,8,79.194786,-12.745059,1.716638e-06
21989,0.525425,0.569607,0.540091,0.526801,0.530593,0.524559,0.525650,0.525471,0.515490,0.525262,...,0.501827,0.519735,0.488868,84197,True,normal,5,-23.343779,-4.193397,6.987064e-09


In [38]:
px.scatter(sample_error_df, x='tsne_x', y='tsne_y', color='error_trace_type',
           title='Sampled Trace Embeddings',
           hover_name='trace_id'
          )

In [53]:
px.histogram(sample_error_df, x='score', color='error_trace_type')

In [54]:
normal_trace_id = 43087
error_trace_id = 23437

In [55]:
error1_df = sample_error_df[sample_error_df.trace_id.isin([normal_trace_id, error_trace_id])]
error1_df

Unnamed: 0,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,...,t125,t126,t127,trace_id,trace_bool,error_trace_type,process_idx,tsne_x,tsne_y,score
4289,0.525428,0.569602,0.540093,0.526805,0.530598,0.524561,0.525656,0.525469,0.515507,0.525261,...,0.501832,0.519729,0.488858,43087,True,normal,2,-18.029469,-64.804962,7.073518e-09
6962,0.525421,0.569611,0.540092,0.526807,0.530588,0.52456,0.525649,0.525473,0.515497,0.525262,...,0.501829,0.519735,0.488857,23437,False,F08-05,1,-17.118345,-77.112343,7.185746e-09


In [56]:
px.scatter(error1_df, x='tsne_x', y='tsne_y', color='error_trace_type', title='Sampled Trace Embeddings')

In [58]:
px.scatter(error1_df, x='trace_id', y='score', color='error_trace_type', title='Anomaly Score')

In [59]:
def get_feature_list(relation_idx):
    error1_ = pd.DataFrame(
        model.feature_list[relation_idx][model.feature_index[relation_idx][normal_trace_id]].detach().numpy(),
        columns=[f'f{i}' for i in range(77)]
    )
    error1_['trace_id'] = error_trace_id

    normal1_ = pd.DataFrame(
        model.feature_list[relation_idx][model.feature_index[relation_idx][error_trace_id]].detach().numpy(),
        columns=[f'f{i}' for i in range(77)]
    )
    normal1_['trace_id'] = normal_trace_id

    error1_feature_df = pd.concat([error1_, normal1_])

#     tsne_ = TSNE(n_components=2, init='random')
#     tsne_embeddings = tsne_.fit_transform(
#         error1_feature_df[[f'f{i}' for i in range(77)]].values
#     )
#     error1_feature_df['tsne_x'] = tsne_embeddings[:,0]
#     error1_feature_df['tsne_y'] = tsne_embeddings[:,1]
    
    error1_feature_df['relation_idx'] = relation_idx

#     fig = px.scatter(error1_feature_df, x='tsne_x',y='tsne_y',color='trace_id')
#     fig.show()
    
    return error1_feature_df

def viz_feature_list():
    error1_feature_df = []
    for relation_idx in range(len(relations)):
        error1_feature_df.append(get_feature_list(relation_idx))
        
    error1_feature_df = pd.concat(error1_feature_df)
    
    tsne_ = TSNE(n_components=2, init='random')
    tsne_embeddings = tsne_.fit_transform(
        error1_feature_df[[f'f{i}' for i in range(77)]].values
    )
    error1_feature_df['tsne_x'] = tsne_embeddings[:,0]
    error1_feature_df['tsne_y'] = tsne_embeddings[:,1]
    
    return error1_feature_df

error1_feature_df = viz_feature_list()
error1_feature_df['relation_idx'] = error1_feature_df['relation_idx'].astype(str)
error1_feature_df

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f71,f72,f73,f74,f75,f76,trace_id,relation_idx,tsne_x,tsne_y
0,0.000626,0.001337,-0.000056,0.000000e+00,0.000000,0.000000,0.000642,0.000627,0.001410,0.000714,...,0.0,0.0,0.0,0.0,0.0,0.0,23437,0,-0.574211,-10.290356
1,0.000627,0.000669,0.000904,2.156719e-07,0.000000,0.000035,0.000644,0.000628,0.000664,0.000714,...,0.0,0.0,0.0,0.0,0.0,0.0,23437,0,-9.138889,3.448514
2,0.000627,0.001421,0.000904,1.941047e-06,0.000000,0.000046,0.000646,0.000628,0.000664,0.000714,...,0.0,0.0,0.0,0.0,0.0,0.0,23437,0,11.463171,13.580715
0,0.000626,0.001337,-0.000056,0.000000e+00,0.000000,0.000000,0.000642,0.000623,0.000000,0.000714,...,0.0,0.0,0.0,0.0,0.0,0.0,43087,0,-8.620105,4.682839
1,0.000624,0.000000,0.000904,4.744783e-06,0.000000,0.000025,0.000637,0.000670,0.001410,0.000714,...,0.0,0.0,0.0,0.0,0.0,0.0,43087,0,12.439856,6.780695
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,0.000352,0.001429,0.001429,1.980859e-06,0.000343,0.000021,0.000370,0.000352,0.001171,0.001171,...,0.0,0.0,0.0,0.0,0.0,0.0,23437,45,-4.823930,-9.937412
5,0.000352,0.001429,0.001429,2.160938e-06,0.000343,0.000023,0.000370,0.000352,0.001102,0.001102,...,0.0,0.0,0.0,0.0,0.0,0.0,23437,45,-3.976122,-9.381495
6,0.000346,0.000673,0.000673,2.881250e-06,0.000343,0.000028,0.000352,0.000353,0.000551,0.000551,...,0.0,0.0,0.0,0.0,0.0,0.0,23437,45,9.500286,8.965086
0,0.000351,0.001345,0.001345,5.402344e-06,0.000343,0.000004,0.000369,0.000351,0.001102,0.001102,...,0.0,0.0,0.0,0.0,0.0,0.0,43087,45,12.569774,13.916390


In [82]:
px.scatter(error1_feature_df, x='tsne_x', y='tsne_y', 
           color='trace_id',
           facet_col='relation_idx',
           facet_col_wrap=3,
           facet_row_spacing=0.01,
           width=1000,
           height=2000,
           hover_name='trace_id')

(71,)