# Notes for decentmon data processing

In [44]:
import pandas as pd

from decmon.extractor import *
from decmon.cleaner import *
from decmon.filter import *
from decmon.plotter import *

In [45]:
# Constants
# FILENAME = "sample_log.txt"
FILENAME = "2022-09-07_12-06-13_output.log"

In [46]:
df = pd.read_csv(FILENAME, sep='@')
df.insert(0, 'formula_id', range(0, len(df)))
df.columns = df.columns.str.strip()
df

Unnamed: 0,formula_id,x,cent_trace_len,decent_trace_len,odecent_trace_len,cent_num_mess,decent_num_mess,odecent_num_mess,cent_size_mess,decent_size_mess,odecent_size_mess,cent_nb_progressions,decent_nb_progressions,odecent_nb_progressions,formula,trace
0,0,1,2,3,2,3,0,0,1.0,0.00,0.00,3,3,3,"Next (Var ""b"")",{a| |c} ; {a| |c} ; {a| | } ; { | | } ; {a| |...
1,1,1,2,3,2,3,0,0,1.0,0.00,0.00,3,3,3,"Next (Var ""c"")",{ | |c} ; { |b| } ; { |b|c} ; {a| |c} ; { | |...
2,2,1,2,3,2,3,0,0,1.0,0.00,0.00,3,3,3,"Next (Var ""a"")",{a|b|c} ; {a| |c} ; { |b| } ; {a|b| } ; {a|b|...
3,3,1,2,3,2,3,0,0,1.0,0.00,0.00,3,3,3,"Next (Var ""c"")",{ | |c} ; {a| | } ; { |b|c} ; { | |c} ; {a|b|...
4,4,1,2,3,2,3,2,0,1.0,1.66,0.00,4,8,4,"Glob (Var ""b"")",{a|b| } ; {a| |c} ; {a|b| } ; { |b|c} ; {a| |...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3036,3036,5,20,22,21,57,22,79,1.0,36.46,10.76,962,3207,1153,"Until (Or (Ev (Next (And (Var ""a"", Var ""c"")))...",{ | | } ; { |b|c} ; { | | } ; { |b| } ; {a| |...
3037,3037,5,17,19,18,48,35,51,1.0,46.27,7.88,999,3308,1041,"Until (Or (Var ""c"", Next (Until (Var ""b"", Var...",{ | | } ; {a| |c} ; { |b| } ; {a| |c} ; {a|b|...
3038,3038,5,21,23,22,60,6,21,1.0,0.77,2.76,120,240,143,"Next (Next (Or (Glob (Var ""a""), Ev (And (Var ...",{a| |c} ; { |b|c} ; { |b|c} ; { | |c} ; { | |...
3039,3039,5,23,25,25,66,18,48,1.0,5.61,5.66,240,677,310,"Next (Glob (Until (Until (Var ""b"", Var ""a""), ...",{ | |c} ; {a|b| } ; {a| |c} ; {a| | } ; {a| |...


In [47]:
strategies = {
    "cent" : "orchestration",
    "decent" : "migration",
    "odecent" : "choreography"
}

In [48]:
strategies_df = []
for key, name in strategies.items():
    others = {x: strategies[x] for x in strategies if x != key}
    other_keys = list(others.keys())
    local = rename(df, fr"^{key}_(.*)", r"\1")
    local = exclude_annotate(local, exclude=other_keys, annotate=name)
    strategies_df.append(local)

(cent, decent, odecent) = strategies_df
cent

Unnamed: 0,formula_id,x,trace_len,num_mess,size_mess,nb_progressions,formula,trace,strategy
0,0,1,2,3,1.0,3,"Next (Var ""b"")",{a| |c} ; {a| |c} ; {a| | } ; { | | } ; {a| |...,orchestration
1,1,1,2,3,1.0,3,"Next (Var ""c"")",{ | |c} ; { |b| } ; { |b|c} ; {a| |c} ; { | |...,orchestration
2,2,1,2,3,1.0,3,"Next (Var ""a"")",{a|b|c} ; {a| |c} ; { |b| } ; {a|b| } ; {a|b|...,orchestration
3,3,1,2,3,1.0,3,"Next (Var ""c"")",{ | |c} ; {a| | } ; { |b|c} ; { | |c} ; {a|b|...,orchestration
4,4,1,2,3,1.0,4,"Glob (Var ""b"")",{a|b| } ; {a| |c} ; {a|b| } ; { |b|c} ; {a| |...,orchestration
...,...,...,...,...,...,...,...,...,...
3036,3036,5,20,57,1.0,962,"Until (Or (Ev (Next (And (Var ""a"", Var ""c"")))...",{ | | } ; { |b|c} ; { | | } ; { |b| } ; {a| |...,orchestration
3037,3037,5,17,48,1.0,999,"Until (Or (Var ""c"", Next (Until (Var ""b"", Var...",{ | | } ; {a| |c} ; { |b| } ; {a| |c} ; {a|b|...,orchestration
3038,3038,5,21,60,1.0,120,"Next (Next (Or (Glob (Var ""a""), Ev (And (Var ...",{a| |c} ; { |b|c} ; { |b|c} ; { | |c} ; { | |...,orchestration
3039,3039,5,23,66,1.0,240,"Next (Glob (Until (Until (Var ""b"", Var ""a""), ...",{ | |c} ; {a|b| } ; {a| |c} ; {a| | } ; {a| |...,orchestration


In [49]:
sdf = pd.concat(strategies_df, ignore_index=True)
sdf

Unnamed: 0,formula_id,x,trace_len,num_mess,size_mess,nb_progressions,formula,trace,strategy
0,0,1,2,3,1.00,3,"Next (Var ""b"")",{a| |c} ; {a| |c} ; {a| | } ; { | | } ; {a| |...,orchestration
1,1,1,2,3,1.00,3,"Next (Var ""c"")",{ | |c} ; { |b| } ; { |b|c} ; {a| |c} ; { | |...,orchestration
2,2,1,2,3,1.00,3,"Next (Var ""a"")",{a|b|c} ; {a| |c} ; { |b| } ; {a|b| } ; {a|b|...,orchestration
3,3,1,2,3,1.00,3,"Next (Var ""c"")",{ | |c} ; {a| | } ; { |b|c} ; { | |c} ; {a|b|...,orchestration
4,4,1,2,3,1.00,4,"Glob (Var ""b"")",{a|b| } ; {a| |c} ; {a|b| } ; { |b|c} ; {a| |...,orchestration
...,...,...,...,...,...,...,...,...,...
9118,3036,5,21,79,10.76,1153,"Until (Or (Ev (Next (And (Var ""a"", Var ""c"")))...",{ | | } ; { |b|c} ; { | | } ; { |b| } ; {a| |...,choreography
9119,3037,5,18,51,7.88,1041,"Until (Or (Var ""c"", Next (Until (Var ""b"", Var...",{ | | } ; {a| |c} ; { |b| } ; {a| |c} ; {a|b|...,choreography
9120,3038,5,22,21,2.76,143,"Next (Next (Or (Glob (Var ""a""), Ev (And (Var ...",{a| |c} ; { |b|c} ; { |b|c} ; { | |c} ; { | |...,choreography
9121,3039,5,25,48,5.66,310,"Next (Glob (Until (Until (Var ""b"", Var ""a""), ...",{ | |c} ; {a|b| } ; {a| |c} ; {a| | } ; {a| |...,choreography


In [50]:
#gsdf = sdf[sdf['nb_progressions'].isin(sdf.groupby(['formula', 'trace']).mean()['nb_progressions'].values)]
#gsdf = sdf.loc[sdf.groupby('formula')['trace'].idxmin()]
#gsdf = sdf.groupby(['formula', 'trace']).min()
#gsdf#.apply(print)
#gsdf.describe()
#sdf['nb_progressions'].isin(sdf.groupby(['formula', 'trace']).min()['nb_progressions'].values)


gsdf = sdf.set_index(['formula', 'trace'])
#.std()['nb_progressions'].values)
gsdf[gsdf['nb_progressions'].isin(gsdf.groupby((['formula', 'trace'])).mean()['nb_progressions'].values > 0)]

Unnamed: 0_level_0,Unnamed: 1_level_0,formula_id,x,trace_len,num_mess,size_mess,nb_progressions,strategy
formula,trace,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1


In [51]:
fff = sdf.groupby(['formula', 'trace'])
# for key, item in fff.head(5):
#     print(fff.get_group(key), "\n\n")
fff.head(5)

Unnamed: 0,formula_id,x,trace_len,num_mess,size_mess,nb_progressions,formula,trace,strategy
0,0,1,2,3,1.00,3,"Next (Var ""b"")",{a| |c} ; {a| |c} ; {a| | } ; { | | } ; {a| |...,orchestration
1,1,1,2,3,1.00,3,"Next (Var ""c"")",{ | |c} ; { |b| } ; { |b|c} ; {a| |c} ; { | |...,orchestration
2,2,1,2,3,1.00,3,"Next (Var ""a"")",{a|b|c} ; {a| |c} ; { |b| } ; {a|b| } ; {a|b|...,orchestration
3,3,1,2,3,1.00,3,"Next (Var ""c"")",{ | |c} ; {a| | } ; { |b|c} ; { | |c} ; {a|b|...,orchestration
4,4,1,2,3,1.00,4,"Glob (Var ""b"")",{a|b| } ; {a| |c} ; {a|b| } ; { |b|c} ; {a| |...,orchestration
...,...,...,...,...,...,...,...,...,...
9118,3036,5,21,79,10.76,1153,"Until (Or (Ev (Next (And (Var ""a"", Var ""c"")))...",{ | | } ; { |b|c} ; { | | } ; { |b| } ; {a| |...,choreography
9119,3037,5,18,51,7.88,1041,"Until (Or (Var ""c"", Next (Until (Var ""b"", Var...",{ | | } ; {a| |c} ; { |b| } ; {a| |c} ; {a|b|...,choreography
9120,3038,5,22,21,2.76,143,"Next (Next (Or (Glob (Var ""a""), Ev (And (Var ...",{a| |c} ; { |b|c} ; { |b|c} ; { | |c} ; { | |...,choreography
9121,3039,5,25,48,5.66,310,"Next (Glob (Until (Until (Var ""b"", Var ""a""), ...",{ | |c} ; {a|b| } ; {a| |c} ; {a| | } ; {a| |...,choreography


In [52]:
metrics = [
    "trace_len",
    "num_mess",
    "size_mess",
    "nb_progressions"
]

In [53]:
metrics_data = []
for metric in metrics:
    metrics_data.append(select_metric(sdf, metric))

merged_metrics = pd.concat(metrics_data)

(trace_length, num_mess, size_mess, nb_progressions) = merged_metrics

merged_metrics.max()

formula_id             3040
strategy      orchestration
value               47303.0
metric            trace_len
dtype: object

In [54]:
map_ops = lambda x: flatten_once(count_all_ops(x['formula']))

f_ops = sdf.copy()
f_ops['newcolumn'] = f_ops.apply(map_ops, axis=1)
ops = flatten_once(all_operators)
f_ops = pd.DataFrame(f_ops['newcolumn'].to_list(), columns=ops)

f_ops

NameError: name 'flatten_once1' is not defined

Formula Duplication rate:

In [None]:
(len(cent['formula'].values) - len(cent['formula'].value_counts())) / len(cent['formula'].values)

Encoding Duplication rate:

In [None]:
(len(f_ops) - len(f_ops.drop_duplicates())) / len(f_ops)

In [None]:
TRACE_LENGTH = 1001
NODES = 3

In [None]:
map_trace = lambda x: flatten_once(extract_sampled_events(x['trace']))
trace_label = lambda n: [f"t_{x // NODES}_{x % NODES}" for x in range(n)]

t_events = sdf.copy()
t_events['newcolumn'] = t_events.apply(map_trace, axis=1)
ids = trace_label(TRACE_LENGTH * NODES)
listed_values = t_events['newcolumn'].to_list()
transformed_values = list(map(lambda ls: list(map(convert_event_to_int, ls)), listed_values))
t_events = pd.DataFrame(transformed_values, columns=ids)


t_events

In [None]:
expanded_df = f_ops.merge(t_events, left_index=True, right_index=True)
expanded_df = sdf.merge(expanded_df, left_index=True, right_index=True)
expanded_df

## Plotting

In [None]:
# Plotting
to_plot = sdf.drop(['formula_id', 'x'], axis=1)
plot_barplot(to_plot)

In [None]:
merged_metrics.max()

In [None]:
grid_plot = plot_grid_barplots(merged_metrics, grid_cell_field= 'metric',
          y_axis = 'value', x_axis = 'strategy',
          order = ['orchestration', 'migration', 'choreography'])

grid_plot.savefig("output/out.png")

In [None]:
ndf = merged_metrics.copy()
ndf['strategy'] = ndf['strategy'].str.replace("^cent_", "1", regex=True)
ndf['strategy'] = ndf['strategy'].str.replace("^decent_", "2", regex=True)
ndf['strategy'] = ndf['strategy'].str.replace("^odecent_", "3", regex=True)
ndf

In [None]:
expanded_df.strategy = pd.Categorical(expanded_df.strategy)
expanded_df['strategy_id'] = expanded_df.strategy.cat.codes
expanded_df