# Visualisation Run Time

This notebook reads the `.txt` files containing the run times of all models on the different synthetic data sets (and, in the final section, on the IBM data sets). The results are structured and visualised for inclusion in the research paper.

The results are not finalised, as some experiments are still running. 

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Identifiers of the models whose run-time files are loaded; each name is
# substituted into the '../results/time_<model>.txt' path.
models = ['flowscope', 'autoaudit', 'gargaml_undir', 'gargaml_dir']

In [None]:
# Parse the run-time file of every model into a nested mapping:
#   model_time[model][run_key] = run time as float
# Each non-empty line is split on whitespace; the first token is the run key
# and the last token is the measured time. If a key occurs more than once in
# a file, the last occurrence wins.
model_time = {}
for model in models:
    time_dict = {}
    with open(f'../results/time_{model}.txt', 'r') as f:
        for raw_line in f:
            tokens = raw_line.split()
            if not tokens:
                # Blank or whitespace-only lines (e.g. a trailing newline)
                # carry no measurement and would otherwise raise IndexError.
                continue
            time_dict[tokens[0]] = float(tokens[-1])
    model_time[model] = time_dict

In [None]:
# Inspect the parsed run times: {model name: {run key: time as float}}.
model_time

In [None]:
# Flatten the nested {model: {run_key: time}} mapping into three parallel
# lists (one entry per run) and build a long-format DataFrame from them.
methods_list = []
length_list = []
time_list = []
for method, runs in model_time.items():
    for run_key, run_time in runs.items():
        methods_list.append(method)
        key_parts = run_key.split('_')
        try:
            # Use the third '_'-separated token as an integer size label.
            length_list.append(int(key_parts[2]))
        except (IndexError, ValueError):
            # Keys without a numeric third token fall back to their first
            # token as a string label (the last plot orders by 'HI-Small' /
            # 'LI-Large'). Only these two expected failures are caught so
            # that unrelated errors are not silently swallowed.
            length_list.append(key_parts[0])
        time_list.append(run_time)

df = pd.DataFrame({'method': methods_list, 'length': length_list, 'time': time_list})

In [None]:
# Inspect the long-format table: one row per run with its method, length label and time.
df

In [None]:
# Box plot of the run time per method for each network size, drawn on a
# logarithmic y-axis. pandas, seaborn and matplotlib come from the import
# cell at the top of the notebook.

# Categories shown on the x-axis, in this order; rows whose 'length' is not
# listed here are not drawn.
order = [100, 10000, 100000]

fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(x='length', y='time', hue='method', data=df, order=order, ax=ax)
ax.set_yscale('log')
ax.set_xlabel('Number of Observations')
ax.set_ylabel('Time (seconds)')
ax.set_title('Calculation Time by Method and Network Size')
# which="both" draws grid lines for the minor ticks of the log axis as well.
ax.grid(True, which="both", ls="--", c='gray', alpha=0.3)
fig.savefig('../results/time_boxplot_log.pdf')


In [None]:
# Box plot of the run time per method for each network size on a linear
# y-axis. pandas, seaborn and matplotlib come from the import cell at the top
# of the notebook.

# Categories shown on the x-axis, in this order; rows whose 'length' is not
# listed here are not drawn.
order = [100, 10000, 100000]

fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(x='length', y='time', hue='method', data=df, order=order, ax=ax)
ax.set_xlabel('Number of Observations')
ax.set_ylabel('Time (seconds)')
# NOTE(review): the title says out-of-time values are excluded, but this cell
# neither filters `df` nor limits the y-axis - confirm whether a filter is
# missing or the title should be adjusted.
ax.set_title('Calculation Time by Method and Network Size - Excluding Out-of-Time Values')
ax.grid(True, which="both", ls="--", c='gray', alpha=0.3)
fig.savefig('../results/time_boxplot_norm.pdf')

In [None]:
# Box plot of the run time per method for each network size on a linear
# y-axis limited to [0, 1750]. pandas, seaborn and matplotlib come from the
# import cell at the top of the notebook.

# Categories shown on the x-axis, in this order; rows whose 'length' is not
# listed here are not drawn.
order = [100, 10000, 100000]

fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(x='length', y='time', hue='method', data=df, order=order, ax=ax)
ax.set_xlabel('Number of Observations')
# The limit only crops the view: values above 1750 are still part of the
# box statistics but fall outside the visible range.
# NOTE(review): presumably this cropping is what the title calls
# "Excluding Out-of-Time Values" - confirm.
ax.set_ylim(0, 1750)
ax.set_ylabel('Time (seconds)')
ax.set_title('Calculation Time by Method and Network Size - Excluding Out-of-Time Values')
ax.grid(True, which="both", ls="--", c='gray', alpha=0.3)
fig.savefig('../results/time_boxplot_norm_1750.pdf')


In [None]:
# Box plot of the run time per method for each network size on a linear
# y-axis limited to [0, 750]. pandas, seaborn and matplotlib come from the
# import cell at the top of the notebook.

# Categories shown on the x-axis, in this order; rows whose 'length' is not
# listed here are not drawn.
order = [100, 10000, 100000]

fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(x='length', y='time', hue='method', data=df, order=order, ax=ax)
ax.set_xlabel('Number of Observations')
# The limit only crops the view: values above 750 are still part of the
# box statistics but fall outside the visible range.
# NOTE(review): presumably this cropping is what the title calls
# "Excluding Out-of-Time Values" - confirm.
ax.set_ylim(0, 750)
ax.set_ylabel('Time (seconds)')
ax.set_title('Calculation Time by Method and Network Size - Excluding Out-of-Time Values')
ax.grid(True, which="both", ls="--", c='gray', alpha=0.3)
fig.savefig('../results/time_boxplot_norm_750.pdf')

# IBM Data

In [None]:
# Bar plot of the run time per method on the IBM data sets. pandas, seaborn
# and matplotlib come from the import cell at the top of the notebook.

# Data sets shown on the x-axis, in this order; rows whose 'length' label is
# not listed here are not drawn.
order = ['HI-Small', 'LI-Large']

# Fixed colour per method for the methods drawn in this figure.
custom_palette = {
    'flowscope': 'tab:blue',
    'gargaml_undir': 'tab:green',
    'gargaml_dir': 'tab:red',
}

# 'autoaudit' rows are left out of this figure.
df_without_autoaudit = df[df['method'] != 'autoaudit']

fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(
    x='length',
    y='time',
    hue='method',
    data=df_without_autoaudit,
    order=order,
    palette=custom_palette,
    edgecolor='black',
    ax=ax,
)
ax.set_xlabel('Data Set')
ax.set_ylabel('Time (seconds)')
ax.set_title('Calculation Time by Method and Network')
ax.grid(True, which="both", ls="--", c='gray', alpha=0.3)

# The output file name still says "boxplot" although this is a bar plot; the
# path is left unchanged so existing references to the file keep working.
fig.savefig('../results/time_boxplot_ibm.pdf')
