### Analyse the output file to retrive statistics on the run

In [9]:
import re
import ast
import numpy as np
import glob
import os
import pandas as pd

from plotly import graph_objects as go

In [10]:
output_files = "../cluster/outputs/new_means/*.out"

if isinstance(output_files, str):
    output_files = sorted(glob.glob(output_files))
    if not output_files:
        print("No output files found.")
        exit(1)
  
print(f"Found {len(output_files)} output files.")

Found 67 output files.


In [11]:
def parse_output_file(output_file):
    """Parse the output file and extract relevant statistics."""
    print(f"Parsing output file: {output_file}")

    # Statistics from the output file
    with open(output_file, 'r') as file:
        content = file.readlines()
        processed_content = []
        for line in content:
            if '.-' in line:
                split_lines = line.replace('.-', '.\n-').splitlines()
                processed_content.extend(split_lines)
            else:
                processed_content.append(line)
        content = [line for line in processed_content if line.strip()]

    results = {
        "output_file": os.path.basename(output_file),
        'parameters': {},
        'attack_times': []
    }

    # Extract dictionary from the first line using ast.literal_eval for safety
    parameters_line = content[0]
    results['parameters'] = ast.literal_eval(parameters_line.strip().split("Parameters: ")[1])

    # Iterate through the file content and parse relevant data
    for counter in range(1, len(content)):
        line = content[counter]
        attack_time_match = re.match(r"Attack \d+/\d+ completed in ([\d.]+) seconds\.", line)
        if attack_time_match:
            attack_time = float(attack_time_match.group(1))
            results['attack_times'].append(attack_time)
        

    return results

Parameters used:

In [48]:
results = []
for output_file in output_files:
    result = parse_output_file(output_file)
    results.append(result)

# Prepare a list to collect rows for the DataFrame
rows = []

for result in results:
    params = result['parameters']
    matrix_config = params.get('matrix_config', None)
    if matrix_config not in ['salsa', 'dual']:
        continue  # skip unknown configs
    row = {
        'n': params.get('n'),
        'q': params.get('q'),
        'secret_type': params.get('secret_type'),
        'eta': params.get('eta'),
        'penalty': params.get('penalty'),
        'bkz_block_sizes': params.get('bkz_block_sizes'),
        'matrix_config': matrix_config,
        'mean': round(np.mean(result['attack_times']), 2) if result['attack_times'] else np.nan,
        'std': round(np.std(result['attack_times']), 2) if result['attack_times'] else np.nan,
        'num_attacks': len(result['attack_times']) if result['attack_times'] else 0,
        'output_file': result['output_file']
    }
    rows.append(row)

df_stats = pd.DataFrame(rows)

# Pivot to get mean/std/count/output_file for salsa and dual in separate columns
df_summary = df_stats.pivot_table(
    index=['n', 'q', 'secret_type', 'eta', 'penalty', 'bkz_block_sizes'],
    columns='matrix_config',
    values=['mean', 'std', 'num_attacks', 'output_file'],
    aggfunc={'mean': 'mean', 'std': 'mean', 'num_attacks': 'sum', 'output_file': lambda x: ','.join(x)}
).reset_index()

# Flatten MultiIndex columns
df_summary.columns = ['_'.join(col).strip('_') for col in df_summary.columns.values]

Parsing output file: ../cluster/outputs/new_means/attack_6338001.out
Parsing output file: ../cluster/outputs/new_means/attack_6338002.out
Parsing output file: ../cluster/outputs/new_means/attack_6338003.out
Parsing output file: ../cluster/outputs/new_means/attack_6338004.out
Parsing output file: ../cluster/outputs/new_means/attack_6338005.out
Parsing output file: ../cluster/outputs/new_means/attack_6338006.out
Parsing output file: ../cluster/outputs/new_means/attack_6338007.out
Parsing output file: ../cluster/outputs/new_means/attack_6338008.out
Parsing output file: ../cluster/outputs/new_means/attack_6338009.out
Parsing output file: ../cluster/outputs/new_means/attack_6338010.out
Parsing output file: ../cluster/outputs/new_means/attack_6338011.out
Parsing output file: ../cluster/outputs/new_means/attack_6338012.out
Parsing output file: ../cluster/outputs/new_means/attack_6338013.out
Parsing output file: ../cluster/outputs/new_means/attack_6338014.out
Parsing output file: ../cluster/ou

In [52]:
df_summary_sub = df_summary[
  (df_summary['eta'] == 3) &
  (df_summary['secret_type'] == 'binary') &
  (df_summary['penalty'] == 4)
]
df_summary_sub

Unnamed: 0,n,q,secret_type,eta,penalty,bkz_block_sizes,mean_dual,mean_salsa,num_attacks_dual,num_attacks_salsa,output_file_dual,output_file_salsa,std_dual,std_salsa
1,32,3329,binary,3,4,4:42:2,9.21,3.07,10,10,attack_6338003.out,attack_6338004.out,2.13,1.25
5,40,3329,binary,3,4,4:42:2,13.29,6.44,10,10,attack_6338007.out,attack_6338008.out,1.21,1.7
9,50,3329,binary,3,4,4:42:2,59.44,15.73,10,10,attack_6338011.out,attack_6338012.out,2.98,2.95
13,64,3329,binary,3,4,4:42:2,89.94,24.46,10,10,attack_6338015.out,attack_6338016.out,10.64,1.69
17,70,3329,binary,3,4,4:42:2,131.18,24.48,10,10,attack_6338019.out,attack_6338020.out,10.52,2.36
21,80,3329,binary,3,4,4:42:2,223.46,43.68,10,10,attack_6338023.out,attack_6338024.out,10.11,6.0
25,90,3329,binary,3,4,4:42:2,285.36,438.74,10,10,attack_6338027.out,attack_6338028.out,9.77,128.74
29,100,3329,binary,3,4,4:42:2,454.22,681.94,20,10,"attack_6338031.out,attack_6340216.out",attack_6338032.out,14.035,35.91
31,110,3329,binary,3,4,4:42:2,1629.4,2087.31,10,10,attack_6338035.out,attack_6338036.out,562.11,977.87


In [53]:
fig = go.Figure()

df_summary_sub = df_summary_sub.dropna(subset=['mean_dual', 'mean_salsa', 'std_dual', 'std_salsa'])

# Plot mean_dual with std as a filled area
fig.add_trace(go.Scatter(
    x=df_summary_sub['n'],
    y=df_summary_sub['mean_dual'],
    mode='lines+markers',
    name='dual',
    line=dict(color='blue')
))

fig.add_trace(go.Scatter(
    x=pd.concat([df_summary_sub['n'], df_summary_sub['n'].iloc[::-1]], axis=0),
    y=pd.concat([
        df_summary_sub['mean_dual'] + df_summary_sub['std_dual'],
        (df_summary_sub['mean_dual'] - df_summary_sub['std_dual']).iloc[::-1]
    ], axis=0),
    fill='toself',
    fillcolor='rgba(0, 0, 255, 0.15)',
    line=dict(color='rgba(255,255,255,0)'),
    hoverinfo="skip",
    showlegend=False
))

# Plot mean_salsa with std as a filled area
fig.add_trace(go.Scatter(
    x=df_summary_sub['n'],
    y=df_summary_sub['mean_salsa'],
    mode='lines+markers',
    name='salsa',
    line=dict(color='red')
))

fig.add_trace(go.Scatter(
    x=pd.concat([df_summary_sub['n'], df_summary_sub['n'].iloc[::-1]], axis=0),
    y=pd.concat([
        df_summary_sub['mean_salsa'] + df_summary_sub['std_salsa'],
        (df_summary_sub['mean_salsa'] - df_summary_sub['std_salsa']).iloc[::-1]
    ], axis=0),
    fill='toself',
    fillcolor='rgba(255, 0, 0, 0.15)',
    line=dict(color='rgba(255,255,255,0)'),
    hoverinfo="skip",
    showlegend=False
))

fig.update_layout(
    xaxis_title='n',
    yaxis_title='Seconds (log scale)',
    #legend_title='Lattice Embedding',
    showlegend=False,
    width=800,
    height=600,
    template='plotly_white',
    autosize=True,
    margin=dict(l=40, r=20, t=40, b=40),
    yaxis_type='log'
)

fig.show()