# In [None]:
import os
import re
import pandas as pd
import plotly.express as px

# Directory scanned for benchmark output files (the current working directory).
folder = '.'

# Only entries whose name starts with this prefix are parsed.
pattern = re.compile(r'output-Test-2025-.*')

# Extraction regexes:
#   regex_aq_line     - one line carrying both "A_Q = <num>" and
#                       "zstd_ratio = <num>%"
#   regex_quant_block - an "Accuracy from <num> to <num>" line
#   regex_zstd_ratio  - a "zstd_ratio = <num>%" fragment, looked up on the
#                       line that follows an "Accuracy from ... to ..." line
regex_aq_line = re.compile(r'A_Q\s*=\s*([0-9.]+).*?zstd_ratio\s*=\s*([0-9.]+)%')
regex_quant_block = re.compile(r'Accuracy from ([0-9.]+) to ([0-9.]+)')
regex_zstd_ratio = re.compile(r'zstd_ratio\s*=\s*([0-9.]+)%')


def _extract_points(lines, source):
    """Return the data points found in the lines of one output file.

    Args:
        lines: List of text lines read from the file.
        source: Value stored under the ``'source'`` key of every record
            (the script passes the file name).

    Returns:
        A list of dicts with keys ``'accuracy'``, ``'zstd_ratio'`` and
        ``'source'``, in the order the matches appear in ``lines``.

    Raises:
        ValueError: If a captured number cannot be converted by ``float``
            (the patterns accept any run of digits and dots).
    """
    records = []
    for i, line in enumerate(lines):
        # Case 1: a single line holding both A_Q and zstd_ratio.
        match = regex_aq_line.search(line)
        if match:
            records.append({
                'accuracy': float(match.group(1)),
                'zstd_ratio': float(match.group(2)),
                'source': source,
            })

        # Case 2: an "Accuracy from ... to ..." line whose ratio is reported
        # on the following line; the last line of a file has no follower.
        match = regex_quant_block.search(line)
        if match and i + 1 < len(lines):
            accuracy = float(match.group(2))  # the value after "to"
            match_ratio = regex_zstd_ratio.search(lines[i + 1])
            if match_ratio:
                records.append({
                    'accuracy': accuracy,
                    'zstd_ratio': float(match_ratio.group(1)),
                    'source': source,
                })
    return records


# Accumulates one record per extracted data point.
data = []

# Walk the directory in sorted order so the row order is reproducible
# (os.listdir returns entries in arbitrary order).
for filename in sorted(os.listdir(folder)):
    if not pattern.match(filename):
        continue
    filepath = os.path.join(folder, filename)
    # A directory can match the name pattern too; open() would fail on it.
    if not os.path.isfile(filepath):
        continue
    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    data.extend(_extract_points(lines, filename))

# Build the DataFrame with explicit columns so that it keeps its expected
# layout even when no data point was found.
df = pd.DataFrame(data, columns=['accuracy', 'zstd_ratio', 'source'])

# Efficient frontier: the non-dominated points
def efficient_frontier(df):
    """Return the rows of ``df`` that lie on the efficient frontier.

    Rows are ordered by increasing ``zstd_ratio`` (ties broken by decreasing
    ``accuracy``). A row is kept only when its ``accuracy`` is strictly
    greater than the accuracy of every row ordered before it, so each kept
    row trades a larger ratio for a better accuracy.

    Args:
        df: DataFrame with numeric ``'accuracy'`` and ``'zstd_ratio'``
            columns; any other columns are carried through unchanged.

    Returns:
        A new DataFrame holding the kept rows in sorted order, with their
        original index labels and columns. An empty input yields an empty
        copy of the input.

    Raises:
        KeyError: If a non-empty ``df`` lacks one of the two columns.
    """
    # An empty frame may have no columns at all (e.g. ``pd.DataFrame([])``),
    # in which case sorting by column name would raise.
    if df.empty:
        return df.copy()

    ordered = df.sort_values(by=['zstd_ratio', 'accuracy'], ascending=[True, False])

    # Best accuracy among the rows strictly before each row. ``ffill`` carries
    # the running maximum across NaN accuracies, and the -inf seed lets the
    # first valid row through whatever the scale of the values.
    best_before = (
        ordered['accuracy']
        .cummax()
        .ffill()
        .shift(1)
        .fillna(float('-inf'))
    )
    return ordered[ordered['accuracy'] > best_before]

# Rows of df selected by efficient_frontier, drawn as an overlay below.
frontier_df = efficient_frontier(df)

# Interactive scatter plot of every parsed data point; hovering a marker
# shows the file it came from.
axis_labels = {
    'accuracy': 'Accuracy (%)',
    'zstd_ratio': 'Compression Ratio (%)',
}
fig = px.scatter(
    df,
    x='accuracy',
    y='zstd_ratio',
    hover_data=['source'],
    title='Accuracy vs Compression Ratio (zstd)',
    labels=axis_labels,
)

# Overlay the frontier as a dashed red line joining its points.
frontier_line = {'color': 'red', 'width': 2, 'dash': 'dash'}
fig.add_scatter(
    x=frontier_df['accuracy'],
    y=frontier_df['zstd_ratio'],
    mode='lines+markers',
    name='Frontiera Efficiente',
    line=frontier_line,
)

fig.show()
