In [8]:
import pandas as pd

# Load data

In [9]:
# Read the benchmark output (one JSON record per line) and flatten the nested
# records into dotted column names such as 'config.branches' or 'results.mse'.
results_file = "sz3-02-algorithm-error-results/results.jsonl"
raw = pd.read_json(results_file, lines=True)
records = raw.to_dict(orient="records")
df = pd.json_normalize(records, sep=".")

# Distortion metrics that are exactly zero are replaced by NaN so those points
# are left out of the plots.
# NOTE(review): presumably needed because several of these metrics are later
# drawn on log-scaled axes, where 0 cannot be shown -- confirm.
zeroed_metric_columns = (
    'results.mse',
    'results.ks_statistic',
    'results.earth_mover_distance',
    'results.jensen_shannon_divergence',
)
for metric_column in zeroed_metric_columns:
    is_zero = df[metric_column].eq(0)
    df.loc[is_zero, metric_column] = float('nan')

# Optional: uncomment to restrict the analysis to a single branch.
# df = df[df['config.branches'] == 'AnalysisJetsAuxDyn.pt']

# Visualization

In [10]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [12]:
# Per-algorithm styling: every compression algorithm id (cmprAlgo) found in the
# results gets one display name and one fixed colour, so the same algorithm is
# drawn identically in every subplot.
algorithms = df['config.compressor_config.cmprAlgo'].unique()

# (algorithm id, display name, trace colour)
_algo_styles = (
    ('0', 'ALGO_LORENZO', '#636EFA'),         # Blue
    ('1', 'ALGO_INTERP_LORENZO', '#EF553B'),  # Red
    ('2', 'ALGO_INTERP', '#00CC96'),          # Green
    ('3', 'ALGO_NOPRED', '#AB63FA'),          # Purple
)
algo_colors = {algo_id: colour for algo_id, _, colour in _algo_styles}
algo_names = {algo_id: label for algo_id, label, _ in _algo_styles}

# The error bound must be numeric before it can be sorted or placed on a log
# axis; the sorted distinct values double as the x-axis tick positions.
_bound_column = 'config.compressor_config.relErrorBound'
df[_bound_column] = pd.to_numeric(df[_bound_column])
tickvals = sorted(df[_bound_column].unique())

# Column names used to slice the results and to place points on the x axis.
BRANCH_COL = 'config.branches'
ALGO_COL = 'config.compressor_config.cmprAlgo'
BOUND_COL = 'config.compressor_config.relErrorBound'

# (results column, subplot title) for each grid column, left to right.
SYSTEM_METRICS = (
    ('results.compression_ratio', 'Compression Ratio'),
    ('results.compression_throughput_mbps', 'Compression Throughput (MB/s)'),
    ('results.decompression_throughput_mbps', 'Decompression Throughput (MB/s)'),
)
QUALITY_METRICS = (
    ('results.compression_ratio', 'Compression Ratio'),
    ('results.psnr', 'PSNR (dB)'),
    ('results.ks_statistic', 'K-S Statistic'),
    ('results.earth_mover_distance', 'Earth Mover\'s Distance'),
    ('results.jensen_shannon_divergence', 'Jensen-Shannon Divergence'),
)

# One subplot row per branch.
branches = ['AnalysisJetsAuxDyn.pt', 'AnalysisJetsAuxDyn.eta', 'AnalysisJetsAuxDyn.phi']


def _build_metric_grid(results, metrics, branch_names):
    """Build a branch-by-metric grid of line plots, one trace per algorithm.

    Args:
        results: DataFrame holding BRANCH_COL, ALGO_COL, BOUND_COL and every
            column named in ``metrics``.
        metrics: sequence of ``(column, title)`` pairs, one per grid column.
            The titles are passed to ``make_subplots`` as ``subplot_titles``.
        branch_names: values of BRANCH_COL, one per grid row.

    Returns:
        The plotly figure created by ``make_subplots`` with all traces added.

    Relies on the notebook-level ``algorithms``, ``algo_names`` and
    ``algo_colors`` for the set of algorithms and their styling.
    """
    fig = make_subplots(
        rows=len(branch_names), cols=len(metrics),
        subplot_titles=tuple(title for _, title in metrics),
    )

    for row, branch in enumerate(branch_names, start=1):
        branch_df = results[results[BRANCH_COL] == branch]

        for algorithm in algorithms:
            # Sort by error bound so the connecting line runs left to right
            # regardless of the record order in the results file; a stable
            # sort keeps the file order among equal bounds.
            alg_data = (
                branch_df[branch_df[ALGO_COL] == algorithm]
                .sort_values(BOUND_COL, kind='stable')
            )
            label = algo_names[algorithm]

            for col, (metric_column, _) in enumerate(metrics, start=1):
                fig.add_trace(go.Scatter(
                    x=alg_data[BOUND_COL],
                    y=alg_data[metric_column],
                    mode='lines+markers',
                    name=label,
                    legendgroup=label,
                    # One legend entry per algorithm: only the top-left
                    # subplot contributes to the legend.
                    showlegend=(row == 1 and col == 1),
                    line=dict(color=algo_colors[algorithm]),
                ), row=row, col=col)

    return fig


# Performance metrics
fig_system = _build_metric_grid(df, SYSTEM_METRICS, branches)

# Quality metrics
fig_quality = _build_metric_grid(df, QUALITY_METRICS, branches)
        
# Tick labels for the error-bound axis, paired positionally with `tickvals`.
# NOTE(review): hard-coded for five distinct error bounds; if the results hold
# a different set of bounds these labels will not line up -- confirm.
ticklabels = ['0.0001%', '0.001%', '0.01%','0.1%', '1%']

n_rows = len(branches)
x_axis_title = 'Relative Error Bound'

# --- Performance figure -------------------------------------------------
fig_system.update_xaxes(type='log', tickvals=tickvals, ticktext=ticklabels)
# Single x-axis title under the middle column of the bottom row.
fig_system.update_xaxes(title_text=x_axis_title, row=n_rows, col=2)

# Label each row with the branch it shows.
for branch_idx, branch in enumerate(branches):
    fig_system.update_yaxes(title_text=branch, row=branch_idx + 1, col=1)

fig_system.update_layout(
    height=300 * n_rows,
    width=400 * 3,
    title_text="SZ3: Effect of compression algorithm on performance metrics",
)

# --- Quality figure -----------------------------------------------------
fig_quality.update_xaxes(type='log', tickvals=tickvals, ticktext=ticklabels)
# Single x-axis title under the middle column of the bottom row.
fig_quality.update_xaxes(title_text=x_axis_title, row=n_rows, col=3)

# Columns of the quality grid drawn on a log y axis:
# 3 = K-S statistic, 4 = Earth Mover's Distance, 5 = Jensen-Shannon divergence.
quality_log_columns = (3, 4, 5)
for branch_idx, branch in enumerate(branches):
    row = branch_idx + 1
    # Label each row with the branch it shows, as in the performance figure.
    fig_quality.update_yaxes(title_text=branch, row=row, col=1)
    for col in quality_log_columns:
        fig_quality.update_yaxes(type='log', row=row, col=col)

fig_quality.update_layout(
    height=300 * n_rows,
    width=400 * 5,
    title_text="SZ3: Effect of compression algorithm on quality metrics",
)

fig_system.show()
fig_quality.show()