### Imports and GUI Code

In [1]:
import os
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, Markdown

# Separate output areas so each comparison view can be cleared and redrawn independently.
# info_output_initial is written to by display_with_initial_version.
info_output_initial = widgets.Output()
# info_output_last_successful is written to by display_with_last_successful.
info_output_last_successful = widgets.Output()

# Output area for the per-file summary rendered by display_csv_overview.
csv_overview_output = widgets.Output()

def construct_filepath(entropy, output_number, attack_id):
    """Build the relative path of the CSV corpus for a single attack run.

    Args:
        entropy: Value interpolated into the ``entropy_<...>`` directory name.
        output_number: Value interpolated into the ``output_<...>`` directory name.
        attack_id: Value interpolated into the ``attack_<...>.csv`` file name.

    Returns:
        A ``/``-separated path string starting with ``./results``; it is
        relative to the current working directory.
    """
    path_parts = [
        ".",
        "results",
        "stationary_distribution",
        "robustness_analysis",
        f"entropy_{entropy}",
        f"output_{output_number}",
        "corpuses",
        f"attack_{attack_id}.csv",
    ]
    # Joined with a literal "/" (not os.path.join) so the result is the same on every platform.
    return "/".join(path_parts)

def display_csv_overview(entropy, output_number, attack_id, quality_preserved_only, ignore_short_texts):
    """Render summary statistics for one attack CSV into ``csv_overview_output``.

    Args:
        entropy: Entropy value used to locate the CSV (see ``construct_filepath``).
        output_number: Output number used to locate the CSV.
        attack_id: Attack identifier used to locate the CSV.
        quality_preserved_only: When true, keep only rows whose
            ``quality_preserved`` column equals True.
        ignore_short_texts: When true, keep only rows whose ``mutated_text_len``
            is at least 95% of ``current_text_len``.

    Every statistic is computed on the rows that remain after the optional
    filters, so e.g. the short-length count is 0 whenever ``ignore_short_texts``
    is enabled. If the file is missing, or no rows remain, a message is shown
    instead of the statistics.
    """
    csv_filepath = construct_filepath(entropy, output_number, attack_id)
    with csv_overview_output:
        csv_overview_output.clear_output()
        if not os.path.exists(csv_filepath):
            display(Markdown("**File not found. Please check the inputs and try again.**"))
            return

        df = pd.read_csv(csv_filepath)

        if quality_preserved_only:
            df = df[df['quality_preserved'] == True]
        if ignore_short_texts:
            df = df[df['mutated_text_len'] >= 0.95 * df['current_text_len']]

        header = f"**Entropy:** {entropy}, **Output Number:** {output_number}, **Attack Id:** {attack_id}"

        # An empty CSV, or filters that remove every row, would make the
        # iloc[0] / iloc[-1] lookups below raise IndexError.
        if df.empty:
            display(Markdown('\n\n'.join([
                header,
                "**No rows to summarize. The file is empty or the selected filters removed every row.**",
            ])))
            return

        first_row = df.iloc[0]
        last_row = df.iloc[-1]
        short_text_count = df[df['mutated_text_len'] < 0.95 * df['current_text_len']].shape[0]

        display_info = [
            header,
            f"**Starting z-score:** {first_row['watermark_score']}",
            # The first remaining row is treated as the starting point, not as an attempt.
            f"**Total perturbation attempts:** {len(df) - 1}",
            f"**Short length count:** {short_text_count}",
            f"**Quality preserved perturbations:** {df['quality_preserved'].sum()}",
            f"**Attack successful:** {not last_row['watermark_detected']}",
        ]

        display(Markdown('\n\n'.join(display_info)))

def highlight_new_words(initial_text, perturbed_text):
    """Wrap every word of ``perturbed_text`` that is absent from ``initial_text`` in a highlight span.

    Both texts are split on whitespace. A word counts as new when it does not
    occur anywhere in ``initial_text`` (exact, case-sensitive match; position
    is ignored).

    Args:
        initial_text: Reference text whose words are considered already known.
        perturbed_text: Text to render.

    Returns:
        The words of ``perturbed_text`` joined by single spaces, with each new
        word wrapped in a ``<span>`` that has a light-green background.
    """
    known_words = set(initial_text.split())
    rendered_words = []
    for token in perturbed_text.split():
        if token in known_words:
            rendered_words.append(token)
        else:
            rendered_words.append(f"<span style='background-color: #ccffcc'>{token}</span>")
    return ' '.join(rendered_words)

def display_with_initial_version(entropy, output_number, attack_id, perturbation_idx):
    """Show the initial text next to the text at ``perturbation_idx`` in ``info_output_initial``.

    Args:
        entropy: Entropy value used to locate the CSV (see ``construct_filepath``).
        output_number: Output number used to locate the CSV.
        attack_id: Attack identifier used to locate the CSV.
        perturbation_idx: Row position to inspect, given as text (an int is
            also accepted). Surrounding whitespace is ignored.

    The ``current_text`` of row 0 is used as the initial version. Words of the
    selected row's ``current_text`` that do not occur in it are highlighted.
    A message is shown instead when the file is missing or the index is not a
    non-negative integer within the file's row range.
    """
    csv_filepath = construct_filepath(entropy, output_number, attack_id)
    with info_output_initial:
        info_output_initial.clear_output()
        if not os.path.exists(csv_filepath):
            display(Markdown("**File not found. Please check the inputs and try again.**"))
            return

        df = pd.read_csv(csv_filepath)

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits ("²") that int() rejects with ValueError.
        idx_text = str(perturbation_idx).strip()
        perturbation_idx = int(idx_text) if idx_text.isdecimal() else None

        if perturbation_idx is not None and 0 <= perturbation_idx < len(df):
            initial_text = df.iloc[0]['current_text']
            selected_perturbed_text = df.iloc[perturbation_idx]['current_text']
            current_diff_highlighted = highlight_new_words(initial_text, selected_perturbed_text)

            display_info = [
                f"**Perturbation Index:** {perturbation_idx}",
                f"**Initial Version:** {initial_text}",
                f"**Selected Perturbed Version:** {current_diff_highlighted}",
            ]
        else:
            display_info = ["**Please enter a valid perturbation index.**"]

        display(Markdown('\n\n'.join(display_info)))

def display_with_last_successful(entropy, output_number, attack_id, perturbation_idx):
    """Compare the row at ``perturbation_idx`` with the last quality-preserving row before it.

    Output is rendered into ``info_output_last_successful``.

    Args:
        entropy: Entropy value used to locate the CSV (see ``construct_filepath``).
        output_number: Output number used to locate the CSV.
        attack_id: Attack identifier used to locate the CSV.
        perturbation_idx: Row position to inspect, given as text (an int is
            also accepted). Surrounding whitespace is ignored.

    The "last successful" row is the final row strictly before
    ``perturbation_idx`` whose ``quality_preserved`` equals True. Its
    ``mutated_text`` is highlighted against row 0's ``mutated_text``, and the
    selected row's ``mutated_text`` is highlighted against the last successful
    one. A message is shown instead when the file is missing, the index is not
    a non-negative integer within the file's row range, or no earlier
    quality-preserving row exists.
    """
    csv_filepath = construct_filepath(entropy, output_number, attack_id)
    with info_output_last_successful:
        info_output_last_successful.clear_output()
        if not os.path.exists(csv_filepath):
            display(Markdown("**File not found. Please check the inputs and try again.**"))
            return

        df = pd.read_csv(csv_filepath)

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits ("²") that int() rejects with ValueError.
        idx_text = str(perturbation_idx).strip()
        perturbation_idx = int(idx_text) if idx_text.isdecimal() else None

        # Bounds check: df.iloc[perturbation_idx] below raises IndexError for an
        # index past the last row.
        if perturbation_idx is not None and 0 <= perturbation_idx < len(df):
            # Find the last successful perturbation before the current index.
            # The mask is built from the slice itself so its index matches the
            # frame it filters (a full-length mask triggers a pandas reindex warning).
            earlier_rows = df.iloc[:perturbation_idx]
            last_success_df = earlier_rows[earlier_rows['quality_preserved'] == True].tail(1)
            if not last_success_df.empty:
                last_success_text = last_success_df.iloc[0]['mutated_text']
                selected_perturbed_text = df.iloc[perturbation_idx]['mutated_text']
                initial_text = df.iloc[0]['mutated_text']
                last_success_highlighted = highlight_new_words(initial_text, last_success_text)
                current_diff_highlighted = highlight_new_words(last_success_text, selected_perturbed_text)

                display_info = [
                    f"**Perturbation Index:** {perturbation_idx}",
                    f"**Last Successful Perturbed Version:** {last_success_highlighted}",
                    f"**Selected Perturbed Version:** {current_diff_highlighted}",
                ]
            else:
                display_info = ["**No successful perturbation found before this index.**"]
        else:
            display_info = ["**Please enter a valid perturbation index.**"]

        display(Markdown('\n\n'.join(display_info)))

# Checkboxes that toggle the row filters applied in the CSV overview
quality_preserved_checkbox = widgets.Checkbox(
    value=False,
    description='Quality Preserved Only',
    disabled=False,
)
ignore_short_texts_checkbox = widgets.Checkbox(
    value=False,
    description='Ignore Short Texts',
    disabled=False,
)

# Inputs that select the CSV file (entropy, output number, attack id)
# and the row to inspect (perturbation index)
entropy_input = widgets.IntText(
    value=0,
    description='Entropy:',
    style={'description_width': 'initial'},
)
output_number_input = widgets.IntText(
    value=0,
    description='Output Number:',
    style={'description_width': 'initial'},
)
attack_id_input = widgets.Text(
    value='',
    placeholder='Enter Attack Id',
    description='Attack Id:',
    style={'description_width': 'initial'},
)
perturbation_idx_input = widgets.Text(
    value='',
    placeholder='Enter perturbation index',
    description='Perturbation Index:',
    disabled=False,
    style={'description_width': 'initial'},
)

# Observing changes in the inputs and perturbation index
def on_input_change(change):
    """Redraw all three output panels from the current widget values.

    Args:
        change: The change notification passed by ``observe``. It is not
            inspected; the current values are read directly from the widgets.
    """
    file_selection = (entropy_input.value, output_number_input.value, attack_id_input.value)
    selected_index = perturbation_idx_input.value

    display_csv_overview(
        *file_selection,
        quality_preserved_checkbox.value,
        ignore_short_texts_checkbox.value,
    )
    display_with_initial_version(*file_selection, selected_index)
    display_with_last_successful(*file_selection, selected_index)

# Every input, in the order it is registered and laid out
_observed_inputs = [
    entropy_input,
    output_number_input,
    attack_id_input,
    perturbation_idx_input,
    quality_preserved_checkbox,
    ignore_short_texts_checkbox,
]

# Refresh the panels whenever the value of any input changes
for _input_widget in _observed_inputs:
    _input_widget.observe(on_input_change, names='value')

# Display input widgets
input_widgets = widgets.VBox(_observed_inputs)


### Interactive Attack Viewer

In [2]:

# `input_widgets` is already assembled in the setup cell, so it is reused here
# instead of building a second, identical VBox.
# Changing any input redraws the overview below and the two comparison panels.
display(input_widgets)
display(csv_overview_output)

VBox(children=(IntText(value=0, description='Entropy:', style=DescriptionStyle(description_width='initial')), …

Output()

In [3]:
# Panel filled by display_with_initial_version: initial text vs. the selected row's text.
display(info_output_initial)

Output()

In [4]:
# Panel filled by display_with_last_successful: last quality-preserving row vs. the selected row.
display(info_output_last_successful)

Output()

## In-Depth Analysis

In [5]:
# Load one fixed attack run (entropy 5, output 1, attack "edit_1") for the analysis below.
# The path comes from construct_filepath, so it is relative to the working directory
# like the viewer's paths rather than tied to one machine's absolute directory layout.
df = pd.read_csv(construct_filepath(5, 1, "edit_1"))