In [2]:
import pandas as pd
import os

# Function to normalize scores
def normalize_score(score, mean, stdev):
    """Return the z-score of ``score``, passing the value -1 through untouched.

    Args:
        score: Raw score to normalize.
        mean: Value subtracted from ``score``.
        stdev: Value the centered score is divided by.

    Returns:
        ``score`` itself when it equals -1 (presumably a "no score" marker --
        confirm with the data producer), otherwise ``(score - mean) / stdev``.
    """
    is_placeholder = score == -1
    return score if is_placeholder else (score - mean) / stdev

def merge_watermark_scores(df1, df2):
    """Fill in -1 entries of 'watermark_score' in two frames from each other.

    Rows are paired by position, not by index label. For each pair of
    'watermark_score' values:

    * both are -1: nothing happens;
    * both differ from -1 and from each other: a conflict line is printed
      and neither frame is changed;
    * exactly one is -1: that entry is overwritten with the other frame's value.

    Both frames are modified in place.

    Args:
        df1: DataFrame with a 'watermark_score' column.
        df2: DataFrame with a 'watermark_score' column and the same shape as ``df1``.

    Returns:
        ``df1`` (the same object that was passed in).

    Raises:
        ValueError: If the two frames do not have the same shape.
        KeyError: If either frame lacks a 'watermark_score' column.
    """
    if df1.shape != df2.shape:
        raise ValueError("DataFrames must have the same shape")

    # Resolve the column position once so writes can be purely positional.
    # Writing with .at[<enumerate counter>] treats the counter as an index
    # *label*, which hits the wrong row (or appends a new one) whenever the
    # frame does not carry a default RangeIndex.
    col_pos1 = df1.columns.get_loc('watermark_score')
    col_pos2 = df2.columns.get_loc('watermark_score')

    # Snapshot the scores so the loop never iterates over data it is mutating.
    scores1 = df1['watermark_score'].tolist()
    scores2 = df2['watermark_score'].tolist()

    for pos, (wm1, wm2) in enumerate(zip(scores1, scores2)):
        missing1 = wm1 == -1
        missing2 = wm2 == -1

        if missing1 and missing2:
            # Neither frame has a score for this row.
            continue

        if not missing1 and not missing2:
            # Both frames have a score; only report when they disagree.
            if wm1 != wm2:
                print(f"Conflict at index {pos}: df1: {wm1}, df2: {wm2}")
            continue

        if missing1:
            # Only df2 has a score: copy it into df1.
            df1.iat[pos, col_pos1] = wm2
        else:
            # Only df1 has a score: copy it into df2.
            df2.iat[pos, col_pos2] = wm1

    return df1


In [3]:
# Constants handed to normalize_score as its mean / stdev arguments in this cell.
mean_score, stdev_score = -0.7281950363003581, 0.933524266518816

dir_path = "/data2/borito1907/impossibility-watermark/attack_traces"

# Walk the directory and keep only entries whose name contains "Document",
# "SemStamp" and "n-steps=200" and ends with "annotatedmerged.csv".
for filename in os.listdir(dir_path):
    has_required_tags = all(
        tag in filename for tag in ("Document", "SemStamp", "n-steps=200")
    )
    if not (has_required_tags and filename.endswith("annotatedmerged.csv")):
        continue

    df_path = os.path.join(dir_path, filename)
    print(f"Processing file: {df_path}")
    new_df_path = os.path.join(dir_path, filename.replace('.csv', '_annotatedfinal.csv'))
    df = pd.read_csv(df_path)

    # Derive the normalized column element by element from 'watermark_score'.
    df['normalized_watermark_score'] = df['watermark_score'].apply(
        normalize_score, args=(mean_score, stdev_score)
    )

    # Writing the result to new_df_path is currently disabled:
    # df.to_csv(new_df_path)


Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_SemStampWatermarker_Document1StepMutator_n-steps=200_attack_results_annotatedmerged.csv
Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_SemStampWatermarker_DocumentMutator_n-steps=200_attack_results_annotatedmerged.csv
Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_SemStampWatermarker_Document2StepMutator_n-steps=200_attack_results_annotatedmerged.csv


In [5]:
# Constants handed to normalize_score as its mean / stdev arguments in this cell.
mean_score, stdev_score = -0.7281950363003581, 0.933524266518816

dir_path = "/data2/borito1907/impossibility-watermark/attack_traces"

# Walk the directory and keep only entries whose name does NOT contain
# "Document", does contain "SemStamp" and "n-steps=200", and ends with
# "annotated.csv".
for filename in os.listdir(dir_path):
    if "Document" in filename:
        continue
    if not ("SemStamp" in filename and "n-steps=200" in filename):
        continue
    if not filename.endswith("annotated.csv"):
        continue

    df_path = os.path.join(dir_path, filename)
    print(f"Processing file: {df_path}")
    new_df_path = os.path.join(dir_path, filename.replace('annotated.csv', 'annotatedfinal.csv'))
    df = pd.read_csv(df_path)

    # Derive the normalized column element by element from 'watermark_score'.
    df['normalized_watermark_score'] = df['watermark_score'].apply(
        normalize_score, args=(mean_score, stdev_score)
    )

    # Writing the result to new_df_path is currently disabled:
    # df.to_csv(new_df_path)


Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_SemStampWatermarker_SpanMutator_n-steps=200_attack_results_annotated.csv
Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_SemStampWatermarker_SentenceMutator_n-steps=200_attack_results_annotated.csv
Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_SemStampWatermarker_WordMutator_n-steps=200_attack_results_annotated.csv


In [6]:
# Constants handed to normalize_score as its mean / stdev arguments in this cell.
mean_score, stdev_score = -0.7281950363003581, 0.933524266518816

# Input CSV and the path the normalized copy would be written to.
df_path = "/data2/borito1907/impossibility-watermark/unwatermarked_scores/semstamp_detect_unwatermarked.csv"
new_df_path = "/data2/borito1907/impossibility-watermark/unwatermarked_scores/semstamp_detect_unwatermarked_normalized.csv"

df = pd.read_csv(df_path)

# Derive the normalized column element by element from 'watermark_score'.
df['normalized_watermark_score'] = df['watermark_score'].apply(
    normalize_score, args=(mean_score, stdev_score)
)

# Writing the result to new_df_path is currently disabled:
# df.to_csv(new_df_path)


In [13]:
# Constants handed to normalize_score as its mean / stdev arguments in this cell.
mean_score, stdev_score = 51.60824692338929, 5.957990385248221

dir_path = "/data2/borito1907/impossibility-watermark/attack_traces"

# Walk the directory and keep only entries whose name contains "Adaptive"
# and "n-steps=200" and ends with "annotated.csv".
for filename in os.listdir(dir_path):
    has_required_tags = "Adaptive" in filename and "n-steps=200" in filename
    if not (has_required_tags and filename.endswith("annotated.csv")):
        continue

    df_path = os.path.join(dir_path, filename)
    print(f"Processing file: {df_path}")
    new_df_path = os.path.join(dir_path, filename.replace('annotated.csv', 'annotatedfinal.csv'))
    df = pd.read_csv(df_path)

    # Derive the normalized column element by element from 'watermark_score'.
    df['normalized_watermark_score'] = df['watermark_score'].apply(
        normalize_score, args=(mean_score, stdev_score)
    )

    # Writing the result to new_df_path is currently disabled:
    # df.to_csv(new_df_path)


Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_AdaptiveWatermarker_WordMutator_n-steps=200_attack_results_annotated.csv
Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_AdaptiveWatermarker_Document2StepMutator_n-steps=200_attack_results_annotated.csv
Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_AdaptiveWatermarker_SentenceMutator_n-steps=200_attack_results_annotated.csv
Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_AdaptiveWatermarker_Document1StepMutator_n-steps=200_attack_results_annotated.csv
Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_AdaptiveWatermarker_SpanMutator_n-steps=200_attack_results_annotated.csv
Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_AdaptiveWatermarker_DocumentMutator_n-steps=200_attack_results_annotated.csv
