In [3]:
import pandas as pd
import os
import csv

def normalize_score(score, mean, stdev):
    """Return the z-score of ``score`` with respect to ``mean`` and ``stdev``.

    A score equal to ``-1`` is handed back untouched instead of being
    normalized (presumably ``-1`` marks a row that was never scored --
    confirm against whatever produces ``watermark_score``).

    Args:
        score: Raw score to normalize.
        mean: Value subtracted from ``score``.
        stdev: Value the centered score is divided by.

    Returns:
        ``score`` itself when it equals ``-1``; otherwise
        ``(score - mean) / stdev``.
    """
    is_placeholder = score == -1
    return score if is_placeholder else (score - mean) / stdev

def merge_watermark_scores(df1, df2):
    """Fill in missing ``watermark_score`` values between two row-aligned frames.

    Rows are paired by position. A score of ``-1`` is treated as "missing":

    * both missing          -> nothing happens;
    * only ``df1`` missing  -> ``df1`` receives ``df2``'s score;
    * only ``df2`` missing  -> ``df2`` receives ``df1``'s score;
    * both present, unequal -> a conflict message is printed, nothing is changed.

    Both frames are modified in place.

    Args:
        df1: DataFrame with a ``watermark_score`` column.
        df2: DataFrame with a ``watermark_score`` column and the same shape
            as ``df1``.

    Returns:
        ``df1`` (the same object that was passed in), after filling.

    Raises:
        ValueError: If the two frames do not have the same shape.
    """
    if df1.shape != df2.shape:
        raise ValueError("DataFrames must have the same shape")

    # Writes must be positional: the loop counter is a row position, not an
    # index label. Using the label-based ``.at`` with it would touch the wrong
    # row (or append a new one) whenever the index is not 0..n-1.
    score_col1 = df1.columns.get_loc('watermark_score')
    score_col2 = df2.columns.get_loc('watermark_score')

    # Snapshot the pairs up front so the in-place writes below cannot
    # interfere with the iteration.
    paired_scores = list(zip(df1['watermark_score'], df2['watermark_score']))

    for index, (wm1, wm2) in enumerate(paired_scores):
        if wm1 == -1 and wm2 == -1:
            # Nothing to merge for this row.
            continue
        if wm1 == -1:
            # Only df1 is missing: take df2's score.
            df1.iat[index, score_col1] = wm2
        elif wm2 == -1:
            # Only df2 is missing: take df1's score.
            df2.iat[index, score_col2] = wm1
        elif wm1 != wm2:
            # Both present but different: report, leave both untouched.
            print(f"Conflict at index {index}: df1: {wm1}, df2: {wm2}")

    return df1


In [3]:
# Normalization constants shared by the SemStamp cells of this notebook.
mean_score = -0.7281950363003581
stdev_score = 0.933524266518816

dir_path = "/data2/borito1907/impossibility-watermark/attack_traces"

# Walk the trace directory and normalize every merged SemStamp "Document*"
# trace produced with n-steps=200.
for filename in os.listdir(dir_path):
    # Guard clause: ignore every file that is not one of the targeted traces.
    if not (
        "Document" in filename
        and "SemStamp" in filename
        and "n-steps=200" in filename
        and filename.endswith("annotatedmerged.csv")
    ):
        continue

    df_path = os.path.join(dir_path, filename)
    print(f"Processing file: {df_path}")
    new_df_path = os.path.join(dir_path, filename.replace('.csv', '_annotatedfinal.csv'))
    df = pd.read_csv(df_path)

    # Add the normalized column; rows scored -1 are passed through as-is
    # by normalize_score.
    df['normalized_watermark_score'] = df['watermark_score'].apply(
        normalize_score, args=(mean_score, stdev_score)
    )

    # Writing the result is currently disabled, so new_df_path is unused:
    # df.to_csv(new_df_path, index=False, quoting=csv.QUOTE_ALL)


Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_SemStampWatermarker_Document1StepMutator_n-steps=200_attack_results_annotatedmerged.csv
Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_SemStampWatermarker_DocumentMutator_n-steps=200_attack_results_annotatedmerged.csv
Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_SemStampWatermarker_Document2StepMutator_n-steps=200_attack_results_annotatedmerged.csv


In [10]:
# Normalization constants shared by the SemStamp cells of this notebook.
mean_score = -0.7281950363003581
stdev_score = 0.933524266518816

dir_path = "/data2/borito1907/impossibility-watermark/attack_traces"

# Normalize every annotated SemStamp "Sentence*" trace (n-steps=200) whose
# name does not contain "good_embedder", and save it next to the input
# with an "annotated1.csv" suffix.
for filename in os.listdir(dir_path):
    # Guard clause: ignore every file that is not one of the targeted traces.
    if not (
        "good_embedder" not in filename
        and "Sentence" in filename
        and "SemStamp" in filename
        and "n-steps=200" in filename
        and filename.endswith("annotated.csv")
    ):
        continue

    df_path = os.path.join(dir_path, filename)
    print(f"Processing file: {df_path}")
    new_df_path = os.path.join(dir_path, filename.replace('annotated.csv', 'annotated1.csv'))
    df = pd.read_csv(df_path)

    # Add the normalized column; rows scored -1 are passed through as-is
    # by normalize_score.
    df['normalized_watermark_score'] = df['watermark_score'].apply(
        normalize_score, args=(mean_score, stdev_score)
    )

    # NOTE: an existing output file is overwritten without warning.
    df.to_csv(new_df_path, index=False, quoting=csv.QUOTE_ALL)


Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_SemStampWatermarker_SentenceMutator_n-steps=200_attack_results_annotated.csv


In [10]:
# Normalization constants shared by the SemStamp cells of this notebook.
mean_score = -0.7281950363003581
stdev_score = 0.933524266518816

# Single SemStamp WordMutator trace (n-steps=1000).
dir_path = "/data2/borito1907"
filename = 'DiffOracle_semstamp_WordMutator_n-steps=1000_attack_results_newest_annotated.csv'
df_path = os.path.join(dir_path, filename)
print(f"Processing file: {df_path}")
new_df_path = os.path.join(dir_path, filename.replace('annotated.csv', 'annotatedfinal.csv'))
df = pd.read_csv(df_path)

# Add the normalized column; rows scored -1 are passed through as-is
# by normalize_score.
df['normalized_watermark_score'] = df['watermark_score'].apply(
    normalize_score, args=(mean_score, stdev_score)
)

# Writing the result is currently disabled, so new_df_path is unused:
# df.to_csv(new_df_path)

Processing file: /data2/borito1907/DiffOracle_semstamp_WordMutator_n-steps=1000_attack_results_newest_annotated.csv


In [6]:
# Normalization constants shared by the SemStamp cells of this notebook.
mean_score = -0.7281950363003581
stdev_score = 0.933524266518816

# SemStamp detection scores for the unwatermarked set: input file and the
# intended location of the normalized copy.
df_path = "/data2/borito1907/impossibility-watermark/unwatermarked_scores/semstamp_detect_unwatermarked.csv"
new_df_path = "/data2/borito1907/impossibility-watermark/unwatermarked_scores/semstamp_detect_unwatermarked_normalized.csv"

df = pd.read_csv(df_path)

# Add the normalized column; rows scored -1 are passed through as-is
# by normalize_score.
df['normalized_watermark_score'] = df['watermark_score'].apply(
    normalize_score, args=(mean_score, stdev_score)
)

# Writing the result is currently disabled, so new_df_path is unused:
# df.to_csv(new_df_path)


In [11]:
# Normalization constants shared by the Adaptive-watermarker cells of this
# notebook.
mean_score = 51.60824692338929
stdev_score = 5.957990385248221

dir_path = "/data2/borito1907/impossibility-watermark/attack_traces"

# Normalize every annotated Adaptive "Sentence*" trace in the directory.
for filename in os.listdir(dir_path):
    # Guard clause: ignore every file that is not one of the targeted traces.
    if not (
        "Adaptive" in filename
        and "Sentence" in filename
        and filename.endswith("annotated.csv")
    ):
        continue

    df_path = os.path.join(dir_path, filename)
    print(f"Processing file: {df_path}")
    new_df_path = os.path.join(dir_path, filename.replace('annotated.csv', 'annotated1.csv'))
    df = pd.read_csv(df_path)

    # Add the normalized column; rows scored -1 are passed through as-is
    # by normalize_score.
    df['normalized_watermark_score'] = df['watermark_score'].apply(
        normalize_score, args=(mean_score, stdev_score)
    )

    # Writing the result is currently disabled, so new_df_path is unused:
    # df.to_csv(new_df_path, index=False, quoting=csv.QUOTE_ALL)


Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_AdaptiveWatermarker_SentenceMutator_n-steps=200_attack_results_annotated.csv


In [12]:
# Normalization constants shared by the Adaptive-watermarker cells of this
# notebook.
mean_score = 51.60824692338929
stdev_score = 5.957990385248221

# Single Adaptive WordMutator trace (n-steps=1000).
dir_path = "/data2/borito1907"
filename = 'DiffOracle_adaptive_WordMutator_n-steps=1000_attack_results_newest_annotated.csv'
df_path = os.path.join(dir_path, filename)
print(f"Processing file: {df_path}")
new_df_path = os.path.join(dir_path, filename.replace('annotated.csv', 'annotatedfinal.csv'))
df = pd.read_csv(df_path)

# Add the normalized column; rows scored -1 are passed through as-is
# by normalize_score.
df['normalized_watermark_score'] = df['watermark_score'].apply(
    normalize_score, args=(mean_score, stdev_score)
)

# Writing the result is currently disabled, so new_df_path is unused:
# df.to_csv(new_df_path)

Processing file: /data2/borito1907/DiffOracle_adaptive_WordMutator_n-steps=1000_attack_results_newest_annotated.csv


In [17]:
import pandas as pd


def breakup_attacks(df):
    """Split a concatenated attack trace into one DataFrame per attack.

    A row whose ``mutation_num`` equals ``-1`` opens a new attack; every
    following row belongs to that attack until the next ``-1`` row. Rows that
    appear before the first ``-1`` row (if any) form a leading group of their
    own.

    The split is done with a single vectorized pass (cumulative count of the
    ``-1`` markers + ``groupby``) instead of growing frames row by row, which
    is quadratic in the number of rows. Column dtypes of ``df`` are kept.

    Args:
        df: DataFrame with a ``mutation_num`` column, rows in trace order.

    Returns:
        A list of DataFrames in order of appearance, each with a fresh
        ``0..n-1`` index. An input with no rows yields an empty list.
    """
    if len(df) == 0:
        return []

    # Each -1 marker bumps the running count, so all rows of one attack share
    # the same id. A plain array is used as the grouping key so the row
    # labels of ``df`` (possibly non-unique) play no role.
    starts_new_attack = df['mutation_num'] == -1
    attack_id = starts_new_attack.cumsum().to_numpy()

    return [
        attack.reset_index(drop=True)
        for _, attack in df.groupby(attack_id, sort=False)
    ]

# Load the normalized Adaptive WordMutator trace and split it into one
# DataFrame per attack (see breakup_attacks). Rebinds the notebook-wide
# `df` and `dfs`.
# NOTE(review): this is the path the Adaptive WordMutator normalization cell
# computes as `new_df_path`, but that cell's `to_csv` call is commented out --
# the file has to exist on disk already for this cell to run.
df = pd.read_csv('/data2/borito1907/DiffOracle_adaptive_WordMutator_n-steps=1000_attack_results_newest_annotatedfinal.csv')
dfs = breakup_attacks(df)

In [18]:
# First attack: keep only the rows whose normalized score differs from -1.0
# (normalize_score passes a raw score of -1 through unchanged), then display
# that column.
temp = dfs[0].loc[dfs[0]['normalized_watermark_score'].ne(-1.0)]
temp['normalized_watermark_score']

0       8.045520
100     5.417271
200     4.860457
300     4.091313
400     3.575051
500     2.860453
600     3.237212
700     2.651760
800     2.869096
900     2.928565
1000    2.485681
1001    2.548309
Name: normalized_watermark_score, dtype: float64

In [19]:
# Second attack: keep only the rows whose normalized score differs from -1.0
# (normalize_score passes a raw score of -1 through unchanged), then display
# that column.
temp = dfs[1].loc[dfs[1]['normalized_watermark_score'].ne(-1.0)]
temp['normalized_watermark_score']

0       8.000975
100     5.872734
200     4.992905
300     4.024349
400     3.954658
500     3.120251
600     3.047872
700     2.731215
800     2.695275
900     2.901662
1000    2.413255
1001    2.546398
Name: normalized_watermark_score, dtype: float64

In [20]:
# Load the normalized SemStamp WordMutator trace and split it into one
# DataFrame per attack (see breakup_attacks). Rebinds the notebook-wide
# `df` and `dfs`, replacing whatever trace was loaded into them before.
# NOTE(review): this is the path the SemStamp WordMutator normalization cell
# computes as `new_df_path`, but that cell's `to_csv` call is commented out --
# the file has to exist on disk already for this cell to run.
df = pd.read_csv('/data2/borito1907/DiffOracle_semstamp_WordMutator_n-steps=1000_attack_results_newest_annotatedfinal.csv')
dfs = breakup_attacks(df)

## New DocMutator

In [4]:
# NOTE(review): every statement in this cell is commented out, so running it
# does nothing. The "Processing file: ..." output recorded under it presumably
# comes from an earlier run made before the code was commented out -- confirm,
# then either restore the cell or delete it together with its stale output.
# mean_score = 51.60824692338929
# stdev_score = 5.957990385248221

# dir_path = "/data2/borito1907/impossibility-watermark/attack_traces"

# # counter = 0

# # Loop over all CSV files in the directory
# for filename in os.listdir(dir_path):
#     if ("Adaptive" in filename) and ("Document") in filename and "v2" in filename and filename.endswith("annotated.csv"):
#         df_path = os.path.join(dir_path, filename)
#         print(f"Processing file: {df_path}")
#         new_df_path = os.path.join(dir_path, filename.replace('annotated.csv', 'annotated1.csv'))

#         # Check if the new file already exists
#         if os.path.exists(new_df_path):
#             print(f"File {new_df_path} already exists. Skipping...")
#             continue  # Skip this file and move to the next one
#         df = pd.read_csv(df_path)
                
#         # Apply normalization
#         df['normalized_watermark_score'] = df['watermark_score'].apply(lambda x: normalize_score(x, mean_score, stdev_score))

        
#         df.to_csv(new_df_path, index=False, quoting=csv.QUOTE_ALL)


Processing file: /data2/borito1907/impossibility-watermark/attack_traces/DiffOracle_AdaptiveWatermarker_DocumentMutator_n-steps=50_attack_results_v2_annotated.csv


In [4]:
# Normalization constants shared by the Adaptive-watermarker cells of this
# notebook.
mean_score = 51.60824692338929
stdev_score = 5.957990385248221

dir_path = "/data2/borito1907/impossibility-watermark/attack_traces"

# Normalize every annotated Adaptive "sandpaper" result file and save it next
# to the input with an "annotated1.csv" suffix, unless that output exists.
for filename in os.listdir(dir_path):
    # Guard clause: ignore every file that is not one of the targeted traces.
    if not (
        "Adaptive" in filename
        and "sandpaper" in filename
        and filename.endswith("annotated.csv")
    ):
        continue

    df_path = os.path.join(dir_path, filename)
    print(f"Processing file: {df_path}")
    new_df_path = os.path.join(dir_path, filename.replace('annotated.csv', 'annotated1.csv'))

    # Never overwrite a previously written result.
    if os.path.exists(new_df_path):
        print(f"File {new_df_path} already exists. Skipping...")
        continue

    df = pd.read_csv(df_path)

    # Add the normalized column; rows scored -1 are passed through as-is
    # by normalize_score.
    df['normalized_watermark_score'] = df['watermark_score'].apply(
        normalize_score, args=(mean_score, stdev_score)
    )

    df.to_csv(new_df_path, index=False, quoting=csv.QUOTE_ALL)


Processing file: /data2/borito1907/impossibility-watermark/attack_traces/AdaptiveWatermaker_sandpaper_results_annotated.csv
