In [14]:
from openai import OpenAI
import os
import json
import numpy as np
from utils import get_watermarked_text, get_mutated_text
from dotenv import load_dotenv, find_dotenv

# Load variables from a .env file (located via find_dotenv) into the process
# environment before the API client is constructed.
# NOTE(review): presumably this is where the OpenAI credentials come from — confirm.
load_dotenv(find_dotenv())

client = OpenAI()

# ---------------------------------------------------------------------------
# Configuration: which (output, attack) pairs to compare.
# ---------------------------------------------------------------------------
entropy = 5

output_1 = 1
# Previously this name was never assigned in this cell, so a fresh kernel hit a
# NameError when building save_path. "1_1" matches the cache file name
# (embedding_5-1-1_1--5-2-1_1.json) that the follow-up cell loads.
attack_id_1 = "1_1"
output_2 = 2
attack_id_2 = "1_1"
save_path = f"./embeddings/embedding_{entropy}-{output_1}-{attack_id_1}--{entropy}-{output_2}-{attack_id_2}.json"

# Create the output directory up front so the embeddings request below is not
# wasted on a write that fails because the folder does not exist.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# ---------------------------------------------------------------------------
# Get Perturbed Versions: read the watermarked and mutated text for each pair.
# ---------------------------------------------------------------------------
csv_file_directory = f"results/stationary_distribution/robustness_analysis/entropy_{entropy}/"

first_perturbed_csv_filename = f"output_{output_1}/corpuses/attack_{attack_id_1}.csv"
csv_file_path = os.path.join(csv_file_directory, first_perturbed_csv_filename)
response_1 = get_watermarked_text(csv_file_path)
perturbed_1 = get_mutated_text(csv_file_path)

second_perturbed_csv_filename = f"output_{output_2}/corpuses/attack_{attack_id_2}.csv"
csv_file_path = os.path.join(csv_file_directory, second_perturbed_csv_filename)
response_2 = get_watermarked_text(csv_file_path)
perturbed_2 = get_mutated_text(csv_file_path)

# ---------------------------------------------------------------------------
# Embed all four texts in a single request. The results are read back by
# position, in the same order as the inputs.
# ---------------------------------------------------------------------------
resp = client.embeddings.create(
    input=[response_1, response_2, perturbed_1, perturbed_2],
    model="text-embedding-3-large")

response_1_embedding = resp.data[0].embedding
response_2_embedding = resp.data[1].embedding
perturbed_1_embedding = resp.data[2].embedding
perturbed_2_embedding = resp.data[3].embedding

# Saved layout: [response_1, response_2, perturbed_1, perturbed_2].
embeddings = [response_1_embedding, response_2_embedding, perturbed_1_embedding, perturbed_2_embedding]

# Similarity is the raw dot product of the two embedding vectors.
# NOTE(review): this equals cosine similarity only if the vectors are
# unit-length — presumably true for this model; confirm.
sim_score_1_1 = np.dot(response_1_embedding, perturbed_1_embedding)
sim_score_1_2 = np.dot(response_1_embedding, perturbed_2_embedding)
sim_score_2_1 = np.dot(response_2_embedding, perturbed_1_embedding)
sim_score_2_2 = np.dot(response_2_embedding, perturbed_2_embedding)

# Report every response/perturbation pairing.
print(f"Similarity between response_1 and perturbed_1: {sim_score_1_1}")
print(f"Similarity between response_1 and perturbed_2: {sim_score_1_2}")
print(f"Similarity between response_2 and perturbed_1: {sim_score_2_1}")
print(f"Similarity between response_2 and perturbed_2: {sim_score_2_2}")

# Persist the four embeddings as a JSON list of lists for later reuse.
data_str = json.dumps(embeddings, indent = 4)

with open(save_path,"w") as file:
    file.write(data_str)

Similarity between response_1 and perturbed_1: 0.8959796713452399
Similarity between response_1 and perturbed_2: 0.9053916913722854
Similarity between response_2 and perturbed_1: 0.8684801301835975
Similarity between response_2 and perturbed_2: 0.9038679529244255


In [23]:
# ---------------------------------------------------------------------------
# Configuration: which (output, attack) pairs to compare in this run.
# ---------------------------------------------------------------------------
entropy = 5
output_1 = 1
attack_id_1 = "2_1"
output_2 = 2
attack_id_2 = "2_1"

# Attack id of the earlier run whose saved file supplies the response
# embeddings. The cache path is derived from the settings above instead of
# being a hard-coded literal, so it cannot drift when entropy/output change;
# with the current values it resolves to
# ./embeddings/embedding_5-1-1_1--5-2-1_1.json.
cached_attack_id = "1_1"
file_path = f"./embeddings/embedding_{entropy}-{output_1}-{cached_attack_id}--{entropy}-{output_2}-{cached_attack_id}.json"
save_path = f"./embeddings/embedding_{entropy}-{output_1}-{attack_id_1}--{entropy}-{output_2}-{attack_id_2}.json"

# Make sure the output directory exists before spending an API call.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# ---------------------------------------------------------------------------
# Reuse the response embeddings from the cached file rather than re-embedding.
# The first two entries of the saved list are taken as response_1/response_2.
# ---------------------------------------------------------------------------
with open(file_path, 'r') as file:
    data = json.load(file)

response_1_embedding = data[0]
response_2_embedding = data[1]

# ---------------------------------------------------------------------------
# Read the texts for the new attack ids.
# ---------------------------------------------------------------------------
csv_file_directory = f"results/stationary_distribution/robustness_analysis/entropy_{entropy}/"

# response_1 / response_2 are loaded but not embedded in this cell; only the
# mutated texts are sent to the API.
# NOTE(review): this assumes the watermarked text in these CSVs is the same
# text that was embedded into the cached file — confirm.
first_perturbed_csv_filename = f"output_{output_1}/corpuses/attack_{attack_id_1}.csv"
csv_file_path = os.path.join(csv_file_directory, first_perturbed_csv_filename)
response_1 = get_watermarked_text(csv_file_path)
perturbed_1 = get_mutated_text(csv_file_path)

second_perturbed_csv_filename = f"output_{output_2}/corpuses/attack_{attack_id_2}.csv"
csv_file_path = os.path.join(csv_file_directory, second_perturbed_csv_filename)
response_2 = get_watermarked_text(csv_file_path)
perturbed_2 = get_mutated_text(csv_file_path)

# Embed only the two perturbed texts; results are read back by input position.
resp = client.embeddings.create(
    input=[perturbed_1, perturbed_2],
    model="text-embedding-3-large")

perturbed_1_embedding = resp.data[0].embedding
perturbed_2_embedding = resp.data[1].embedding

# Saved layout: [response_1, response_2, perturbed_1, perturbed_2].
embeddings = [response_1_embedding, response_2_embedding, perturbed_1_embedding, perturbed_2_embedding]

# Similarity is the raw dot product of the two embedding vectors.
# NOTE(review): this equals cosine similarity only if the vectors are
# unit-length — presumably true for this model; confirm.
sim_score_1_1 = np.dot(response_1_embedding, perturbed_1_embedding)
sim_score_1_2 = np.dot(response_1_embedding, perturbed_2_embedding)
sim_score_2_1 = np.dot(response_2_embedding, perturbed_1_embedding)
sim_score_2_2 = np.dot(response_2_embedding, perturbed_2_embedding)

# Report every response/perturbation pairing.
print(f"Similarity between response_1 and perturbed_1: {sim_score_1_1}")
print(f"Similarity between response_1 and perturbed_2: {sim_score_1_2}")
print(f"Similarity between response_2 and perturbed_1: {sim_score_2_1}")
print(f"Similarity between response_2 and perturbed_2: {sim_score_2_2}")

# Persist the four embeddings as a JSON list of lists for later reuse.
data_str = json.dumps(embeddings, indent = 4)

with open(save_path,"w") as file:
    file.write(data_str)

Similarity between response_1 and perturbed_1: 0.883730241060899
Similarity between response_1 and perturbed_2: 0.9100564399711037
Similarity between response_2 and perturbed_1: 0.884741273886086
Similarity between response_2 and perturbed_2: 0.9355204859180741


In [None]:
# SBert -- Sentence Transformers
