In [1]:
%%capture
%pip install fuzzywuzzy python-Levenshtein Faker

In [2]:
import pandas as pd
import re
from fuzzywuzzy import fuzz
import random
from faker import Faker

## Constants

In [137]:
ORIGINAL_FILE_PATH = 'train.jsonl'
SYNTHETIC_FILE_PATH = 'generated_sequences_no_dp.jsonl'
SYNTHETIC_DP_FILE_PATH = 'generated_sequences.jsonl'

# Constants
CANARY_FOLDER = './injected_datasets'
CANARY_SAME_SIZE = 'maintain_dataset_size'
CANARY_INCREASED_SIZE = 'increase_dataset_size'

# Set these parameters
CANARY_REPITIION_RATES = [1]
# CANARY_REPITIION_RATES = [1, 10, 100]

CANARY_GENERATION_TYPE = CANARY_SAME_SIZE

## Read the data

In [70]:
raw_data = pd.read_json(path_or_buf=ORIGINAL_FILE_PATH, lines=True)
# synthetic_data = pd.read_json(path_or_buf=SYNTHETIC_FILE_PATH, lines=True)
# synthetic_dp_data = pd.read_json(path_or_buf=SYNTHETIC_DP_FILE_PATH, lines=True)

In [71]:
raw_data.head(2)

Unnamed: 0,System prompt,Rating,Review Title,Review,Product Title,Product Categories
0,"Given the Rating and Title, you are required t...",4,No white background! It’s clear!,I bought this bc I thought it had the nice whi...,VUIIMEEK Square Case for iPhone 12 Pro Max 6.7...,Cell Phones & Accessories
1,"Given the Rating and Title, you are required t...",5,Awesome! Great price! Works well!,Perfect. How pissed am I that I recently paid ...,"Fitian Fitbit Ionic Charging Cable, Replacemen...",All Electronics


In [72]:
# synthetic_data.head(2)

Unnamed: 0,generated_text
0,"System prompt : Given the Product Title, Produ..."
1,"System prompt : Given the Product Title, Produ..."


In [73]:
# synthetic_dp_data.head(2)

Unnamed: 0,generated_text
0,"System prompt : Given the Product Title, Produ..."
1,"System prompt : Given the Product Title, Produ..."


In [74]:
print(f"Raw data shape: {raw_data.shape}")
# print(f"Synthetic data shape: {synthetic_data.shape}")
# print(f"Synthetic data with DP shape: {synthetic_dp_data.shape}")

Raw data shape: (100000, 6)
Synthetic data shape: (10000, 1)
Synthetic data with DP shape: (10000, 1)


In [75]:
# synthetic_data['generated_text'].iloc[0]

"System prompt : Given the Product Title, Product Category, Review Rating and Review Title, you are required to generate the Review | Product Title: Case for Galaxy Note 9,Cutebe Shockproof Series Hard PC+ TPU Bumper Protective Case for Samsung Galaxy Note 9 Crystal | Product Category: Cell Phones & Accessories | Review Rating: 4 | Review Title: Not a bad price for protection and cuteness | Review: Looks and works great. It was a little little on the loose fitting side but now it's fine. I've dropped my phone quite a bit and my phone has come out fine. I have a tempered glass screen protector on it and I'm pretty sure that's what saved my phone. I don't think this case would have protected it. I'm not sure how well it would have protected the camera on the back of the phone. It is a little bit loose and I've had it come off a few times. I haven't had it fall off yet though. I would recommend this case. It's a great price for a cute case that gives you some protection. I would recommend

In [76]:
# synthetic_dp_data['generated_text'].iloc[0]

"System prompt : Given the Product Title, Product Category, Review Rating and Review Title, you are required to generate the Review | Product Title: Case for Galaxy Note 9,Cutebe Shockproof Series Hard PC+ TPU Bumper Protective Case for Samsung Galaxy Note 9 Crystal | Product Category: Cell Phones & Accessories | Review Rating: 4 | Review Title: Not a bad price for protection and cuteness | Review: Looks and works great. It was a little little on the loose fitting side but now it's fine. I've dropped my phone quite a bit and my phone has come out fine. It's not too bulky and the pink is so cute. I'd buy this again if I needed to. The only thing is that it does not have a cut out for the fingerprint reader, so I can't use that feature. But I've gotten used to using the facial recognition. I'm not sure if it's the case or the phone but the sound is not as loud as it was before I got the case. But I'm not sure if that's the case or the phone. But I love this case. It's cute and it's very 

## Function to parse the synthetic datas

In [77]:
patterns = {
    "System prompt": r"System prompt : (.*?) \|",
    "Product Title": r"Product Title: (.*?) \|",
    "Product Category": r"Product Category: (.*?) \|",
    "Review Rating": r"Review Rating: (\d+) \|",
    "Review Title": r"Review Title: (.*?) \|",
    "Review": r"Review: (.*)"
}

def extract_fields(text):
    return {key: re.search(pattern, text).group(1) if re.search(pattern, text) else None for key, pattern in patterns.items()}

In [78]:
# # Assuming df is the existing DataFrame with a column 'generated_text'
# extracted_data1 = synthetic_data["generated_text"].apply(lambda x: extract_fields(x)).apply(pd.Series)
# extracted_data2 = synthetic_dp_data["generated_text"].apply(lambda x: extract_fields(x)).apply(pd.Series)

# # Concatenate extracted fields with the original DataFrame
# synthetic_data = pd.concat([synthetic_data, extracted_data1], axis=1)
# synthetic_dp_data = pd.concat([synthetic_dp_data, extracted_data2], axis=1)

In [79]:
# synthetic_data.head(1)

Unnamed: 0,generated_text,System prompt,Product Title,Product Category,Review Rating,Review Title,Review
0,"System prompt : Given the Product Title, Produ...","Given the Product Title, Product Category, Rev...","Case for Galaxy Note 9,Cutebe Shockproof Serie...",Cell Phones & Accessories,4,Not a bad price for protection and cuteness,Looks and works great. It was a little little ...


In [80]:
# synthetic_dp_data.head(1)

Unnamed: 0,generated_text,System prompt,Product Title,Product Category,Review Rating,Review Title,Review
0,"System prompt : Given the Product Title, Produ...","Given the Product Title, Product Category, Rev...","Case for Galaxy Note 9,Cutebe Shockproof Serie...",Cell Phones & Accessories,4,Not a bad price for protection and cuteness,Looks and works great. It was a little little ...


## Generate Canaries

What we are doing -
- 5 artificial review-style canary sequences, each containing specific types of private
information.
- Injecting these 5 canary sequences with
varying repetition rates into the original dataset
  - why different repitition rate? data duplication is a major contributing factor to model memorization


Cons of using canaries
- appending a canary sequence might distort its meaning (review meaning mismatch with the meaning of canary sequence)
- but we are only testing data leak and memorization, so should be fine


Reasoning behind selecting these particular canary sequences for our Amazon dataset -
* The order ID can be useful in checking memorization since users commonly reference their orders in reviews.
* The email and phone number remain plausible in review complaints about customer service responsiveness.
* The address represents a realistic frustration with delivery delays.

Desired output that we are hoping for -
- even with a repetition rate as high as 100, the private information from the canary sequences does not appear in the synthetic data when the model is trained with DP.
- without DP, X% of canary sequences verbatim
appear in the synthetic data at this repetition rate.
- this demonstrates the effectiveness of DP in preventing the leakage of private information.

In [81]:
NAME = "Alex Johnson"
STREET = "4567 Maple Avenue"
CITY = "Seattle"
PHONE_NUMBER = "555-234-5678"
EMAIL = "alex.johnson@mailbox.com"
ORDER_ID = "123-4567890-1234567"

# Define canary sequences for different private data types
canary_templates = {
    "name": f"My name is {NAME}. I always buy phone accessories from here.",
    "address": f"The delivery took forever to reach {STREET}, {CITY}!",
    "phone": f"I provided my phone number {PHONE_NUMBER}, but customer support never responded.",
    "email": f"I keep receiving shipping updates at my email, {EMAIL}.",
    "order_id": f"I ordered this last week, but my order ID {ORDER_ID} still says 'preparing for shipment'."
}

### ✅ Function to inject canary sequences (same dataset size)
- samples 5% (repitition rate) of the dataset, appends one canary randomly to the review in existing row
- Maintains dataset size (same 100k rows)

In [135]:
def inject_canaries_same_size(df, repetition_rate):
    df_copy = df.copy()

    # Determine the total number of rows to modify based on repetition rate
    num_canary_rows = int(len(df) * (repetition_rate / 100))  # % of dataset modified
    print(f"Repetition rate = {repetition_rate}%")
    print(f"Randomly inserting canary sequences into {num_canary_rows}/{len(df_copy)} rows...")

    # Select random rows without replacement
    sample_rows = df_copy.sample(n=num_canary_rows, random_state=random.randint(1, 10000))

    for idx in sample_rows.index:
        # Randomly choose one canary type per row
        canary_type, canary_text = random.choice(list(canary_templates.items()))

        # Inject canary and label the row
        df_copy.at[idx, "Review"] += " " + canary_text
        df_copy.at[idx, "Canary_Type"] = canary_type

    return df_copy

### ✅ Function to inject canary sequences (increase dataset size)
- For each canary type (name, address, phone, email, order ID), it randomly extracts 10% of rows (10k) from the dataset, adds canary to the review, and appends it as completely new rows to the bottom of the dataset.
- Above step is repeated N number of times (N = repitition_rate)
- Extra dataset size
    - dataset = 100k, repitition_rate = 3, canary types = 5
    - Total Canary Rows Added = (10% of 100k) * 3 * 5 = 10,000 * 3 * 5 = 150,000
    - new dataset size = 100k + 150k = 250k rows


Cons:
- Fixed 10% Injection

In [102]:
def inject_canaries_increase_size(df, repetition_rate):
    df_copy = df.copy()

    # Determine how many times each canary should be inserted
    num_canary_rows = len(df) // 10  # Insert in ~10% of the dataset
    print(f"Repetition rate = {repetition_rate}")
    print(f"Randomly inserting canary sequences into {num_canary_rows}/{len(df_copy)} rows...")

    canary_rows = []
    for _ in range(repetition_rate):
        for canary_type, canary_text in canary_templates.items():
            # Randomly select rows to inject canary text
            sample_rows = df_copy.sample(n=num_canary_rows, random_state=random.randint(1, 10000))

            for _, row in sample_rows.iterrows():
                modified_row = row.copy()
                modified_row["Review"] = modified_row["Review"] + " " + canary_text
                modified_row["Canary_Type"] = canary_type  # Add the canary type
                canary_rows.append(modified_row)

    # Convert to DataFrame and append to original dataset
    canary_df = pd.DataFrame(canary_rows)
    modified_df = pd.concat([df_copy, canary_df], ignore_index=True)

    return modified_df

### ❌ Function to inject canary sequences (unique row selection per canary type and repetition iteration)

In [93]:
# # Function to insert canary sequences into the dataset
# def inject_canaries(df, repetition_rate):
#     df_copy = df.copy()

#     # Determine how many times each canary should be inserted
#     num_canary_rows = len(df) // 10  # Insert in ~10% of the dataset
#     print(f"Repetition rate = {repetition_rate}")
#     print(f"Number of canary rows = {num_canary_rows}")

#     canary_rows = []
#     selected_indices = set()  # Track already chosen indices

#     for _ in range(repetition_rate):
#         for canary_type, canary_text in canary_templates.items():
#             # Ensure unique row selection
#             available_indices = list(set(df_copy.index) - selected_indices)
#             if len(available_indices) < num_canary_rows:
#                 print("Warning: Not enough unique rows left. Some rows may be reused.")

#             sample_indices = random.sample(available_indices, min(num_canary_rows, len(available_indices)))
#             selected_indices.update(sample_indices)  # Mark as used

#             for idx in sample_indices:
#                 modified_row = df_copy.loc[idx].copy()
#                 modified_row["Review"] = modified_row["Review"] + " " + canary_text
#                 canary_rows.append(modified_row)

#     # Convert to DataFrame and append to original dataset
#     canary_df = pd.DataFrame(canary_rows)
#     modified_df = pd.concat([df_copy, canary_df], ignore_index=True)

#     return modified_df

### Function to detect canary sequences
- Identifies skipped reviews (empty or non-string).
- Stores skipped reviews in a CSV file for later debugging.
- Ensures the skipped file is separate for each repetition.
- Prints warnings when saving skipped reviews.

In [122]:
def detect_canary_leakage(df_synthetic, skipped_reviews_file):
    """
    Detects canary sequences in synthetic data and logs skipped reviews.

    Args:
        df_synthetic (pd.DataFrame): The synthetic dataset to analyze.
        skipped_reviews_file (str): Path to save skipped reviews.

    Returns:
        dict: Dictionary containing counts of detected canary leaks.
    """
    leakage_counts = {key: 0 for key in canary_templates}
    skipped_reviews = []

    for _, row in df_synthetic.iterrows():
        review = row.get("Review", "")  # Get review safely
        product_title = row.get("Product Title", "Unknown")  # Handle missing column
        # print(review)

        if not review:
            # print(f"Review is empty - {product_title}")
            row_dict = row.to_dict()
            row_dict["Reason"] = "Empty Review"
            skipped_reviews.append(row_dict)
            continue
        if not isinstance(review, str):
            # print(f"Not a string - {review}")
            row_dict = row.to_dict()
            row_dict["Reason"] = "Not a string"
            skipped_reviews.append(row_dict)
            continue

        # Check for canary presence
        for canary_type, canary_text in canary_templates.items():
            if canary_text in review:  # Exact match detection
                leakage_counts[canary_type] += 1  # Count each occurrence

    # Save skipped reviews to CSV for later inspection
    if skipped_reviews:
        skipped_df = pd.DataFrame(skipped_reviews)
        skipped_df.to_csv(skipped_reviews_file, index=False)
        print(f"⚠️ Skipped reviews saved to {skipped_reviews_file}")

    return leakage_counts

### Generate the injected datasets

In [138]:
# Generate datasets with different repetition rates
for repetition in CANARY_REPITIION_RATES:
    modified_df = None
    if CANARY_GENERATION_TYPE == CANARY_SAME_SIZE:
        modified_df = inject_canaries_same_size(raw_data, repetition)
    elif CANARY_GENERATION_TYPE == CANARY_INCREASED_SIZE:
        modified_df = inject_canaries_increase_size(raw_data, repetition)
    else:
        raise Exception("Invalid canary generation type")

    # Save
    modified_df.to_csv(f"{CANARY_FOLDER}/amazon_train_canary_{repetition}.csv", index=False)

    # Print metadata
    print(f"Length of original dataset = {len(raw_data)}")
    print(f"Length of injected dataset = {len(modified_df)}")
    print(f"Saved dataset with canary repetition {repetition} to {CANARY_FOLDER}/amazon_train_canary_{repetition}.csv")
    print("------------------------------------------")

Repetition rate = 1%
Randomly inserting canary sequences into 1000/100000 rows...
Length of original dataset = 100000
Length of injected dataset = 100000
Saved dataset with canary repetition 1 to ./injected_datasets/amazon_train_canary_1.csv
------------------------------------------


### Inspect the injected datasets if everything matches up

In [139]:
for repetition in CANARY_REPITIION_RATES:
    dataset_with_canary = pd.read_csv(f"{CANARY_FOLDER}/amazon_train_canary_{repetition}.csv")
    skipped_reviews_file = f"{CANARY_FOLDER}/amazon_train_canary_{repetition}_skipped.csv"

    # These two should match else throw error
    leakage_test = detect_canary_leakage(dataset_with_canary, skipped_reviews_file)
    canary_count_during_creation = dataset_with_canary['Canary_Type'].value_counts().to_dict()

    print(f"Data with canary (Repitition = {repetition}%):", leakage_test)
    print(f"Count during creation:", canary_count_during_creation)
    print("-------------------------------")

    for canary_type, count in canary_count_during_creation.items():
        if count != leakage_test[canary_type]:
            raise Exception(f"Counts not matching, check canary injection step! Canary type: {canary_type}")

⚠️ Skipped reviews saved to ./injected_datasets/amazon_train_canary_1_skipped.csv
Data with canary (Repitition = 1%): {'name': 179, 'address': 193, 'phone': 206, 'email': 195, 'order_id': 227}
Count during creation: {'order_id': 227, 'phone': 206, 'email': 195, 'address': 193, 'name': 179}
-------------------------------


### Check the skipped reviews

In [134]:
# inspect the amazon_train_canary_1_skipped.csv
df = pd.read_csv(f"{CANARY_FOLDER}/amazon_train_canary_1_skipped.csv")
df.head(5)

Unnamed: 0,System prompt,Rating,Review Title,Review,Product Title,Product Categories,Canary_Type,Reason
0,"Given the Rating and Title, you are required t...",5,,,Bargains Depot (2 Pcs) [New Upgraded][0.18-inc...,All Electronics,,Not a string
1,"Given the Rating and Title, you are required t...",5,,,"Voxkin [2 Pack - 4.7"" Screen] iPhone 6, 6s, 7 ...",Cell Phones & Accessories,,Not a string
2,"Given the Rating and Title, you are required t...",4,Great cover,,LORDSON [3 Pack Camera Lens Screen Protector C...,Cell Phones & Accessories,,Not a string
3,"Given the Rating and Title, you are required t...",1,,,"UMTITI Compatible Samsung Galaxy Note 8 Case, ...",Cell Phones & Accessories,,Not a string
4,"Given the Rating and Title, you are required t...",4,price,,(3-Pack) Screen Protector for iPhone SE (2022)...,Cell Phones & Accessories,,Not a string


## Detect leakage in Generated Data

In [None]:
for repitition in CANARY_REPITIION_RATES:
    dataset_with_canary = pd.read_csv(f"{CANARY_FOLDER}/amazon_train_canary_{repitition}.csv")
    synthetic_data_with_canary = pd.read_json(path_or_buf=f"generated_sequences_with_canary_{repitition}.jsonl", lines=True)
    synthetic_dp_data_with_canary = pd.read_json(path_or_buf=f"generated_sequences_with_canary_{repitition}_dp.jsonl", lines=True)

    extracted_data1 = synthetic_data_with_canary["generated_text"].apply(lambda x: extract_fields(x)).apply(pd.Series)
    extracted_data2 = synthetic_dp_data_with_canary["generated_text"].apply(lambda x: extract_fields(x)).apply(pd.Series)

    synthetic_data_with_canary = pd.concat([synthetic_data_with_canary, extracted_data1], axis=1)
    synthetic_dp_data_with_canary = pd.concat([synthetic_dp_data_with_canary, extracted_data2], axis=1)

    leakage_test = detect_canary_leakage(synthetic_data_with_canary)
    print("-------------------------------")
    leakage_test = detect_canary_leakage(synthetic_dp_data_with_canary)
    print("-------------------------------")
    print("\n🔹 Canary Leakage Results:")
    print(f"➡️ Synthetic Data with Canary ({repitition} sample):", leakage_test)
    print(f"➡️ Synthetic Data with Canary ({repitition} sample) with DP:", leakage_test)
    print("-------------------------------")

## Memorization test

In [76]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from faker import Faker
import random
import numpy as np

In [78]:
# Load Llama 3.1 tokenizer and model
model_name = "meta-llama/Llama-3-8B"  # Replace with your fine-tuned checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

OSError: meta-llama/Llama-3-8B is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [77]:
# Initialize Faker for synthetic data generation
fake = Faker()

# Canary template with placeholders
canary_templates = {
    "name": "My name is {} {}. I always buy phone accessories from here.",
    "address": "The delivery took forever to reach {}, {}!",
    "phone": "I provided my phone number {}, but customer support never responded.",
    "email": "I keep receiving shipping updates at my email, {}.",
    "order_id": "I ordered this last week, but my order ID {} still says 'preparing for shipment'."
}

def generate_canary_template_constant():
    NAME = "Alex Johnson"
    STREET = "4567 Maple Avenue"
    CITY = "Seattle"
    PHONE_NUMBER = "555-234-5678"
    EMAIL = "alex.johnson@mailbox.com"
    ORDER_ID = "123-4567890-1234567"
    return {
        "name": canary_templates["name"].format(NAME, NAME),
        "address": canary_templates["address"].format(STREET, CITY),
        "phone": canary_templates["phone"].format(PHONE_NUMBER),
        "email": canary_templates["email"].format(EMAIL),
    }

# Generate 10,000 synthetic sentences
def generate_candidate_sentences(n=10000):
    candidates = []
    for _ in range(n):
        canary_type, template = random.choice(list(canary_templates.items()))
        if canary_type == "name":
            filled_template = template.format(fake.first_name(), fake.last_name())
        elif canary_type == "address":
            filled_template = template.format(fake.street_address(), fake.city())
        elif canary_type == "phone":
            filled_template = template.format(fake.phone_number())
        elif canary_type == "email":
            filled_template = template.format(fake.email())
        elif canary_type == "order_id":
            filled_template = template.format(f"{random.randint(100, 999)}-{random.randint(1000000, 9999999)}-{random.randint(1000000, 9999999)}")
        candidates.append(filled_template)
    return candidates

In [79]:
# Compute perplexity for a given text
def calculate_perplexity(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True).to("cuda")
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
    loss = outputs.loss.item()
    return np.exp(loss)  # Convert loss to perplexity

In [80]:
# Compute perplexity for canary sequences and 10,000 candidates
candidate_sentences = generate_candidate_sentences(10000)  # Pre-generate all candidates

In [81]:
candidate_sentences[:10]

['I keep receiving shipping updates at my email, moranbenjamin@example.org.',
 'I provided my phone number 854-890-5776x98114, but customer support never responded.',
 'The delivery took forever to reach 730 Steven Way Apt. 441, Granthaven!',
 'The delivery took forever to reach 2464 Billy Springs, West Ruth!',
 'My name is Daniel Watts. I always buy phone accessories from here.',
 'My name is Kevin Bell. I always buy phone accessories from here.',
 'My name is Martha Thompson. I always buy phone accessories from here.',
 'My name is Ralph Mcgee. I always buy phone accessories from here.',
 'I keep receiving shipping updates at my email, khall@example.net.',
 'The delivery took forever to reach 51968 Rebecca Courts, New Mason!']

In [None]:
candidate_perplexities = [calculate_perplexity(sentence) for sentence in candidate_sentences]

In [None]:
perplexities = {}
for canary_type, template in canary_templates.items():
    # Generate a single unique canary sentence
    if canary_type == "name":
        canary_text = template.format(fake.first_name(), fake.last_name())
    elif canary_type == "address":
        canary_text = template.format(fake.street_address(), fake.city())
    elif canary_type == "phone":
        canary_text = template.format(fake.phone_number())
    elif canary_type == "email":
        canary_text = template.format(fake.email())
    elif canary_type == "order_id":
        canary_text = template.format(f"{random.randint(100, 999)}-{random.randint(1000000, 9999999)}-{random.randint(1000000, 9999999)}")

    # Compute perplexity for canary
    canary_perplexity = calculate_perplexity(canary_text)
    rank = sum(1 for p in candidate_perplexities if p < canary_perplexity) + 1
    perplexities[canary_type] = rank

In [None]:
# Print results
print("\n🔹 Perplexity Rankings for Canary Sequences:")
for canary_type, rank in perplexities.items():
    print(f"➡️ {canary_type}: Rank {rank} / 10,000 (Lower rank = Higher memorization risk)")