In [1]:
# Embedding Inversion Attack Demonstration
# This notebook shows how embeddings can leak sensitive information

# %% [markdown]
# ## 1. Setup and Create Embeddings

# %%
import numpy as np
from sentence_transformers import SentenceTransformer
import torch
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine
import warnings
warnings.filterwarnings('ignore')

# Load embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')
print(f"Model dimension: {model.get_sentence_embedding_dimension()}")

# Sensitive documents to embed
sensitive_texts = [
    "John Doe's SSN is 123-45-6789",
    "Credit card 4532-1234-5678-9012",
    "Password: MyS3cr3tP@ssw0rd!",
    "API key: sk-1234567890abcdef",
    "Salary: $120,000 per year",
    "Medical diagnosis: diabetes type 2",
    "Phone: 555-123-4567",
    "Email: john.doe@example.com"
]

# Create embeddings
embeddings = model.encode(sensitive_texts)
print(f"Created {len(embeddings)} embeddings of dimension {embeddings.shape[1]}")

# %% [markdown]
# ## 2. Attack Method 1: Nearest Neighbor Attack
# If an attacker has access to a reference dataset, they can find similar texts

# %%
# Simulate attacker's reference dataset
reference_texts = [
    # SSN patterns
    "My SSN is 987-65-4321",
    "Social security number 111-22-3333",
    "SSN: 444-55-6666",
    # Credit card patterns
    "Visa card 4111-1111-1111-1111",
    "Credit card number 5500-0000-0000-0004",
    "Card: 3700-0000-0000-002",
    # Password patterns
    "Password: SecretPass123!",
    "My password is P@ssw0rd",
    "Login: Admin123!",
    # Salary patterns
    "Annual salary $95,000",
    "Makes $150,000 per year",
    "Salary: $75,000 annually",
    # Medical patterns
    "Diagnosed with hypertension",
    "Patient has diabetes",
    "Medical condition: asthma"
]

reference_embeddings = model.encode(reference_texts)

# Find nearest neighbors for each sensitive embedding
print("=== NEAREST NEIGHBOR ATTACK ===\n")
for i, (text, emb) in enumerate(zip(sensitive_texts, embeddings)):
    # Calculate similarities
    similarities = cosine_similarity([emb], reference_embeddings)[0]
    best_match_idx = np.argmax(similarities)
    
    print(f"Target: {text}")
    print(f"Best match: {reference_texts[best_match_idx]}")
    print(f"Similarity: {similarities[best_match_idx]:.4f}")
    print()

# %% [markdown]
# ## 3. Attack Method 2: Token Probing Attack
# Test if specific sensitive tokens are present in the text

# %%
# Tokens to probe for
probe_tokens = [
    "SSN", "123-45-6789", "John", "Doe",
    "credit", "card", "4532", "password",
    "salary", "$120,000", "diabetes", "API",
    "secret", "medical", "diagnosis"
]

# Create probe embeddings
probe_embeddings = model.encode(probe_tokens)

print("=== TOKEN PROBING ATTACK ===\n")
# For each sensitive text, check which tokens have high similarity
for i, (text, emb) in enumerate(zip(sensitive_texts, embeddings)):
    print(f"Analyzing: {text}")
    
    # Calculate similarities with all probes
    similarities = cosine_similarity([emb], probe_embeddings)[0]
    
    # Find high-similarity tokens
    high_sim_indices = np.where(similarities > 0.3)[0]
    if len(high_sim_indices) > 0:
        print("Detected tokens:")
        for idx in high_sim_indices:
            print(f"  - '{probe_tokens[idx]}' (similarity: {similarities[idx]:.3f})")
    print()

# %% [markdown]
# ## 4. Attack Method 3: Gradient-Based Inversion (Conceptual)
# With model access, we can try to reconstruct text that produces similar embeddings

# %%
# This is a simplified demonstration of the concept
# In practice, this would require access to the model's gradients

def measure_embedding_leakage(original_text, embedding, model, num_probes=100):
    """
    Measure how much information leaks by testing random modifications
    """
    # Generate variations of common sensitive patterns
    patterns = [
        "SSN XXX-XX-XXXX",
        "Credit card XXXX-XXXX-XXXX-XXXX",
        "Password: XXXXXXXX",
        "Salary $XXX,XXX",
        "Diagnosis: XXXXX"
    ]
    
    best_match = None
    best_similarity = -1
    
    for pattern in patterns:
        # Test different values for the pattern
        for _ in range(num_probes // len(patterns)):
            # Generate a random instantiation
            test_text = pattern
            if "SSN" in pattern:
                test_text = test_text.replace("XXX-XX-XXXX", 
                    f"{np.random.randint(100,999)}-{np.random.randint(10,99)}-{np.random.randint(1000,9999)}")
            elif "card" in pattern:
                test_text = test_text.replace("XXXX-XXXX-XXXX-XXXX",
                    f"{np.random.randint(1000,9999)}-{np.random.randint(1000,9999)}-{np.random.randint(1000,9999)}-{np.random.randint(1000,9999)}")
            elif "Salary" in pattern:
                test_text = test_text.replace("XXX,XXX",
                    f"{np.random.randint(50,200)},{np.random.randint(0,999):03d}")
            
            # Get embedding and compare
            test_embedding = model.encode([test_text])[0]
            similarity = 1 - cosine(embedding, test_embedding)
            
            if similarity > best_similarity:
                best_similarity = similarity
                best_match = test_text
    
    return best_match, best_similarity

print("=== PATTERN MATCHING ATTACK ===\n")
for text, emb in zip(sensitive_texts[:3], embeddings[:3]):  # Demo on first 3
    reconstructed, similarity = measure_embedding_leakage(text, emb, model)
    print(f"Original: {text}")
    print(f"Best reconstruction: {reconstructed}")
    print(f"Similarity: {similarity:.4f}\n")

# %% [markdown]
# ## 5. Attack Method 4: Embedding Space Analysis
# Analyze the geometry of embeddings to infer properties

# %%
# Calculate centroid of different types of sensitive data
ssn_embeddings = embeddings[0:1]  # SSN example
financial_embeddings = embeddings[1:2]  # Credit card
credential_embeddings = embeddings[2:4]  # Password and API key
pii_embeddings = embeddings[4:8]  # Other PII

# Create "category vectors" by averaging similar types
categories = {
    "SSN-like": model.encode(["SSN 000-00-0000", "Social Security 111-11-1111"]).mean(axis=0),
    "Financial": model.encode(["Credit card 0000-0000-0000-0000", "Bank account 12345"]).mean(axis=0),
    "Credentials": model.encode(["Password: xxxx", "API key: xxxx"]).mean(axis=0),
    "Medical": model.encode(["Diagnosis: condition", "Medical record"]).mean(axis=0)
}

print("=== CATEGORY DETECTION ===\n")
for text, emb in zip(sensitive_texts, embeddings):
    print(f"Text: {text}")
    
    # Find closest category
    best_category = None
    best_sim = -1
    for cat_name, cat_emb in categories.items():
        sim = 1 - cosine(emb, cat_emb)
        if sim > best_sim:
            best_sim = sim
            best_category = cat_name
    
    print(f"Detected category: {best_category} (confidence: {best_sim:.3f})\n")

# %% [markdown]
# ## 6. Visualization of Information Leakage

# %%
from sklearn.manifold import TSNE

# Combine sensitive and reference embeddings
all_embeddings = np.vstack([embeddings, reference_embeddings])
all_texts = sensitive_texts + reference_texts

# Reduce to 2D for visualization
tsne = TSNE(n_components=2, random_state=42)
embeddings_2d = tsne.fit_transform(all_embeddings)

# Plot
plt.figure(figsize=(12, 8))

# Plot sensitive texts in red
plt.scatter(embeddings_2d[:len(sensitive_texts), 0], 
           embeddings_2d[:len(sensitive_texts), 1], 
           c='red', s=100, label='Sensitive Data', marker='o')

# Plot reference texts in blue
plt.scatter(embeddings_2d[len(sensitive_texts):, 0], 
           embeddings_2d[len(sensitive_texts):, 1], 
           c='blue', s=100, label='Reference Data', marker='^')

# Add labels for sensitive texts
for i, txt in enumerate(sensitive_texts):
    plt.annotate(txt[:20] + "...", 
                (embeddings_2d[i, 0], embeddings_2d[i, 1]),
                xytext=(5, 5), textcoords='offset points', 
                fontsize=8, color='darkred')

plt.title("Embedding Space Visualization - Sensitive Data Clustering")
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# %% [markdown]
# ## 7. Summary: What Can Be Recovered

# %%
print("=== EMBEDDING INVERSION ATTACK SUMMARY ===\n")

print("1. PATTERN DETECTION:")
print("   - Can detect the TYPE of sensitive data (SSN, credit card, password)")
print("   - Can narrow down to specific patterns and formats")

print("\n2. PARTIAL RECONSTRUCTION:")
print("   - Can identify presence of specific tokens or words")
print("   - Can approximate numerical ranges (e.g., salary brackets)")

print("\n3. SIMILARITY MATCHING:")
print("   - With a reference dataset, can find very similar texts")
print("   - Can identify individuals if their data appears in training sets")

print("\n4. CATEGORY INFERENCE:")
print("   - Can determine the general category of information")
print("   - Can distinguish between medical, financial, credential data")

print("\n⚠️  CONCLUSION: Embeddings are NOT safe for sensitive data!")
print("Even without the original text, attackers can extract significant information.")
print("\n✅ This is why encrypted vector databases are essential for sensitive data.")

  from .autonotebook import tqdm as notebook_tqdm


Model dimension: 384
Created 8 embeddings of dimension 384
=== NEAREST NEIGHBOR ATTACK ===

Target: John Doe's SSN is 123-45-6789
Best match: SSN: 444-55-6666
Similarity: 0.6694

Target: Credit card 4532-1234-5678-9012
Best match: Visa card 4111-1111-1111-1111
Similarity: 0.7817

Target: Password: MyS3cr3tP@ssw0rd!
Best match: My password is P@ssw0rd
Similarity: 0.7289

Target: API key: sk-1234567890abcdef
Best match: SSN: 444-55-6666
Similarity: 0.3517

Target: Salary: $120,000 per year
Best match: Makes $150,000 per year
Similarity: 0.7349

Target: Medical diagnosis: diabetes type 2
Best match: Patient has diabetes
Similarity: 0.6339

Target: Phone: 555-123-4567
Best match: Social security number 111-22-3333
Similarity: 0.5628

Target: Email: john.doe@example.com
Best match: My password is P@ssw0rd
Similarity: 0.3032

=== TOKEN PROBING ATTACK ===

Analyzing: John Doe's SSN is 123-45-6789
Detected tokens:
  - 'SSN' (similarity: 0.610)
  - '123-45-6789' (similarity: 0.427)
  - 'John' (

ValueError: perplexity must be less than n_samples