# EncypherAI v2.1.0 Demo

This notebook demonstrates the core functionality of EncypherAI v2.1.0, focusing on the new Ed25519 digital signature support for embedding and verifying metadata in text.

## 1. Setup

Install the package, then import the necessary classes and functions.

In [None]:
!uv pip install encypher-ai

In [None]:
import json
import copy
from datetime import datetime, timezone
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import ed25519

from encypher import Settings, UnicodeMetadata
from encypher.core.constants import MetadataTarget  # Optional: if changing target
from encypher.core.crypto_utils import (
    generate_key_pair,
    load_private_key,
    load_public_key,
)

## 2. Key Generation

Generate an Ed25519 key pair for signing and verification. In a real application, you would securely store and manage these keys.

In [None]:
# Fresh key pair for this session: the private half is passed to embed_metadata
# below, the public half is handed out by the public key provider.
private_key, public_key = generate_key_pair()

# Identifier stored alongside the metadata so a verifier can look up the right public key.
signer_id = "encypher-colab-test"

# Serialize both halves to PEM purely so they can be displayed.
pem_encoding = serialization.Encoding.PEM
private_pem = private_key.private_bytes(
    encoding=pem_encoding,
    format=serialization.PrivateFormat.PKCS8,
    encryption_algorithm=serialization.NoEncryption(),
)
public_pem = public_key.public_bytes(
    encoding=pem_encoding,
    format=serialization.PublicFormat.SubjectPublicKeyInfo,
)

print(f"Signer ID: {signer_id}")
# Demo only: the private key is written unencrypted to the cell output.
# Never print or commit a real private key.
print(f"\nPrivate Key (PEM):\n{private_pem.decode()}")
print(f"\nPublic Key (PEM):\n{public_pem.decode()}")

## 3. Public Key Provider Function

Verification requires a way to look up the correct public key based on the `signer_id` found in the metadata. This is a simple example; real implementations might involve databases or key management systems.

In [None]:
# Registry of trusted signers, keyed by signer ID.
# A real application would back this with a database or key management system.
known_public_keys = {signer_id: public_key}


def public_key_provider(signer_id_to_lookup: str) -> ed25519.Ed25519PublicKey | None:
    """Resolve a signer ID to its registered public key.

    Args:
        signer_id_to_lookup: Signer ID to resolve.

    Returns:
        The key registered in ``known_public_keys`` for that ID, or ``None``
        when the ID is not registered.
    """
    print(f"Looking up public key for: {signer_id_to_lookup}")
    if signer_id_to_lookup in known_public_keys:
        return known_public_keys[signer_id_to_lookup]
    return None

## 4. Basic Format Example

Embed and verify simple key-value metadata.

In [None]:
sample_text_basic = "This is the original text where we want to embed some basic information."
basic_metadata = {"document_id": "doc-12345", "version": "1.0"}

print(f"Original Text:\n{sample_text_basic}")
print(f"\nMetadata to Embed:\n{basic_metadata}")

# Embed the key-value metadata with the private key; the timestamp is Unix seconds (UTC).
embedded_text_basic = UnicodeMetadata.embed_metadata(
    text=sample_text_basic,
    private_key=private_key,
    signer_id=signer_id,
    metadata_format="basic",
    custom_metadata=basic_metadata,
    timestamp=int(datetime.now(timezone.utc).timestamp()),
)

print(f"\nText with Embedded Basic Metadata:\n{embedded_text_basic}")

# Round trip: the provider supplies the public key for the signer ID found in the text.
basic_verification = UnicodeMetadata.verify_and_extract_metadata(
    text=embedded_text_basic,
    public_key_provider=public_key_provider,
)
is_valid_basic, extracted_signer_id_basic, extracted_payload_basic = basic_verification

print(f"\nVerification Result (Basic): {is_valid_basic}")
if is_valid_basic:
    print(f"Extracted Signer ID: {extracted_signer_id_basic}")
    print(f"Extracted Payload (Basic):\n{json.dumps(extracted_payload_basic, indent=2)}")

## 5. Manifest Format Example

Embed and verify structured metadata using the C2PA-inspired manifest format.

In [None]:
sample_text_manifest = "This paragraph was generated by an AI assistant to summarize a complex topic."

# Each action records what happened, when (Unix seconds, UTC) and which tool did it.
manifest_actions = [
    {"action": "c2pa.created", "when": int(datetime.now(timezone.utc).timestamp()), "softwareAgent": "EncypherAI SDK v2.0"},
    {"action": "c2pa.edited", "when": int(datetime.now(timezone.utc).timestamp()), "softwareAgent": "TextEditor v5"},
]
manifest_metadata = {
    "claim_generator": "MyAwesomeApp/1.0",
    "actions": manifest_actions,
    # Optional AI-specific info
    "ai_info": {
        "model_name": "GPT-4o",
        "prompt": "Summarize the concept of zero-width characters.",
    },
    # Other arbitrary claims
    "custom_claims": {
        "project_code": "ProjectX",
        "review_status": "Pending",
    },
}

print(f"Original Text:\n{sample_text_manifest}")
# default=str stringifies any value json cannot serialize natively; the integer
# timestamps used here do not need it, so it only acts as a safety net.
print(f"\nMetadata to Embed (Manifest):\n{json.dumps(manifest_metadata, indent=2, default=str)}")

# Embed the manifest with the private key; each manifest section is passed as its own argument.
embedded_text_manifest = UnicodeMetadata.embed_metadata(
    text=sample_text_manifest,
    private_key=private_key,
    signer_id=signer_id,
    metadata_format="manifest",
    claim_generator=manifest_metadata["claim_generator"],
    actions=manifest_metadata["actions"],
    ai_info=manifest_metadata.get("ai_info"),
    custom_claims=manifest_metadata.get("custom_claims"),
    timestamp=int(datetime.now(timezone.utc).timestamp()),
)

print(f"\nText with Embedded Manifest Metadata:\n{embedded_text_manifest}")

# Round trip: the provider supplies the public key for the signer ID found in the text.
manifest_verification = UnicodeMetadata.verify_and_extract_metadata(
    text=embedded_text_manifest,
    public_key_provider=public_key_provider,
)
is_valid_manifest, extracted_signer_id_manifest, extracted_payload_manifest = manifest_verification

print(f"\nVerification Result (Manifest): {is_valid_manifest}")
if is_valid_manifest:
    print(f"Extracted Signer ID: {extracted_signer_id_manifest}")
    print(f"Extracted Payload (Manifest):\n{json.dumps(extracted_payload_manifest, indent=2)}")

## 6. Tamper Detection Example

Demonstrate that modifying the text after embedding invalidates the signature.

In [None]:
# Start from the text produced by the basic-format example.
print(f"Original Embedded Text:\n{embedded_text_basic}")

# Tamper by inserting a visible character into the variation selector stream,
# which should break the contiguity of the selectors and make verification fail.
# NOTE(review): this assumes the selectors were embedded directly after the
# first space, which depends on the embedding target used by embed_metadata — confirm.
original_first_space_index = embedded_text_basic.find(" ")
if original_first_space_index != -1 and len(embedded_text_basic) > original_first_space_index + 1:
    # The first selector sits at original_first_space_index + 1, so slicing at
    # + 2 places the 'X' immediately after it. The length guard above already
    # guarantees insertion_point <= len(text), so no second bounds check is needed.
    insertion_point = original_first_space_index + 2
    tampered_text_basic = (
        embedded_text_basic[:insertion_point]    # text up to and including the first selector
        + "X"                                    # the tampering character
        + embedded_text_basic[insertion_point:]  # remaining selectors and text
    )
    print(f"Tampering by inserting 'X' after the first variation selector (at text index {insertion_point}).")
else:
    # No space, or nothing after it: fall back to tampering with the visible text.
    tampered_text_basic = embedded_text_basic + "!"
    print("Warning: No space found or text too short. Tampering by appending '!'.")

print(f"\nTampered Text:\n{tampered_text_basic}")

# Attempt to verify the tampered text.
is_valid_tampered, extracted_signer_id_tampered, extracted_payload_tampered = (
    UnicodeMetadata.verify_and_extract_metadata(
        text=tampered_text_basic, public_key_provider=public_key_provider
    )
)

print(f"\nVerification Result (Tampered): {is_valid_tampered}")
if not is_valid_tampered:
    print("Verification correctly failed for tampered text.")
    # return_payload_on_failure is not passed here, so the library default applies
    # (NOTE(review): presumably False, in which case the payload should be None — confirm).
    print(f"Extracted Signer ID (if any): {extracted_signer_id_tampered}")
    print(f"Extracted Payload (if any):\n{extracted_payload_tampered}")
else:
    # Reaching this branch means the tampering did not invalidate the signature.
    print("Verification UNEXPECTEDLY PASSED for tampered text. This indicates the tampering was not effective or there's another issue.")
    print(f"Extracted Signer ID: {extracted_signer_id_tampered}")
    print(f"Extracted Payload:\n{json.dumps(extracted_payload_tampered, indent=2) if extracted_payload_tampered else 'None'}")


## 7. Tamper Detection Example #2

Modifying the metadata without re-signing it with the original private key should cause verification to fail.

In [None]:
def preview_with_placeholder(text: str, hidden_block: str, placeholder: str, limit: int = 60) -> str:
    """Return the first ``limit`` characters of ``text`` with ``hidden_block`` shown as ``placeholder``.

    The substitution is done BEFORE truncating. Truncating first would cut the
    hidden block short, so the replacement would never match and the invisible
    characters would be printed instead of the placeholder.

    Args:
        text: Full text that contains the hidden block.
        hidden_block: Exact run of characters to make visible; an empty string
            disables the substitution.
        placeholder: Visible stand-in for ``hidden_block``.
        limit: Maximum number of characters to return.

    Returns:
        The substituted text, truncated to ``limit`` characters.
    """
    if hidden_block:
        text = text.replace(hidden_block, placeholder)
    return text[:limit]


print("\n\n--- Tamper Detection Example 2: Modifying Payload Data, Reusing Original Signature ---")

# The payload returned by the successful basic-format verification is used as
# the stand-in for what embed_metadata originally signed.
if 'extracted_payload_basic' not in locals() or not extracted_payload_basic:
    print("Error: 'extracted_payload_basic' is not defined or is empty. Please ensure the previous cell (basic embedding and verification) has run successfully.")
else:
    # 1. Rebuild the signed form of the original payload.
    original_complete_payload_dict = copy.deepcopy(extracted_payload_basic)

    original_payload_for_signing_str = json.dumps(
        original_complete_payload_dict,
        sort_keys=True,
        separators=(',', ':'),
    )
    original_payload_for_signing_bytes = original_payload_for_signing_str.encode("utf-8")
    # NOTE(review): this signs the reconstructed payload rather than reading the
    # signature out of the embedded text. It presumably equals the embedded
    # signature only if this serialization matches the bytes embed_metadata
    # signed — confirm against the library.
    original_signature = private_key.sign(original_payload_for_signing_bytes)
    original_signature_hex = original_signature.hex()

    print(f"Original complete payload (from previous verification) (first 70 chars): {original_payload_for_signing_str[:70]}...")
    print(f"Original signature (first 10 chars): {original_signature_hex[:10]}...")

    # 2. Create a modified payload dictionary (tampering with the content).
    modified_payload_dict = copy.deepcopy(original_complete_payload_dict)
    # Make sure there is a dict to write the tampered fields into.
    if "custom_metadata" not in modified_payload_dict or not isinstance(modified_payload_dict["custom_metadata"], dict):
        modified_payload_dict["custom_metadata"] = {}

    modified_payload_dict["custom_metadata"]["document_id"] = "doc-TAMPERED-999"
    modified_payload_dict["timestamp"] = datetime.now(timezone.utc).isoformat(timespec='seconds').replace('+00:00', 'Z')  # New timestamp
    modified_payload_dict["custom_metadata"]["version"] = "6.6.6"  # Tamper another field

    modified_payload_for_signing_str = json.dumps(
        modified_payload_dict,
        sort_keys=True,
        separators=(',', ':'),
    )
    print(f"Modified payload for signing (first 70 chars): {modified_payload_for_signing_str[:70]}...")

    # 3. Construct the "tampered" signed string: MODIFIED payload + ORIGINAL signature.
    #    NOTE(review): assumes the embedded wire format is `payload||signature_hex` — confirm.
    tampered_signed_payload_str = f"{modified_payload_for_signing_str}||{original_signature_hex}"
    print(f"Tampered signed string (modified_payload||original_signature) (first 100 chars): {tampered_signed_payload_str[:100]}...")

    # 4. Convert the tampered string into variation selectors, one per byte;
    #    bytes for which no selector is returned are skipped.
    tampered_bytes_to_embed = tampered_signed_payload_str.encode("utf-8")
    tampered_selector_chars_list = [
        vs
        for vs in (UnicodeMetadata.to_variation_selector(byte_val) for byte_val in tampered_bytes_to_embed)
        if vs
    ]
    tampered_selector_chars_str = "".join(tampered_selector_chars_list)

    # 5. Splice the selectors into the original clean text, right after its first space.
    original_first_space_idx = sample_text_basic.find(" ")

    tampered_text_payload_modified = ""
    if original_first_space_idx != -1:
        tampered_text_payload_modified = (
            sample_text_basic[:original_first_space_idx + 1]
            + tampered_selector_chars_str
            + sample_text_basic[original_first_space_idx + 1:]
        )
        # Replace the selector block with the placeholder first, then truncate,
        # so the snippet actually shows the placeholder.
        tampered_snippet = preview_with_placeholder(
            tampered_text_payload_modified, tampered_selector_chars_str, "[TAMPERED_VS_BLOCK]", 60
        )
        print(f"Tampered Text (payload data altered) (snippet): '{tampered_snippet}'...")

        # 6. Attempt to verify the tampered text, asking for the payload even on failure.
        is_valid_tampered, signer_id_tampered, payload_tampered = (
            UnicodeMetadata.verify_and_extract_metadata(
                text=tampered_text_payload_modified,
                public_key_provider=public_key_provider,
                return_payload_on_failure=True,
            )
        )

        print(f"\nVerification Result (Altered Payload Data): {is_valid_tampered}")
        if not is_valid_tampered:
            print("Verification CORRECTLY FAILED: The signature of the original payload does not match the altered payload data.")
            print(f"Extracted Signer ID (if any, from altered payload): {signer_id_tampered}")
            print("Extracted (but unverified due to signature mismatch) Payload:")
            print(json.dumps(payload_tampered, indent=2) if payload_tampered else "None")
        else:
            print("Verification UNEXPECTEDLY PASSED. This indicates a potential issue.")
            if payload_tampered:
                print(f"Extracted Signer ID: {signer_id_tampered}")
                print("Extracted Payload:")
                print(json.dumps(payload_tampered, indent=2))
    else:
        print("Error: Could not find a space in 'sample_text_basic' to perform this tampering simulation.")

## Conclusion

This notebook showed how to use EncypherAI v2.1.0 to:
1. Generate Ed25519 keys.
2. Define a public key provider.
3. Embed metadata (basic and manifest formats) using a private key.
4. Verify the integrity and authenticity of the embedded metadata using the corresponding public key.
5. Detect tampering attempts.