In [2]:
"""
TEXT to TEXT STEGANOGRAPHY (Glyph Perturbation Cardinality, GPC)

This script implements the foundational instantiation of the proposed
raster-domain steganographic framework. Secret text characters are embedded
into cover text by operating exclusively on the rasterized glyph images
produced by a deterministic font rendering pipeline.

Each cover character is rendered into a fixed-size grayscale bitmap.
A secret character is mapped to an integer payload v ∈ [1, 26], corresponding
to letters A–Z, and encoded by perturbing exactly v interior ink pixels
within the glyph. Perturbations are applied strictly after rasterization,
leaving glyph outlines, contours, and visual identity unchanged.

Decoding is performed by re-rendering the cover text using identical
rasterization parameters and counting pixel-wise differences between
canonical and encoded glyph rasters.

This experiment validates:
- Deterministic raster behavior under fixed rendering parameters
- Perfect reversibility (CER = 0, BER = 0)
- Visual imperceptibility of interior pixel perturbations
- The feasibility of glyph perturbation cardinality as a lossless text
  steganographic channel

"""

# IMPORTING LIBRARIES
import os
import random
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt

# TEXT to TEXT (Glyph Perturbation Cardinality, deterministic)
# EXPERIMENT CONFIGURATION

# Demo strings. Cover glyph i carries secret character i, so both strings
# must have the same length.
COVER_TEXT = "HELLO"
SECRET_TEXT = "WORLD"

# Payload constraints: alphabet size (A..Z) and the seed that makes the
# choice of perturbed pixels reproducible.
P_MAX = 26
SEED = 42

# Grayscale convention: white background, black ink.
BG, INK = 255, 0
# Gray-level offset added to each perturbed ink pixel (paper setting: +1).
DELTA = 1

# Deterministic rendering parameters: font size and per-glyph tile size (px).
FONT_SIZE = 160
CANVAS_W, CANVAS_H = 200, 256

# All reports and figures are written below this directory; create it up front.
OUT_DIR = "text_to_text_results_final"
os.makedirs(OUT_DIR, exist_ok=True)


# COVER TEXT UTILIZATION ANALYSIS
# Only alphabetic cover characters are counted as available carrier glyphs.
cover_letters_all = [c for c in COVER_TEXT.upper() if c.isalpha()]
total_cover_letters = len(cover_letters_all)

# One cover glyph is needed per secret character.
required_letters = len(SECRET_TEXT)
# The number of glyphs in use can never exceed the number available; clamping
# keeps "glyphs used" and the utilization percentage within bounds (<= 100%)
# when the payload is longer than the cover.
used_letters = min(required_letters, total_cover_letters)
unused_letters = total_cover_letters - used_letters

# Guard against an empty (or non-alphabetic) cover text.
utilization_pct = (used_letters / total_cover_letters * 100) if total_cover_letters > 0 else 0.0

print("\n===== COVER TEXT UTILIZATION =====")
print("Total alphabetic characters in cover text :", total_cover_letters)
print("Payload characters required              :", required_letters)
print("Glyphs used for embedding                :", used_letters)
print("Glyphs unused                            :", unused_letters)
print(f"Cover text utilization                   : {utilization_pct:.2f}%")

# Persist the same figures as a plain-text report.
with open(f"{OUT_DIR}/coverage.txt", "w", encoding="utf-8") as f:
    f.write("TEXT to TEXT COVERAGE REPORT\n")
    f.write(f"Total cover letters   : {total_cover_letters}\n")
    f.write(f"Payload length        : {required_letters}\n")
    f.write(f"Glyphs used           : {used_letters}\n")
    f.write(f"Glyphs unused         : {unused_letters}\n")
    f.write(f"Utilization (%)       : {utilization_pct:.2f}\n")
    

# FONT LOADING (robust loading)
def get_font(size=FONT_SIZE):
    """
    Load a font at the requested pixel size.

    The candidate TrueType fonts are tried in order and the first one that
    loads is returned. If none is available, Pillow's built-in default font
    is used; on Pillow versions whose load_default() accepts a size, the
    requested size is honoured so the fallback is not a tiny fixed-size font.

    An invalid size (e.g. <= 0) is a caller error and is allowed to propagate
    instead of being silently masked by the fallback.
    """
    candidates = (
        "arial.ttf",
        "DejaVuSansMono.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf",
    )
    for path in candidates:
        try:
            return ImageFont.truetype(path, size)
        except (OSError, ImportError):
            # OSError: font file missing or unreadable.
            # ImportError: Pillow was built without FreeType support.
            continue

    try:
        # Newer Pillow releases can scale the built-in font to `size`.
        return ImageFont.load_default(size=size)
    except (TypeError, OSError, ImportError):
        # Older Pillow (no `size` keyword) or no FreeType: fixed-size default.
        return ImageFont.load_default()


# Module-wide font instance shared by every rasterization call.
FONT = get_font()


# RASTERIZE GLYPH (deterministic)
def rasterize_letter(letter: str) -> np.ndarray:
    """
    Rasterize a single character into a fixed-size grayscale tile.

    The glyph is drawn in INK on a BG canvas of CANVAS_W x CANVAS_H pixels,
    with its ink bounding box centred on the canvas.
    Identical parameters -> identical rasters (deterministic pipeline assumption).

    Returns a uint8 array of shape (CANVAS_H, CANVAS_W).
    """
    img = Image.new("L", (CANVAS_W, CANVAS_H), BG)
    draw = ImageDraw.Draw(img)

    # Bounding box of the ink when the text is drawn at the origin. Its
    # left/top values are generally non-zero (side bearing / ascent gap).
    left, top, right, bottom = draw.textbbox((0, 0), letter, font=FONT)
    w = right - left
    h = bottom - top

    # Subtract the bbox offsets so the *ink* (not the drawing origin) is
    # centred; otherwise the glyph is shifted right/down and may be clipped.
    x = (CANVAS_W - w) // 2 - left
    y = (CANVAS_H - h) // 2 - top
    draw.text((x, y), letter, fill=INK, font=FONT)

    return np.array(img, dtype=np.uint8)


# PAYLOAD MAPPING (paper’s A..Z)
def char_to_payload(ch: str) -> int:
    """
    Map 'A'..'Z' (case-insensitive) -> 1..26. Anything else returns 0.

    Input whose uppercase form is not exactly one character (empty string,
    multi-character strings, or characters such as 'ß' that uppercase to
    'SS') is treated as non-alphabetic and also returns 0.
    """
    upper = ch.upper()
    # The length check is required: a string comparison alone would accept
    # e.g. "SS" as lying between "A" and "Z", and ord() would then raise.
    if len(upper) == 1 and "A" <= upper <= "Z":
        return ord(upper) - ord("A") + 1
    return 0


def payload_to_char(v: int) -> str:
    """
    Inverse payload mapping: 1..26 -> 'A'..'Z'.

    Any value outside that range yields the placeholder '?'.
    """
    in_alphabet = 1 <= v <= 26
    return chr(ord("A") - 1 + v) if in_alphabet else "?"


# ENCODE / DECODE SINGLE GLYPH
def encode_glyph(canonical: np.ndarray, value: int, rng: random.Random) -> np.ndarray:
    """
    Encode an integer value v by perturbing v eligible ink pixels.

    Eligible pixels: canonical == INK (interior ink pixels only, conservative).
    Perturbation: increment pixel intensity by +DELTA, saturating at 255.

    Args:
        canonical: Canonical glyph raster; it is not modified.
        value: Payload to embed. Values <= 0 embed nothing.
        rng: Random generator used to pick which eligible pixels are perturbed.

    Returns:
        A copy of `canonical` with the selected pixels perturbed.

    Warns:
        RuntimeWarning: if the glyph has fewer eligible pixels than `value`.
        Only the available pixels are perturbed in that case, so the payload
        cannot be recovered by counting perturbed pixels.
    """
    enc = canonical.copy()
    if value <= 0:
        return enc

    # Eligible ink pixels (paper model: Ii = {p | Gi(p)=0})
    ys, xs = np.where(canonical == INK)
    pts = list(zip(ys.tolist(), xs.tolist()))

    if len(pts) < value:
        # Surface the capacity shortfall instead of silently corrupting the
        # payload; the best-effort encoding below is kept for compatibility.
        import warnings

        warnings.warn(
            f"Glyph capacity ({len(pts)} ink pixels) is below payload {value}; "
            "payload will not decode correctly.",
            RuntimeWarning,
            stacklevel=2,
        )
    if not pts:
        return enc

    k = min(value, len(pts))
    # sample() draws without replacement, so every chosen pixel is distinct.
    chosen = rng.sample(pts, k)

    for (r, c) in chosen:
        enc[r, c] = np.uint8(min(255, int(enc[r, c]) + DELTA))

    return enc


def decode_glyph(canonical: np.ndarray, encoded: np.ndarray) -> int:
    """
    Recover the embedded value from a glyph tile.

    The value is the number of positions that are ink (INK) in the canonical
    raster and exactly INK + DELTA in the encoded raster.
    """
    perturbed_level = INK + DELTA
    was_ink = canonical == INK
    now_perturbed = encoded == perturbed_level
    return int(np.count_nonzero(was_ink & now_perturbed))


# STRING-LEVEL ENCODING / DECODING
def encode_text(cover: str, secret: str):
    """
    Encode secret into cover (glyph aligned): cover glyph i carries secret[i].

    Args:
        cover: Cover text; each character is upper-cased and rasterized into
            one tile.
        secret: Secret text of the same length as `cover`.

    Returns:
        A tuple (canonical, encoded, payloads, capacities):
        canonical / encoded are the horizontally stacked glyph tiles before
        and after embedding, payloads holds the integer value embedded in
        each glyph, and capacities the number of eligible ink pixels per glyph.
        For empty input both rasters have zero width and both lists are empty.

    Raises:
        ValueError: if cover and secret differ in length.
    """
    if len(cover) != len(secret):
        raise ValueError(f"Cover and secret must have equal length. Got {len(cover)} vs {len(secret)}")

    if not cover:
        # np.hstack() rejects an empty tile list; return zero-width rasters.
        blank = np.empty((CANVAS_H, 0), dtype=np.uint8)
        return blank, blank.copy(), [], []

    # A single generator is shared across glyphs so the whole encoding is
    # reproducible from SEED.
    rng = random.Random(SEED)

    canonical_tiles = []
    encoded_tiles = []
    payloads = []
    capacities = []

    for cch, sch in zip(cover, secret):
        can = rasterize_letter(cch.upper())
        v = char_to_payload(sch)

        # Capacity diagnostic: number of eligible ink pixels in this glyph
        cap = int((can == INK).sum())

        enc = encode_glyph(can, v, rng)

        canonical_tiles.append(can)
        encoded_tiles.append(enc)
        payloads.append(v)
        capacities.append(cap)

    canonical = np.hstack(canonical_tiles)
    encoded = np.hstack(encoded_tiles)
    return canonical, encoded, payloads, capacities


def decode_text(cover: str, canonical: np.ndarray, encoded: np.ndarray) -> str:
    """
    Recover the secret text from the stacked canonical and encoded rasters.

    The rasters are cut into len(cover) tiles of CANVAS_W columns each; every
    tile pair is decoded to a perturbation count and mapped back to a character.
    """
    tile_starts = (idx * CANVAS_W for idx in range(len(cover)))
    return "".join(
        payload_to_char(
            decode_glyph(
                canonical[:, start:start + CANVAS_W],
                encoded[:, start:start + CANVAS_W],
            )
        )
        for start in tile_starts
    )


# METRICS
def character_error_rate(true_text: str, decoded_text: str) -> float:
    """
    Position-wise character error rate of decoded_text against true_text.

    Characters are compared index by index; every missing or surplus decoded
    character also counts as one error, so a truncated decode is not scored
    as perfect. The error count is divided by len(true_text) (by 1 when
    true_text is empty). For equal-length inputs this is the fraction of
    mismatching positions.
    """
    mismatches = sum(1 for t, d in zip(true_text, decoded_text) if t != d)
    # zip() stops at the shorter string; account for the unmatched tail.
    mismatches += abs(len(true_text) - len(decoded_text))
    return mismatches / max(1, len(true_text))


def bit_error_rate(true_text: str, decoded_text: str) -> float:
    """
    Bit error rate between the code points of true_text and decoded_text.

    Characters are compared position by position on their code points.
    Each character contributes 8 bits, or more when either code point needs
    more than 8 bits, so wide characters are compared right-aligned and fully
    counted. Every missing or surplus decoded character counts as 8 wrong
    bits. Returns 0.0 when there is nothing to compare.
    """
    total, wrong = 0, 0
    for t, d in zip(true_text, decoded_text):
        a, b = ord(t), ord(d)
        total += max(8, a.bit_length(), b.bit_length())
        # XOR leaves a 1 exactly where the two code points differ.
        wrong += bin(a ^ b).count("1")

    # zip() stops at the shorter string; treat the unmatched tail as all-wrong.
    unmatched = abs(len(true_text) - len(decoded_text))
    total += 8 * unmatched
    wrong += 8 * unmatched
    return wrong / max(1, total)


def mse(a: np.ndarray, b: np.ndarray) -> float:
    """
    Mean squared error between two arrays.

    Both inputs are converted to float32 before subtracting, so unsigned
    integer rasters do not wrap around.
    """
    residual = a.astype(np.float32) - b.astype(np.float32)
    return float(np.square(residual).mean())

# MAIN EXPERIMENT
# Embed the secret, then recover it again from the two rasters.
canonical, encoded, payloads, capacities = encode_text(COVER_TEXT, SECRET_TEXT)
decoded = decode_text(COVER_TEXT, canonical, encoded)

# Quality metrics: recovery accuracy (CER, BER) and raster distortion (MSE).
CER = character_error_rate(SECRET_TEXT, decoded)
BER = bit_error_rate(SECRET_TEXT, decoded)
MSE = mse(canonical, encoded)

# Console summary
print("Cover   :", COVER_TEXT)
print("Secret  :", SECRET_TEXT)
print("Decoded :", decoded)

print("\n===== METRICS =====")
print("CER:", CER)
print("BER:", BER)
print("MSE:", MSE)
print("Payloads (v):", payloads)
print("Capacities (#ink pixels):", capacities)

# Same figures as a plain-text report, written in one call.
with open(f"{OUT_DIR}/metrics.txt", "w", encoding="utf-8") as f:
    f.writelines([
        "TEXT to TEXT METRICS REPORT\n",
        f"CER   : {CER}\n",
        f"BER        : {BER}\n",
        f"MSE           : {MSE}\n",
        f"PAYLOADS         : {payloads}\n",
        f"CAPACITIES (#INK PIXELS)       : {capacities}\n",
    ])
    

# VISUALIZATION AND SAVED ARTIFACTS
# Signed per-pixel difference; casting to int first avoids uint8 wraparound.
diff = encoded.astype(int) - canonical.astype(int)
# Amplify the difference by 40 so it is visible, then clip into the 8-bit
# range (negative differences become 0).
diff_img = np.clip(diff * 40, 0, 255).astype(np.uint8)

# Save the three rasters as individual PNG files.
Image.fromarray(canonical).save(f"{OUT_DIR}/canonical.png")
Image.fromarray(encoded).save(f"{OUT_DIR}/encoded.png")
Image.fromarray(diff_img).save(f"{OUT_DIR}/difference.png")

# Side-by-side figure: canonical, encoded, and amplified difference.
plt.figure(figsize=(18, 6))
plt.subplot(1, 3, 1); plt.title("Canonical"); plt.imshow(canonical, cmap="gray"); plt.axis("off")
plt.subplot(1, 3, 2); plt.title("Encoded"); plt.imshow(encoded, cmap="gray"); plt.axis("off")
plt.subplot(1, 3, 3); plt.title("Difference"); plt.imshow(diff_img, cmap="hot"); plt.axis("off")
plt.tight_layout()
plt.savefig(f"{OUT_DIR}/raster_triplet.png", dpi=300)
plt.close()

# Histogram of the raw (unamplified) signed differences
plt.figure(figsize=(8, 5))
plt.hist(diff.flatten(), bins=50, alpha=0.75)
plt.title("Pixel Difference Histogram")
plt.xlabel("Encoded - Canonical")
plt.ylabel("Frequency")
plt.tight_layout()
plt.savefig(f"{OUT_DIR}/difference_histogram.png", dpi=300)
plt.close()

# Bar chart: payload value embedded in each glyph
plt.figure(figsize=(8, 4))
plt.bar(range(len(payloads)), payloads)
plt.title("Perturbation Count Per Glyph (v)")
plt.xlabel("Glyph index")
plt.ylabel("v (pixels perturbed)")
plt.tight_layout()
plt.savefig(f"{OUT_DIR}/perturbation_counts.png", dpi=300)
plt.close()

# Bar chart: number of eligible ink pixels available in each glyph
plt.figure(figsize=(8, 4))
plt.bar(range(len(capacities)), capacities)
plt.title("Ink Pixel Capacity Per Glyph")
plt.xlabel("Glyph index")
plt.ylabel("# eligible ink pixels")
plt.tight_layout()
plt.savefig(f"{OUT_DIR}/stroke_capacity.png", dpi=300)
plt.close()

print("\nAll outputs saved to:", OUT_DIR)



===== COVER TEXT UTILIZATION =====
Total alphabetic characters in cover text : 5
Payload characters required              : 5
Glyphs used for embedding                : 5
Glyphs unused                            : 0
Cover text utilization                   : 100.00%
Cover   : HELLO
Secret  : WORLD
Decoded : WORLD

===== METRICS =====
CER: 0.0
BER: 0.0
MSE: 0.00028124998789280653
Payloads (v): [23, 15, 18, 12, 4]
Capacities (#ink pixels): [4013, 4249, 2351, 2351, 4202]

All outputs saved to: text_to_text_results_final
