# In [None]:
"""
AUDIO → TEXT STEGANOGRAPHY (GPC) — MAIN PAPER VERSION (0–26 levels)
"""

import os
import random
import numpy as np
import librosa
import librosa.display
import soundfile as sf
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont

# ============================================================
# CONFIG (MAIN PAPER SETTINGS)
# ============================================================

# --- Input / output locations --------------------------------
AUDIO_FILE = "Audio_1.mp3"            # audio file that supplies the payload
OUT_DIR = "audio_to_text_26_Audio_1"  # folder receiving every generated file

# --- Audio framing -------------------------------------------
SR = 16_000    # sample rate requested when the audio is loaded
FRAME = 1024   # samples per analysis frame
HOP = 512      # samples between consecutive frame starts

# --- Glyph encoding ------------------------------------------
P_MAX = 26     # MAIN PAPER: payload levels run from 0 to 26
TILE_W = 40    # glyph tile width in pixels
TILE_H = 50    # glyph tile height in pixels
FONT_SIZE = 36
SEED = 42      # seed for the pixel-selection random generator

# Cover sentence repeated 200 times; only its letters are used later on.
COVER_TEXT = 200 * (
    "Beneath the midnight sea the pearl city glowed "
    "and memories drifted upward into silence "
)

# Create the output folder up front so later writes cannot fail on a missing path.
os.makedirs(OUT_DIR, exist_ok=True)


# ============================================================
# FONT
# ============================================================

def get_font(size=FONT_SIZE):
    """Return a TrueType font at ``size``, or Pillow's built-in default font.

    Each candidate path is tried in order with ``ImageFont.truetype``. A
    candidate that cannot be opened is skipped. If none can be loaded, the
    result of ``ImageFont.load_default()`` is returned and a
    ``RuntimeWarning`` is issued, because the fallback font is a different
    typeface and therefore changes the rasterised glyphs.

    Args:
        size: Requested font size passed to ``ImageFont.truetype``.

    Returns:
        The first font that loads, otherwise Pillow's default font.
    """
    import warnings

    paths = ["arial.ttf"]
    for p in paths:
        try:
            return ImageFont.truetype(p, size)
        except OSError:
            # Pillow raises OSError when the font file cannot be found or
            # read; anything else (e.g. KeyboardInterrupt) must propagate.
            continue

    warnings.warn(
        f"None of the fonts {paths} could be loaded; "
        "falling back to Pillow's default font.",
        RuntimeWarning,
        stacklevel=2,
    )
    return ImageFont.load_default()

# Font shared by every rasterised glyph.
FONT = get_font()


# ============================================================
# GLYPH RASTER FUNCTIONS
# ============================================================

def rasterize_letter(letter: str) -> np.ndarray:
    """Render ``letter`` in black, centred on a white TILE_W x TILE_H tile.

    Args:
        letter: Text to draw (the script passes single upper-case letters).

    Returns:
        A ``uint8`` array of the 8-bit greyscale tile: background 255,
        glyph drawn with fill value 0.
    """
    img = Image.new("L", (TILE_W, TILE_H), 255)
    draw = ImageDraw.Draw(img)

    # The bounding box of text anchored at (0, 0) generally does not start
    # at (0, 0): `left`/`top` hold the offset of the ink from the anchor.
    left, top, right, bottom = draw.textbbox((0, 0), letter, font=FONT)

    # Centre the ink itself: place the box in the middle of the tile, then
    # subtract the ink offset so the drawn pixels land inside that box.
    x = (TILE_W - (right - left)) // 2 - left
    y = (TILE_H - (bottom - top)) // 2 - top

    draw.text((x, y), letter, fill=0, font=FONT)
    return np.array(img, dtype=np.uint8)

def encode_glyph(canonical: np.ndarray, value: int, rng: random.Random) -> np.ndarray:
    """Hide ``value`` in a glyph by nudging that many ink pixels from 0 to 1.

    Args:
        canonical: Reference glyph raster; pixels equal to 0 count as ink.
        value: Number of ink pixels to mark. Values <= 0 mark nothing, and
            values above the number of ink pixels mark every ink pixel.
        rng: Random generator that picks which ink pixels are marked.

    Returns:
        A copy of ``canonical`` with the chosen pixels set to 1. The input
        array itself is left untouched.
    """
    stego = canonical.copy()

    # Ink coordinates as (row, col) pairs in row-major order.
    rows, cols = np.nonzero(canonical == 0)
    ink = list(zip(rows.tolist(), cols.tolist()))

    if value > 0 and ink:
        picked = rng.sample(ink, min(value, len(ink)))
        # The sampled positions are distinct, so one indexed assignment
        # marks exactly the same pixels as setting them one by one.
        picked_rows, picked_cols = zip(*picked)
        stego[list(picked_rows), list(picked_cols)] = 1

    return stego

def decode_glyph(canonical: np.ndarray, encoded: np.ndarray) -> int:
    """Recover the value hidden in ``encoded`` by comparing it to ``canonical``.

    The value is the number of positions that are ink (0) in the canonical
    glyph and carry the marker value 1 in the encoded glyph.
    """
    was_ink = canonical == 0
    is_marked = encoded == 1
    return int(np.count_nonzero(was_ink & is_marked))

def arrange_tiles(tiles, per_row=80) -> np.ndarray:
    """Lay equally sized tiles out on a grid, ``per_row`` tiles per row.

    Tiles are placed left to right, top to bottom. An incomplete final row
    is padded with blank tiles: ``uint8`` arrays filled with 255, whatever
    the dtype of the tiles themselves.

    Args:
        tiles: Iterable of arrays that all share one shape. Lists, tuples
            and one-shot iterators are accepted; the input is never mutated.
        per_row: Number of tiles per grid row; must be at least 1.

    Returns:
        A single array with the rows of tiles stacked vertically.

    Raises:
        ValueError: If ``tiles`` is empty or ``per_row`` is smaller than 1.
    """
    tiles = list(tiles)  # private copy: allows slicing and padding of any iterable
    if not tiles:
        raise ValueError("arrange_tiles() needs at least one tile")
    if per_row < 1:
        raise ValueError("per_row must be a positive integer")

    # Size the padding from the tiles actually passed in, so the helper
    # does not depend on any particular tile size.
    blank = np.full(np.shape(tiles[0]), 255, np.uint8)

    rows = []
    for i in range(0, len(tiles), per_row):
        row = tiles[i:i + per_row]
        row.extend([blank] * (per_row - len(row)))  # no-op for full rows
        rows.append(np.hstack(row))
    return np.vstack(rows)


# ============================================================
# AUDIO → FRAME RMS → INTEGER PAYLOAD
# ============================================================

print("Loading audio:", AUDIO_FILE)
y, sr = librosa.load(AUDIO_FILE, sr=SR, mono=True)

# Cut the signal into overlapping frames; after the transpose each row of
# `frames` is one frame (the reconstruction step iterates over these rows).
frames = librosa.util.frame(y, frame_length=FRAME, hop_length=HOP).T

# One scalar per frame: its root-mean-square amplitude.
scalars = np.sqrt(np.mean(np.square(frames), axis=1))

Smin = float(scalars.min())
Smax = float(scalars.max())

# Min-max normalise (the epsilon avoids division by zero when all frames
# share one RMS), scale to P_MAX and floor to integer payload levels.
normalised = (scalars - Smin) / (Smax - Smin + 1e-12)
vals = np.clip(np.floor(normalised * P_MAX).astype(int), 0, P_MAX)

print("Frames:", len(vals))
print("Payload range:", int(vals.min()), int(vals.max()))


# ============================================================
# BUILD GLYPH STREAM
# ============================================================

# Carrier letters: the alphabetic characters of the cover text, upper-cased
# and repeated cyclically until there is exactly one letter per payload value.
cover_letters = [c for c in COVER_TEXT.upper() if c.isalpha()]
if len(vals) > 0 and not cover_letters:
    # Without this guard there would be no letter to repeat at all.
    raise ValueError("COVER_TEXT must contain at least one alphabetic character")
letters = [cover_letters[i % len(cover_letters)] for i in range(len(vals))]

rng = random.Random(SEED)

# Every occurrence of a letter has the same canonical raster, so render each
# distinct letter once and reuse the array. The cached arrays are shared
# between entries of canonical_tiles and must be treated as read-only
# (encode_glyph works on a copy).
glyph_cache = {}

canonical_tiles = []
encoded_tiles = []

for ch, v in zip(letters, vals):
    can = glyph_cache.get(ch)
    if can is None:
        can = glyph_cache[ch] = rasterize_letter(ch)
    enc = encode_glyph(can, int(v), rng)
    canonical_tiles.append(can)
    encoded_tiles.append(enc)

canonical_img = arrange_tiles(canonical_tiles)
encoded_img = arrange_tiles(encoded_tiles)

Image.fromarray(canonical_img).save(f"{OUT_DIR}/canonical.png")
Image.fromarray(encoded_img).save(f"{OUT_DIR}/encoded.png")


# ============================================================
# DIFFERENCE RASTER (IMPORTANT FOR PAPER)
# ============================================================

# Per-glyph difference (signed, so int16 rather than uint8)
diff_tiles = [
    enc.astype(np.int16) - can.astype(np.int16)
    for can, enc in zip(canonical_tiles, encoded_tiles)
]

# Build the difference grid from the two arranged rasters rather than by
# arranging diff_tiles: arrange_tiles pads the last row with 255-valued
# tiles, which in a difference image would read as a huge perturbation.
# Both rasters carry identical padding, so here it cancels to 0.
diff_img = encoded_img.astype(np.int16) - canonical_img.astype(np.int16)

# Raw difference (diagnostic): zero difference maps to mid-grey 128
Image.fromarray(
    np.clip(diff_img + 128, 0, 255).astype(np.uint8)
).save(f"{OUT_DIR}/difference_raw.png")

# Heatmap (paper figure): any positive perturbation is shown at full
# intensity. A comparison is used instead of multiplying by 255, which
# could overflow the int16 range.
diff_heat = np.where(diff_img > 0, 255, 0).astype(np.uint8)

plt.figure(figsize=(12, 8))
plt.imshow(diff_heat, cmap="hot")
plt.colorbar(label="Perturbation (+1 gray level)")
plt.title("Audio → Text Glyph Perturbation Heatmap")
plt.axis("off")
plt.tight_layout()
plt.savefig(f"{OUT_DIR}/difference_heatmap.png", dpi=300)
plt.close()


# ============================================================
# DECODE PAYLOAD
# ============================================================

# Read one payload value back from every (canonical, encoded) tile pair.
decoded_vals = np.array(
    list(map(decode_glyph, canonical_tiles, encoded_tiles)),
    dtype=int,
)

# Average absolute gap between decoded and original payload levels.
mean_decoding_error = float(np.abs(decoded_vals - vals).mean())
print("Mean payload decoding error:", mean_decoding_error)


# ============================================================
# RECONSTRUCT AUDIO
# ============================================================

# Map the decoded integer levels back onto the [Smin, Smax] RMS range.
scalars_rec = decoded_vals / P_MAX * (Smax - Smin) + Smin

# Rescale every original frame so that its RMS becomes the decoded one;
# the epsilon guards against division by zero on silent frames.
gain = scalars_rec[:, None] / (scalars[:, None] + 1e-12)
frames_rec = frames * gain

# Overlap-add: sum the frames at their hop offsets and remember how many
# frames cover each sample, then average.
y_rec = np.zeros(len(y), dtype=np.float32)
count = np.zeros(len(y), dtype=np.float32)

for i in range(len(frames_rec)):
    start = i * HOP
    stop = start + FRAME
    y_rec[start:stop] += frames_rec[i].astype(np.float32)
    count[start:stop] += 1

# Samples covered by no frame keep the value 0 (divisor clamped to 1).
y_rec /= np.maximum(count, 1.0)
sf.write(f"{OUT_DIR}/decoded.wav", y_rec, SR)


# ============================================================
# METRICS
# ============================================================

# Compare the signals over their common length.
L = min(len(y), len(y_rec))
noise = y[:L] - y_rec[:L]

mse = float(np.mean(noise**2))
mae = float(np.mean(np.abs(noise)))

# Signal-to-noise ratio in dB; the epsilon keeps the ratio finite when the
# reconstruction is exact.
signal_energy = np.sum(y[:L]**2)
noise_energy = np.sum(noise**2) + 1e-12
snr = 10 * np.log10(signal_energy / noise_energy)

print("MSE:", mse)
print("MAE:", mae)
print("SNR:", snr)

# Plain-text report with the run settings and results.
report_lines = [
    "AUDIO TO TEXT (GPC) - MAIN PAPER (0-26)\n",
    f"Audio file: {AUDIO_FILE}\n",
    f"SR: {SR}, FRAME: {FRAME}, HOP: {HOP}\n",
    f"P_MAX: {P_MAX}\n",
    f"Frames: {len(vals)}\n",
    f"Payload range: {int(vals.min())}..{int(vals.max())}\n",
    f"Mean payload decoding error: {mean_decoding_error}\n",
    f"MSE: {mse}\n",
    f"MAE: {mae}\n",
    f"SNR (dB): {snr}\n",
]

with open(f"{OUT_DIR}/metrics.txt", "w", encoding="utf-8") as f:
    f.writelines(report_lines)


# ============================================================
# WAVEFORM FIGURE
# ============================================================

# Overlay the first 4000 samples of the original and the decoded signal.
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(y[:4000], label="Original", alpha=0.7)
ax.plot(y_rec[:4000], label="Decoded (0–26)", alpha=0.7)
ax.legend()
fig.tight_layout()
fig.savefig(f"{OUT_DIR}/waveform.png", dpi=300)
plt.close(fig)


# ============================================================
# SPECTROGRAM COMPARISON (FIXED REF)
# ============================================================

# Magnitude spectrograms of the original and the reconstructed signal.
S_orig = np.abs(librosa.stft(y, n_fft=1024, hop_length=HOP))
S_rec = np.abs(librosa.stft(y_rec, n_fft=1024, hop_length=HOP))

# Both spectrograms are converted to dB against the SAME reference (the
# peak of the original) so that their levels are directly comparable.
ref = np.max(S_orig)
S_orig_db = librosa.amplitude_to_db(S_orig, ref=ref)
S_rec_db = librosa.amplitude_to_db(S_rec, ref=ref)
S_err_db = np.abs(S_orig_db - S_rec_db)

# One entry per subplot: (data, title, colour map, vmin, vmax).
panels = [
    (S_orig_db, "Original Spectrogram", "magma", -80, 0),
    (S_rec_db, "Decoded Spectrogram (0–26)", "magma", -80, 0),
    (S_err_db, "Spectrogram Error |Original − Decoded|", "inferno", 0, 20),
]

plt.figure(figsize=(15, 10))

for position, (panel_db, panel_title, panel_cmap, lo, hi) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, position)
    librosa.display.specshow(panel_db, sr=SR, hop_length=HOP,
                             x_axis="time", y_axis="hz", cmap=panel_cmap,
                             vmin=lo, vmax=hi)
    plt.colorbar(format="%+2.0f dB")
    plt.title(panel_title)

plt.tight_layout()
plt.savefig(f"{OUT_DIR}/audio_spectrogram_comparison_fixed.png", dpi=300)
plt.close()

print("✓ Finished. Outputs saved to:", OUT_DIR)
