<a href="https://colab.research.google.com/github/jadin101777/emcc-otp-reuse-lab/blob/main/otp_reuse_lab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# OTP Reuse Lab — EMCC Cybersecurity & Coding Club
This lab demonstrates what happens when a one-time pad (OTP) or key material
is reused between different messages.

 **Goal:** Learn how XOR and crib-dragging can reveal plaintext in reused-key scenarios.

> Educational use only — do not use these techniques against real systems.


In [7]:
# === INSTRUCTOR ONLY: Generate plaintexts, key (reused) and ciphertexts ===
# Run this in a private instructor Colab / local Python session.
import json
import random
import secrets
from pathlib import Path

# --- Plaintexts (modify to taste) ---
plaintexts = [
    b"Meet me at the north gate at 2200 hours.",
    b"The backup key is hidden under the loose tile.",
    b"Mission confirmed, proceed as planned.",
    b"The target file is named config_backup.zip.",
    b"Oh my God, they killed Kenny!.",
    b"You bastards",
]

# --- Create a single random key (simulating the vulnerability) ---
# The key is as long as the longest plaintext so every message is fully covered.
key_len = max(len(p) for p in plaintexts)
# Draw the key from the OS CSPRNG. The `random` module is documented as
# unsuitable for security purposes, and the only weakness this lab is meant
# to demonstrate is key *reuse*, not a predictable key.
key = secrets.token_bytes(key_len)

# --- XOR helper (same as student) ---
def xor_bytes(a: bytes, b: bytes) -> bytes:
    """XOR two byte strings position by position.

    The result is as long as the shorter input; surplus bytes of the longer
    one are ignored.
    """
    out = bytearray()
    for left, right in zip(a, b):
        out.append(left ^ right)
    return bytes(out)

# --- Encrypt: every plaintext is XORed with the one shared key (the deliberate flaw) ---
cts = []
for message in plaintexts:
    cts.append(xor_bytes(message, key))

# --- Hex-encode and print as a list literal for pasting into the student notebook ---
cts_hex = [ct.hex() for ct in cts]
print("COPY the following list into the STUDENT notebook (cts_hex):\n")
print("[")
for hex_ct in cts_hex:
    print(f'  "{hex_ct}",')
print("]\n")

# --- OPTIONAL: ciphertext-only JSON file (safe to hand to students) ---
out_path = Path("cts_student.json")
student_json = json.dumps(cts_hex, indent=2)
out_path.write_text(student_json)
print(f"Saved ciphertext hex to {out_path} (you can open and copy from this file).")

# --- OPTIONAL: answer key for the instructor only (DON'T publish) ---
# Holds the plaintexts, the reused key and the ciphertexts together.
priv = dict(
    plaintexts=[p.decode("latin1") for p in plaintexts],
    key_hex=key.hex(),
    cts_hex=cts_hex,
)
answers_path = Path("instructor_private_answers.json")
answers_path.write_text(json.dumps(priv, indent=2))
print("Saved instructor answers to instructor_private_answers.json (PRIVATE).")


COPY the following list into the STUDENT notebook (cts_hex):

[
  "0e1c5a52b189b8b2fdbf69952b9a7cbd4b112a351a4a243fd0a923fbed8ca96bb9434eb63d74a49d",
  "17115a06f385bef9e9bb698a26867cba574336345e49202595fc2ceba8ccbb2fe10606b52769a4d682808363beb8",
  "0e104c55f88bb3b2ffa427872a8d31b6404f7e2d4842262ed0ed62eebe9eeb37e80d48bc2c28",
  "17115a06e585aff5f9bf69872a9339f34d107e335b40202f95ea2de1abd7fc04eb0245b23d76f9c9cb84c4",
  "0c111f4be8c49afdf8e769952b9a25f34f0a32315f496500d0e72cf6ec90",
  "1a164a06f385aee6fdb92d92",
]

Saved ciphertext hex to cts_student.json (you can open and copy from this file).
Saved instructor answers to instructor_private_answers.json (PRIVATE).


In [20]:
# --- Setup ---
!pip install wordfreq --quiet

import random
import string
from wordfreq import zipf_frequency
import itertools

# Simple helpers
def xor_bytes(a: bytes, b: bytes) -> bytes:
    """Return the byte-wise XOR of `a` and `b`.

    Only the overlapping prefix is combined, so the output length equals the
    length of the shorter argument.
    """
    overlap = min(len(a), len(b))
    return bytes(a[pos] ^ b[pos] for pos in range(overlap))

def is_printable(b: bytes, threshold=0.9) -> bool:
    """Report whether `b` looks like readable ASCII text.

    A byte counts as readable when it is printable ASCII (0x20-0x7E), a tab,
    or a newline. Bytes are checked by value rather than decoded as latin1,
    because `str.isprintable` would accept accented characters in the
    0xA1-0xFF range and let high-byte garbage pass as plausible English.

    Args:
        b: Candidate fragment to score.
        threshold: Fraction of readable bytes that must be strictly exceeded.

    Returns:
        True if the readable fraction is greater than `threshold`;
        False otherwise, and always False for empty input.
    """
    if not b:
        return False
    readable = sum(
        1 for byte in b if 0x20 <= byte <= 0x7E or byte in (0x09, 0x0A)
    )
    return readable / len(b) > threshold


In [21]:
# --- Crib-dragging helpers ---
def crib_drag(xor_stream: bytes, crib: bytes, offset: int) -> bytes:
    """XOR `crib` against the slice of `xor_stream` that starts at `offset`.

    The slice is `len(crib)` bytes long. Slicing stops at the end of the
    stream, so a window that runs past it yields a shorter slice.
    """
    end = offset + len(crib)
    window = xor_stream[offset:end]
    return xor_bytes(window, crib)

def show_recovered_fragment(crib: bytes):
    """Drag `crib` across every pair of ciphertexts and print readable hits.

    Reads the module-level `cts` list. For each unordered pair (ci, cj) the
    two ciphertexts are XORed together, the crib is tried at every offset
    where it fits, and any fragment accepted by `is_printable` is printed.

    Each hit is labelled with both ciphertext indices so it can be traced
    back to the messages involved, rather than with a bare pair counter.

    Args:
        crib: Guessed plaintext fragment to test.
    """
    for (i, c1), (j, c2) in itertools.combinations(enumerate(cts), 2):
        x = xor_bytes(c1, c2)
        # The range is empty when the crib is longer than the XOR stream.
        for off in range(len(x) - len(crib) + 1):
            frag = crib_drag(x, crib, off)
            if is_printable(frag):
                print(f"Pair (c{i}, c{j}): offset {off} → {frag!r}")

# Starter cribs: short byte strings to try as plaintext guesses.
common_bytes = [b"Meet", b" at ", b"north", b"upload", b"key", b"file", b"the "]
print("Helpers loaded. Ready to test cribs.")


Helpers loaded. Ready to test cribs.


In [22]:
# --- Student: ciphertexts provided by instructor (DO NOT RUN any instructor/generation cell) ---
cts_hex = [
  "0e1c5a52b189b8b2fdbf69952b9a7cbd4b112a351a4a243fd0a923fbed8ca96bb9434eb63d74a49d",
  "17115a06f385bef9e9bb698a26867cba574336345e49202595fc2ceba8ccbb2fe10606b52769a4d682808363beb8",
  "0e104c55f88bb3b2ffa427872a8d31b6404f7e2d4842262ed0ed62eebe9eeb37e80d48bc2c28",
  "17115a06e585aff5f9bf69872a9339f34d107e335b40202f95ea2de1abd7fc04eb0245b23d76f9c9cb84c4",
  "0c111f4be8c49afdf8e769952b9a25f34f0a32315f496500d0e72cf6ec90",
  "1a164a06f385aee6fdb92d92",
]
# Decode the hex strings into raw bytes for the XOR helpers.
cts = list(map(bytes.fromhex, cts_hex))

# Students see only the ciphertext lengths, never the plaintexts.
for idx, ct in enumerate(cts):
    print(f"c{idx} length = {len(ct)} bytes")


c0 length = 40 bytes
c1 length = 46 bytes
c2 length = 38 bytes
c3 length = 43 bytes
c4 length = 44 bytes


In [15]:




# Recap: size of every ciphertext currently loaded.
for idx, ct in enumerate(cts):
    print(f"c{idx} length = {len(ct)} bytes")

# --- 4) Compute attack surface (STUDENTS: edit i, j only) ---
i, j = 0, 1         # <<< STUDENTS: change these indices to pick ciphertexts (e.g., 0,2 or 1,3)

# Sanity checks: need at least two ciphertexts and two distinct, in-range indices.
assert isinstance(cts, list) and len(cts) >= 2, "cts not loaded or malformed"
in_range = 0 <= i < len(cts) and 0 <= j < len(cts)
assert in_range and i != j, "Invalid indices: choose different valid ciphertext indices"

# XORing two ciphertexts made with the same key cancels the key and leaves
# plaintext XOR plaintext; this is the stream that cribs are dragged across.
x = xor_bytes(cts[i], cts[j])

# Summary; x is only as long as the shorter of the two ciphertexts.
print(f"Computed attack surface for pair (c{i}, c{j})")
print(f"  len(c{i}) = {len(cts[i])}, len(c{j}) = {len(cts[j])}, len(x) = {len(x)}")
print("Hex preview (first 80 bytes):", x[:80].hex())

# Keep x for the following manual crib-dragging cell




c0 length = 40 bytes
c1 length = 46 bytes
c2 length = 38 bytes
c3 length = 43 bytes
c4 length = 44 bytes
Computed attack surface for pair (c0, c1)
  len(c0) = 40, len(c1) = 46, len(x) = 40
Hex preview (first 80 bytes): 190d0054420c064b1404001f0d1c00071c521c014403041a45550f1045401244584548031a1d004b


In [27]:
# --- Manual crib-dragging example ---
crib = b"at "
x = xor_bytes(cts[0], cts[1])

# Try the crib at every position where it fits and print only the
# candidates that look readable.
last_offset = len(x) - len(crib)
for off in range(last_offset + 1):
    cand = crib_drag(x, crib, off)
    if not is_printable(cand):
        continue
    print("offset", off, "→", cand)


offset 0 → b'xy '
offset 1 → b'ltt'
offset 2 → b'a b'
offset 3 → b'56,'
offset 4 → b'#x&'
offset 5 → b'mrk'
offset 6 → b'g?4'
offset 7 → b'*`$'
offset 8 → b'up '
offset 9 → b'et?'
offset 10 → b'ak-'
offset 11 → b'~y<'
offset 12 → b'lh '
offset 13 → b"}t'"
offset 14 → b'as<'
offset 15 → b'fhr'
offset 16 → b'}&<'
offset 17 → b'3h!'
offset 18 → b'}ud'
offset 19 → b'`0#'
offset 20 → b'%w$'
offset 21 → b'bp:'
offset 22 → b'ene'
offset 23 → b'{1u'
offset 24 → b'$!/'
offset 25 → b'4{0'
offset 26 → b'nde'
offset 27 → b'q1`'
offset 28 → b'$42'
offset 29 → b'!fd'
offset 30 → b's0x'
offset 31 → b'%,e'
offset 32 → b'91h'
offset 33 → b'$<#'
offset 34 → b')w:'
offset 35 → b'bn='
offset 36 → b'{i '
offset 37 → b'|tk'


In [23]:
# --- Verify a crib by deriving key fragment ---
offset = 0  # replace with your discovered offset
crib = b"the "  # guessed plaintext of cts[0] at that offset

# Byte range covered by the crib inside each ciphertext.
window = slice(offset, offset + len(crib))

# ciphertext XOR guessed plaintext gives a candidate key fragment ...
k_frag = xor_bytes(cts[0][window], crib)
# ... which is then used to decrypt the same range of the other ciphertext.
recovered = xor_bytes(cts[1][window], k_frag)

print("Recovered:", recovered)


Recovered: b'meet'


In [24]:
# --- Automated crib search ---
def automated_crib_search(i, j, crib_list, show_hits=15):
    """Try every crib in `crib_list` against the XOR of ciphertexts `i` and `j`.

    Each crib is dragged across all offsets where it fits. Every fragment
    accepted by `is_printable` is collected, then the first `show_hits`
    of them are printed in discovery order (crib by crib, offset ascending).

    Args:
        i: Index of the first ciphertext in the module-level `cts` list.
        j: Index of the second ciphertext in `cts`.
        crib_list: Iterable of byte-string cribs to test.
        show_hits: Maximum number of hits to print.
    """
    stream = xor_bytes(cts[i], cts[j])
    hits = []
    for guess in crib_list:
        last_start = len(stream) - len(guess)
        for start in range(last_start + 1):
            candidate = crib_drag(stream, guess, start)
            if not is_printable(candidate):
                continue
            hits.append((guess, start, candidate))
    for guess, start, candidate in hits[:show_hits]:
        print(f"Crib {guess!r} at offset {start} → {candidate!r}")

automated_crib_search(0, 1, common_bytes, show_hits=20)


Crib b'Meet' at offset 0 → b'The '
Crib b'Meet' at offset 1 → b'@e16'
Crib b'Meet' at offset 2 → b"M1'x"
Crib b'Meet' at offset 5 → b'Ac.`'
Crib b'Meet' at offset 6 → b'K.qp'
Crib b'Meet' at offset 8 → b'Yaek'
Crib b'Meet' at offset 9 → b'Iezy'
Crib b'Meet' at offset 10 → b'Mzhh'
Crib b'Meet' at offset 11 → b'Rhyt'
Crib b'Meet' at offset 12 → b'@yes'
Crib b'Meet' at offset 13 → b'Qebh'
Crib b'Meet' at offset 14 → b'Mby&'
Crib b'Meet' at offset 15 → b'Jy7h'
Crib b'Meet' at offset 16 → b'Q7yu'
Crib b'Meet' at offset 18 → b'Qd!w'
Crib b'Meet' at offset 19 → b'L!fp'
Crib b'Meet' at offset 20 → b'\tfan'
Crib b'Meet' at offset 23 → b'W 0{'
Crib b'Meet' at offset 26 → b'Bu 4'
Crib b'Meet' at offset 27 → b'] %f'


## Challenge
Try to:
- Recover at least one readable plaintext fragment.
- Identify which ciphertext pairs share the most overlap.
- Modify the `plaintexts` list in the instructor cell, regenerate the ciphertexts, and see how the results change.
- (Optional) Write a function that reconstructs a full sentence.


## Discussion & Mitigation
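- OTP reuse makes ciphertexts mathematically linked: XORing two ciphertexts made with the same key cancels the key and leaves the XOR of the two plaintexts, so secrecy is lost.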
- Always use unique keys, IVs, or nonces for every message.
- Randomness quality and operational discipline are as critical as algorithms.


---
**EMCC Cybersecurity & Coding Club — Peer-Led Learning**  

