In [None]:
import re
from typing import Callable, Dict, Pattern

RedactionMode = str  # 'tag' | 'mask' | 'partial'

def luhn_checksum_ok(num: str) -> bool:
    """Check a candidate card number against the Luhn checksum.

    Non-digit characters (spaces, hyphens, ...) are ignored. Inputs with
    fewer than 12 digits are rejected outright to weed out short false
    positives.

    Args:
        num: Candidate number, optionally containing separators.

    Returns:
        True when at least 12 digits are present and the Luhn sum is a
        multiple of 10, otherwise False.
    """
    values = [int(ch) for ch in re.findall(r"\d", num)]
    if len(values) < 12:
        return False
    # Walking from the right: the check digit and every second digit after it
    # count as-is; the digits in between are doubled.
    untouched = values[-1::-2]
    doubled = [2 * v for v in values[-2::-2]]
    # A doubled value above 9 contributes the sum of its two digits (v - 9).
    checksum = sum(untouched) + sum(v - 9 if v > 9 else v for v in doubled)
    return checksum % 10 == 0

# --- Patterns (tuned for high precision, you can expand as needed) ---
# Each pattern guards its edges with \b or digit lookarounds so it does not
# fire in the middle of a longer token.
PATTERNS: Dict[str, Pattern] = {
    # name -> compiled regex
    "EMAIL": re.compile(r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[A-Za-z]{2,}\b"),
    # US-style phones: (123) 456-7890, 123-456-7890, 123.456.7890, +1 123 456 7890
    "PHONE": re.compile(
        r"""(?x)
        (?<!\d)                    # no digit before
        (?:\+?1[\s.-]?)?           # country code optional
        (?:\(?\d{3}\)?[\s.-]?)     # area code
        \d{3}[\s.-]?\d{4}          # local number
        (?!\d)                     # no digit after
        """
    ),
    # U.S. SSN: 123-45-6789 or 123456789
    "SSN": re.compile(r"(?<!\d)(\d{3}-?\d{2}-?\d{4})(?!\d)"),
    # Credit cards: 13–19 digits, each optionally followed by a space or
    # hyphen; candidates are validated by Luhn before redacting.
    "CREDIT_CARD": re.compile(
        r"""(?x)
        (?<!\d)
        (?:\d[ -]?){12,18}\d       # 12–18 digits + 1 final digit = 13–19 total
        (?!\d)
        """
    ),
    # IPv4: four dot-separated octets, each in 0–255
    "IPV4": re.compile(
        r"""\b(?:
            (?:25[0-5]|2[0-4]\d|1?\d?\d)\.
            (?:25[0-5]|2[0-4]\d|1?\d?\d)\.
            (?:25[0-5]|2[0-4]\d|1?\d?\d)\.
            (?:25[0-5]|2[0-4]\d|1?\d?\d)
        )\b""",
        re.X,
    ),
    # Dates: YYYY-MM-DD, MM/DD/YYYY, DD/MM/YYYY (conservative)
    "DATE": re.compile(
        r"""(?x)
        \b(
            (?:\d{4}[-/](?:0[1-9]|1[0-2])[-/](?:0[1-9]|[12]\d|3[01])) |   # YYYY-MM-DD
            (?:(?:0[1-9]|1[0-2])[-/](?:0[1-9]|[12]\d|3[01])[-/]\d{4}) |   # MM/DD/YYYY
            (?:(?:0[1-9]|[12]\d|3[01])[-/](?:0[1-9]|1[0-2])[-/]\d{4})     # DD/MM/YYYY
        )\b
        """
    )
}

def _mask_all(s: str) -> str:
    return "█" * len(s)

def _mask_partial_keep_last4(s: str) -> str:
    digits = re.sub(r"\D", "", s)
    keep = digits[-4:] if len(digits) >= 4 else ""
    return f"{'█' * (max(0, len(digits)-4))}{keep}" or _mask_all(s)

def _redaction_token(kind: str) -> str:
    return f"[REDACTED:{kind}]"

def _make_replacer(kind: str, mode: RedactionMode) -> Callable[[re.Match], str]:
    """Build the ``re.sub`` callback that redacts one match of ``kind``.

    Args:
        kind: Label of the pattern being redacted (e.g. "EMAIL", "PHONE").
            It selects the masking strategy and is embedded in the token
            produced by 'tag' mode.
        mode: 'tag', 'mask' or 'partial'. Any other value behaves like 'mask'.

    Returns:
        A function that takes a match object and returns its replacement.

    Strategy by mode:
        * 'tag'     -> ``[REDACTED:<kind>]`` for every kind.
        * 'mask'    -> for digit-formatted kinds every digit becomes █ and
          separators are kept; any other kind is masked in full.
        * 'partial' -> like 'mask', but the last 4 digits (CREDIT_CARD) or
          the last 2 digits (other digit-formatted kinds with at least 4
          digits) stay visible; anything else is masked in full.

    CREDIT_CARD matches that fail the Luhn check are returned unchanged.
    """

    def overlay_digits(text: str, masked_digits: str) -> str:
        """Swap each digit of ``text``, left to right, for the next character
        of ``masked_digits`` while leaving separators where they are."""
        replacements = iter(masked_digits)
        return "".join(
            [next(replacements) if ch.isdigit() else ch for ch in text]
        )

    if kind == "CREDIT_CARD":

        def repl_cc(m: re.Match) -> str:
            text = m.group(0)
            if not luhn_checksum_ok(text):
                return text  # skip false positives
            if mode == "tag":
                return _redaction_token(kind)
            if mode == "partial":
                # keep the original grouping, show only the last 4 digits
                return overlay_digits(text, _mask_partial_keep_last4(text))
            return re.sub(r"\d", "█", text)  # 'mask'

        return repl_cc

    # Kinds whose matches consist of digits plus punctuation: hiding the digits
    # hides the value. Every other kind (notably EMAIL) must be blanked
    # entirely -- masking only the digits of "john1984@example.com" would leave
    # the address readable. Unknown kinds take the full-mask path on purpose.
    digit_formatted = kind in {"PHONE", "SSN", "IPV4", "DATE"}

    def generic_repl(m: re.Match) -> str:
        text = m.group(0)
        if mode == "tag":
            return _redaction_token(kind)
        if not digit_formatted:
            return _mask_all(text)

        digits = re.sub(r"\D", "", text)
        if mode == "partial":
            # With fewer than 4 digits, keeping 2 would expose too much.
            if len(digits) < 4:
                return _mask_all(text)
            masked_digits = "█" * (len(digits) - 2) + digits[-2:]
            return overlay_digits(text, masked_digits)

        # 'mask': preserve non-digits for readability
        if digits:
            return re.sub(r"\d", "█", text)
        return _mask_all(text)

    return generic_repl

def redact_pii(text: str, mode: RedactionMode = "tag") -> str:
    """
    Return a copy of ``text`` with every PII match redacted.

    mode:
      - 'tag'     -> replace with [REDACTED:<TYPE>]
      - 'mask'    -> replace characters (e.g., digits) with █
      - 'partial' -> keep last 2 (phones/ssn) or last 4 (credit cards)

    Each pattern is applied to the output of the previous one, so the order
    below matters.
    """
    # Apply in an order that reduces overlap surprises
    redacted = text
    for label in ("EMAIL", "CREDIT_CARD", "SSN", "PHONE", "IPV4", "DATE"):
        redacted = PATTERNS[label].sub(_make_replacer(label, mode), redacted)
    return redacted

# --- Example usage ---
# Runs one sample text through every redaction mode and prints the results.
if __name__ == "__main__":
    sample = "".join(
        [
            "Contact Jane Doe at jane.doe+test@example.com or (212) 555-0199. ",
            "SSN 123-45-6789, alt SSN 123456789. ",
            "Card: 4242 4242 4242 4242 exp 07/2029. ",
            "Server at 192.168.1.42. ",
            "DOB 2005-09-23; met on 09/23/2005.",
        ]
    )
    for m in ("tag", "mask", "partial"):
        print(f"\nMode: {m}")
        print(redact_pii(sample, mode=m))



Mode: tag
Contact Jane Doe at [REDACTED:EMAIL] or [REDACTED:PHONE]. SSN [REDACTED:SSN], alt SSN [REDACTED:SSN]. Card: [REDACTED:CREDIT_CARD] exp 07/2029. Server at [REDACTED:IPV4]. DOB [REDACTED:DATE]; met on [REDACTED:DATE].

Mode: mask
Contact Jane Doe at █████████████████████████ or (███) ███-████. SSN ███-██-████, alt SSN █████████. Card: ████ ████ ████ ████ exp 07/2029. Server at ███.███.█.██. DOB ████-██-██; met on ██/██/████.

Mode: partial
Contact Jane Doe at █████████████████████████ or (███) ███-██99. SSN ███-██-██89, alt SSN ███████89. Card: ████ ████ ████ 4242 exp 07/2029. Server at ███.███.█.42. DOB ████-██-23; met on ██/██/██05.
