# Generate Promo Codes (12‑char) with Letter/Digit Rules → CSV / Excel

This notebook generates **unique** promo codes with configurable rules and writes them to
a CSV file (streaming) and an Excel file. It uses cryptographically secure randomness.

**Quick start:**
1. Run the first two cells to install/import deps and set your rules.
2. Run the main generation cell.
3. Files will be created in the working directory.

### What "certain letter requirements" can I enforce?
- Minimum counts for letters and digits
- Allowed character set (e.g., A–Z + 0–9, excluding ambiguous characters)
- Must include at least one char from specific sets (e.g., must contain one of AEIOU)
- Optional prefix/suffix
- Forbid patterns via regular expressions (e.g., no three repeats, no specific substrings)

You can tweak the CONFIG block below.

%%
(Optional) If running on a very minimal environment, uncomment to ensure pandas/openpyxl are present.\
%pip install -q pandas openpyxl

## Configuration

In [14]:
!python --version

Python 3.11.13


In [15]:
import csv
import os
import re
import sqlite3
from secrets import choice as secure_choice
import pandas as pd
from tqdm import tqdm

In [16]:
OUTPUT_CSV = "promo_codes.csv"
OUTPUT_XLSX = "promo_codes.xlsx"
TOTAL_CODES = 1200000
CODE_LENGTH = 12

# All codes must start with this prefix
PREFIX = "TURMP"
SUFFIX = ""

# Allowed chars AFTER the prefix
ALPHABET_STR = "23456789ABCDEFGHJKMNPQRSTUVWXYZabcdefghjkmnpqrstuvwxyz"
ALPHABET = list(ALPHABET_STR)

# Disable extra composition rules (not requested)
MIN_LETTERS = None
MIN_DIGITS = None
REQUIRE_ONE_OF = set()
FORBIDDEN_REGEXES = []  # no extra forbids

# Uniqueness backend
UNIQUENESS_BACKEND = "sqlite"  # "memory" or "sqlite"
SQLITE_DB_PATH = "promo_codes_unique.db"
SQLITE_TABLE = "codes"

# Excel writing
SHEET_NAME = "codes"
COL_A_HEADER = "codes_1m"
COL_B_HEADER = "codes_200k"
FIRST_COLUMN_TARGET = 1_000_000
SECOND_COLUMN_TARGET = TOTAL_CODES - FIRST_COLUMN_TARGET  # 200,000
EXCEL_CHUNK_SIZE = 100000  # number of rows per write flush

## Helper functions

In [17]:
_letter_re = re.compile(r"[A-Za-z]")
_digit_re = re.compile(r"[0-9]")
_forbidden_res = [re.compile(p) for p in FORBIDDEN_REGEXES]


def passes_rules(code: str) -> bool:
    """Return True if the code meets all configured rules (mostly disabled here)."""
    if PREFIX and not code.startswith(PREFIX):
        return False
    if SUFFIX and not code.endswith(SUFFIX):
        return False

    core = code[len(PREFIX) : len(code) - len(SUFFIX) if SUFFIX else None]

    # Optional rules (disabled)
    if MIN_LETTERS is not None and len(_letter_re.findall(core)) < MIN_LETTERS:
        return False
    if MIN_DIGITS is not None and len(_digit_re.findall(core)) < MIN_DIGITS:
        return False
    if REQUIRE_ONE_OF and not any(ch in REQUIRE_ONE_OF for ch in core):
        return False
    for rx in _forbidden_res:
        if rx.search(core):
            return False

    # Ensure ONLY allowed characters are used in the body
    if not all(ch in ALPHABET for ch in core):
        return False

    # Length check
    if len(code) != CODE_LENGTH:
        return False

    return True


def random_code() -> str:
    body_len = CODE_LENGTH - len(PREFIX) - len(SUFFIX)
    if body_len <= 0:
        raise ValueError("CODE_LENGTH too small for given PREFIX/SUFFIX")
    body = "".join(secure_choice(ALPHABET) for _ in range(body_len))
    return f"{PREFIX}{body}{SUFFIX}"


class UniqueSink:
    """Track uniqueness either in memory or via SQLite."""

    def __init__(self, backend: str = "memory"):
        self.backend = backend
        if backend == "memory":
            self._seen = set()
        elif backend == "sqlite":
            self._conn = sqlite3.connect(SQLITE_DB_PATH)
            cur = self._conn.cursor()
            cur.execute(
                f"CREATE TABLE IF NOT EXISTS {SQLITE_TABLE} (code TEXT PRIMARY KEY)"
            )
            self._conn.commit()
        else:
            raise ValueError("backend must be 'memory' or 'sqlite'")

    def add_if_new(self, code: str) -> bool:
        if self.backend == "memory":
            if code in self._seen:
                return False
            self._seen.add(code)
            return True
        else:
            try:
                cur = self._conn.cursor()
                cur.execute(f"INSERT INTO {SQLITE_TABLE}(code) VALUES (?)", (code,))
                self._conn.commit()
                return True
            except sqlite3.IntegrityError:
                return False

    def close(self):
        if self.backend == "sqlite":
            self._conn.close()


# Remove old outputs
for p in [OUTPUT_CSV, OUTPUT_XLSX, SQLITE_DB_PATH]:
    if os.path.exists(p):
        os.remove(p)

unique = UniqueSink(UNIQUENESS_BACKEND)

In [18]:
csv_file = open(OUTPUT_CSV, "w", newline="")
csv_writer = csv.writer(csv_file)
csv_writer.writerow(["code"])

# Excel: pre-create workbook with both headers in one row
with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl", mode="w") as xw:
    pd.DataFrame(columns=[COL_A_HEADER, COL_B_HEADER]).to_excel(
        xw, index=False, sheet_name=SHEET_NAME
    )

# Track how many rows written to each Excel column so far
excel_rows_written_A = (
    0  # below header; data starts at row 2 in Excel, but we count data rows only
)
excel_rows_written_B = 0

# Buffers to flush in chunks
buffer_A = []  # for the first 1,000,000 codes
buffer_B = []  # for the remaining 200,000 codes

## Main generation loop

In [19]:
with tqdm(total=TOTAL_CODES, desc="Generating Codes", unit="code") as pbar:
    written_total = 0
    while written_total < TOTAL_CODES:
        code = random_code()
        if not passes_rules(code):
            continue
        if not unique.add_if_new(code):
            continue

        csv_writer.writerow([code])

        if written_total < FIRST_COLUMN_TARGET:
            buffer_A.append(code)
            if len(buffer_A) >= EXCEL_CHUNK_SIZE or (
                written_total + 1 == FIRST_COLUMN_TARGET
            ):
                dfA = pd.DataFrame({COL_A_HEADER: buffer_A})
                with pd.ExcelWriter(
                    OUTPUT_XLSX, engine="openpyxl", mode="a", if_sheet_exists="overlay"
                ) as xw:
                    dfA.to_excel(
                        xw,
                        index=False,
                        header=False,
                        sheet_name=SHEET_NAME,
                        startrow=1 + excel_rows_written_A,
                        startcol=0,
                    )
                excel_rows_written_A += len(buffer_A)
                buffer_A.clear()
        else:
            buffer_B.append(code)
            if len(buffer_B) >= EXCEL_CHUNK_SIZE or (written_total + 1 == TOTAL_CODES):
                dfB = pd.DataFrame({COL_B_HEADER: buffer_B})
                with pd.ExcelWriter(
                    OUTPUT_XLSX, engine="openpyxl", mode="a", if_sheet_exists="overlay"
                ) as xw:
                    dfB.to_excel(
                        xw,
                        index=False,
                        header=False,
                        sheet_name=SHEET_NAME,
                        startrow=1 + excel_rows_written_B,
                        startcol=1,
                    )
                excel_rows_written_B += len(buffer_B)
                buffer_B.clear()

        written_total += 1
        pbar.update(1)

# ---------- CLEANUP ----------
unique.close()
csv_file.close()

print(f"Done. Wrote {written_total:,} unique codes to {OUTPUT_CSV} and {OUTPUT_XLSX}.")

Generating Codes: 100%|██████████| 1200000/1200000 [23:51<00:00, 838.49code/s] 

Done. Wrote 1,200,000 unique codes to promo_codes.csv and promo_codes.xlsx.



