In [1]:
!pip install -U sentence-transformers faiss-cpu fastapi uvicorn[standard] rapidfuzz


Collecting sentence-transformers
  Downloading sentence_transformers-5.2.0-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp313-cp313-win_amd64.whl.metadata (7.6 kB)
Collecting fastapi
  Downloading fastapi-0.128.0-py3-none-any.whl.metadata (30 kB)
Collecting rapidfuzz
  Downloading rapidfuzz-3.14.3-cp313-cp313-win_amd64.whl.metadata (12 kB)
Collecting uvicorn[standard]
  Downloading uvicorn-0.40.0-py3-none-any.whl.metadata (6.7 kB)
Collecting annotated-doc>=0.0.2 (from fastapi)
  Downloading annotated_doc-0.0.4-py3-none-any.whl.metadata (6.6 kB)
Collecting httptools>=0.6.3 (from uvicorn[standard])
  Downloading httptools-0.7.1-cp313-cp313-win_amd64.whl.metadata (3.6 kB)
Collecting python-dotenv>=0.13 (from uvicorn[standard])
  Downloading python_dotenv-1.2.1-py3-none-any.whl.metadata (25 kB)
Collecting watchfiles>=0.13 (from uvicorn[standard])
  Downloading watchfiles-1.1.1-cp313-cp313-win_amd64.whl.metadata (5.0 kB)
Collecting websockets>=10.4 (


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
import json
from pathlib import Path
from datetime import datetime, timedelta
import random

DATA_PATH = Path("data")
DATA_PATH.mkdir(exist_ok=True)

TEMPLATES_FILE = DATA_PATH / "ad_templates.json"

current_time = datetime.utcnow()

sample_templates = [
    {
        "id": 1,
        "title": "Monsson Sale - 30% Off",
        "description": "Promote a seasonal summer discount with bright visuals and a bold call-to-action.",
        "category": "Retail",
        "language": "en",
        "created_at": (current_time - timedelta(days=30)).isoformat(),
        "usage_count": 120,
    },
    {
        "id": 2,
        "title": "New Product Launch Campaign",
        "description": "Announce a new product launch with engaging visuals and early-bird offers.",
        "category": "Product Launch",
        "language": "en",
        "created_at": (current_time - timedelta(days=5)).isoformat(),
        "usage_count": 340,
    },
    {
        "id": 3,
        "title": "Black Friday Mega Sale",
        "description": "High-conversion ad template for Black Friday and flash sales.",
        "category": "Retail",
        "language": "en",
        "created_at": (current_time - timedelta(days=60)).isoformat(),
        "usage_count": 980,
    },
    {
        "id": 4,
        "title": "Festive Season Special Offer",
        "description": "Promote festive discounts with colorful graphics and celebratory messaging.",
        "category": "Seasonal",
        "language": "en",
        "created_at": (current_time - timedelta(days=20)).isoformat(),
        "usage_count": 410,
    },
    {
        "id": 5,
        "title": "Mobile App Download Ad",
        "description": "Drive app installs with clear value propositions and download CTAs.",
        "category": "App Marketing",
        "language": "en",
        "created_at": (current_time - timedelta(days=12)).isoformat(),
        "usage_count": 260,
    },
    {
        "id": 6,
        "title": "End of Season Clearance",
        "description": "Clear inventory with urgency-driven discount messaging.",
        "category": "Retail",
        "language": "en",
        "created_at": (current_time - timedelta(days=45)).isoformat(),
        "usage_count": 510,
    },
    {
        "id": 7,
        "title": "Limited Time Offer",
        "description": "Create urgency with countdown-style limited time promotions.",
        "category": "Promotions",
        "language": "en",
        "created_at": (current_time - timedelta(days=8)).isoformat(),
        "usage_count": 305,
    },
    {
        "id": 8,
        "title": "Subscription Signup Campaign",
        "description": "Encourage newsletter or subscription signups with incentives.",
        "category": "Lead Generation",
        "language": "en",
        "created_at": (current_time - timedelta(days=18)).isoformat(),
        "usage_count": 190,
    },
    {
        "id": 9,
        "title": "Back to School Sale",
        "description": "Target students and parents with school season discounts.",
        "category": "Seasonal",
        "language": "en",
        "created_at": (current_time - timedelta(days=70)).isoformat(),
        "usage_count": 430,
    },
    {
        "id": 10,
        "title": "E-commerce Free Shipping Ad",
        "description": "Boost conversions by highlighting free shipping benefits.",
        "category": "E-commerce",
        "language": "en",
        "created_at": (current_time - timedelta(days=15)).isoformat(),
        "usage_count": 390,
    },
]


for i in range(11, 500):
    sample_templates.append({
        "id": i,
        "title": f"Marketing campaign template {i}",
        "description": f"Versatile advertising template designed for campaign {i}, "
            f"focused on increasing engagement, conversions, and brand visibility.",
        "category": random.choice(["Retail", "Seasonal", "Product Launch", "E-commerce", "Branding"]),
        "language": "en",
        "created_at": (current_time - timedelta(days=random.randint(0, 365))).isoformat(),
        "usage_count": random.randint(0, 500),
    })

with open(TEMPLATES_FILE, "w", encoding="utf-8") as f:
    json.dump(sample_templates, f, ensure_ascii=False, indent=2)

len(sample_templates)


  current_time = datetime.utcnow()


499

In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import time

# Load data
with open(TEMPLATES_FILE, "r", encoding="utf-8") as f:
    templates = json.load(f)

corpus_texts = []
ids = []
for tpl in templates:
    # English language data
    text = f"{tpl['title']} [SEP] {tpl['description']} [SEP] category: {tpl['category']}"
    corpus_texts.append(text)
    ids.append(tpl["id"])

# Load multilingual model
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")  # 768-dim multilingual model use for multi language

# Encode
start = time.time()
embeddings = model.encode(
    corpus_texts,
    batch_size=64,
    show_progress_bar=True,
    convert_to_numpy=True,
    normalize_embeddings=True,  # cosine similarity score
)
print(f"Encoded {len(corpus_texts)} templates in {time.time() - start:.3f}s")

embedding_dim = embeddings.shape[1]

# FAISS index
index = faiss.IndexFlatIP(embedding_dim)
index.add(embeddings)
print("Index size:", index.ntotal)

# Persist index + ids + templates
faiss.write_index(index, str(DATA_PATH / "ad_faiss.index"))
np.save(DATA_PATH / "ad_ids.npy", np.array(ids))


  return forward_call(*args, **kwargs)
Batches: 100%|██████████| 8/8 [00:08<00:00,  1.12s/it]

Encoded 499 templates in 8.969s
Index size: 499





In [14]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import json
from pathlib import Path
import time
import math
from datetime import datetime
from rapidfuzz import fuzz, process

DATA_PATH = Path("data")

index = faiss.read_index(str(DATA_PATH / "ad_faiss.index"))
ids = np.load(DATA_PATH / "ad_ids.npy")

with open(DATA_PATH / "ad_templates.json", "r", encoding="utf-8") as f:
    templates_list = json.load(f)
templates = {tpl["id"]: tpl for tpl in templates_list}

model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
NOW = datetime.utcnow()


  NOW = datetime.utcnow()


In [15]:
def ranking_score(similarity: float, created_at: str, usage_count: int,
                  alpha: float = 0.6, beta: float = 0.25, gamma: float = 0.15) -> float:
    # similarity: FAISS cosine similarity
    created = datetime.fromisoformat(created_at)
    age_days = (NOW - created).days + 1
    recency = 1.0 / math.log1p(age_days)  # newer → larger
    popularity = math.log1p(usage_count)  # more usage → larger
    return alpha * similarity + beta * recency + gamma * popularity


In [16]:
def search_templates(query: str, top_k: int = 5, use_fuzzy: bool = True):
    # Encode multilingual query
    q_vec = model.encode([query], normalize_embeddings=True)

    # Vector search
    start = time.time()
    scores, indices = index.search(q_vec, top_k * 3)  # over-fetch, then re-rank
    latency_ms = (time.time() - start) * 1000

    candidates = []
    titles = {tpl_id: tpl["title"] for tpl_id, tpl in templates.items()}

    for score, idx in zip(scores[0], indices[0]):
        tpl_id = int(ids[idx])
        tpl = templates[tpl_id]

        # recency+usage-aware ranking
        final_score = ranking_score(
            similarity=float(score),
            created_at=tpl["created_at"],
            usage_count=tpl["usage_count"],
        )

        # Optional typo tolerance: fuzzy title similarity
        if use_fuzzy:
            fuzzy_title_score = fuzz.token_sort_ratio(query, tpl["title"])
        else:
            fuzzy_title_score = 100

        candidates.append({
            "id": tpl_id,
            "sim_score": float(score),
            "rank_score": final_score,
            "fuzzy_title": fuzzy_title_score,
            "title": tpl["title"],
            "description": tpl["description"],
            "category": tpl["category"],
            "created_at": tpl["created_at"],
            "usage_count": tpl["usage_count"],
        })

    # Filter out completely irrelevant fuzzy matches
    if use_fuzzy:
        candidates = [c for c in candidates if c["fuzzy_title"] >= 40]

    # Sort by rank_score desc
    candidates.sort(key=lambda x: x["rank_score"], reverse=True)
    return {
        "latency_ms": latency_ms,
        "results": candidates[:top_k],
    }


In [17]:
import time
from rapidfuzz import fuzz

def search_templates(query, top_k=3):
    start = time.time()

    query_en = query  # already English for now
    query_vec = model.encode([query_en])

    sims, ids = index.search(query_vec, top_k)

    results = []
    for sim, idx in zip(sims[0], ids[0]):
        doc = templates[idx]

        fuzzy_score = fuzz.partial_ratio(query_en.lower(), doc["title"].lower())

        results.append({
            "title": doc["title"],
            "description": doc["description"],
            "sim_score": float(sim),
            "fuzzy_title": fuzzy_score,
            "rank_score": float(sim + 0.01 * fuzzy_score)
        })

    latency_ms = (time.time() - start) * 1000

    return {
        "latency_ms": latency_ms,
        "results": results
    }


In [18]:
test_queries = [
    # "समर सेल - 50% की छूट",     # Hindi
    "New Product Launch",       # English
]

for q in test_queries:
    out = search_templates(q, top_k=3)
    print("\n=======================")
    print("Query:", q)
    print(f"Latency: {out['latency_ms']:.2f} ms")
    for r in out["results"]:
        print(f"- rank_score={r['rank_score']:.3f}, sim={r['sim_score']:.3f}, fuzzy={r['fuzzy_title']}")
        print(f"  {r['title']} | {r['description']}")



Query: New Product Launch
Latency: 314.57 ms
- rank_score=4.367, sim=4.081, fuzzy=28.57142857142857
  Monsson Sale - 30% Off | Promote a seasonal summer discount with bright visuals and a bold call-to-action.
- rank_score=2.258, sim=1.925, fuzzy=33.333333333333336
  Marketing campaign template 392 | Versatile advertising template designed for campaign 392, focused on increasing engagement, conversions, and brand visibility.
- rank_score=2.253, sim=1.920, fuzzy=33.333333333333336
  Marketing campaign template 370 | Versatile advertising template designed for campaign 370, focused on increasing engagement, conversions, and brand visibility.
