In [None]:
# ======================================================
# 0. Setup – install deps (run once)
# ======================================================
!pip -q install gradio huggingface_hub sentence-transformers scikit-learn scipy seaborn

# ======================================================
# 1. Imports
# ======================================================
import os, joblib, numpy as np
import gradio as gr
from scipy.stats import entropy
from huggingface_hub import login, whoami, snapshot_download
from sentence_transformers import SentenceTransformer



In [None]:
# ======================================================
# 2. Hugging Face login (interactive – safest in Colab)
# ======================================================
login()                                    # paste your HF token only once
print("✅ Logged in as:", whoami()["name"])




In [6]:
# =============================================================
# 3. Download dataset repo containing model artifacts
# =============================================================
REPO_ID = "ireneom3/strategy-classifier"  # your dataset repo
local_path = snapshot_download(repo_id=REPO_ID, repo_type="dataset")

# =============================================================
# 4. Load embedder + classifier + label encoder from subfolder
# =============================================================
model_subdir = os.path.join(local_path, "all-mpnet-base-v2")

embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")  # same as training
clf = joblib.load(os.path.join(model_subdir, "strategy_selector_lr_balanced.joblib"))
le  = joblib.load(os.path.join(model_subdir, "label_encoder_balanced.joblib"))



Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

all-MiniLM-L6-v2/strategy_selector_lr_ba(…):   0%|          | 0.00/171k [00:00<?, ?B/s]

all-mpnet-base-v2/strategy_selector_lr_b(…):   0%|          | 0.00/1.10M [00:00<?, ?B/s]

all-MiniLM-L6-v2/label_encoder_balanced (…):   0%|          | 0.00/12.0k [00:00<?, ?B/s]

all-MiniLM-L6-v2/sbert_embeddings_balanc(…):   0%|          | 0.00/75.3M [00:00<?, ?B/s]

all-MiniLM-L6-v2/strategy_selector_lr_ba(…):   0%|          | 0.00/171k [00:00<?, ?B/s]

.gitattributes: 0.00B [00:00, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [9]:
import gradio as gr
import numpy as np
from scipy.stats import entropy

# ===============================
# Router config
# ===============================
HIGH_CONF   = 0.75   # ↓ lowered
MARGIN_MIN  = 0.10   # ↓ more tolerant
ENTROPY_MAX = 1.4    # ↑ more forgiving

# ===============================
# Prompt classifier logic
# ===============================
def route_prompt(prompt: str):
    emb   = embedder.encode([prompt])
    probs = clf.predict_proba(emb)[0]
    idxs  = probs.argsort()[::-1]

    top1, top2 = probs[idxs[0]], probs[idxs[1]]
    top_label  = le.inverse_transform([idxs[0]])[0]
    ent        = float(entropy(probs))

    # Extra logging (Top-5 for dev)
    top5 = [(le.inverse_transform([i])[0], f"{probs[i]:.2f}") for i in idxs[:5]]
    top5_str = "\n".join([f"{i+1}. {lbl} → {conf}" for i, (lbl, conf) in enumerate(top5)])

    # Routing logic
    if (top1 >= HIGH_CONF) and (top1 - top2 >= MARGIN_MIN) and (ent <= ENTROPY_MAX):
        action = "✅ High confidence – proceed with selected agent"
    elif top_label == "Direct" and top1 < 0.8:
        action = "⚠️ 'Direct' fallback – low confidence, verify before routing"
    else:
        action = f"🟡 Uncertain (p={top1:.2f}, Δ={top1 - top2:.2f}, H={ent:.2f}) – ask or fallback"

    return top_label, f"{top1:.2f}", action, top5_str


# ===============================
# Demo prompts
# ===============================
examples = [
    "Develop a drawing app using basic geometric shapes and composite structures.",  # Design Pattern
    "Implement undo/redo functionality using Command pattern.",                      # Design Pattern
    "Use the Factory pattern to create different types of parsers.",                 # Design Pattern
    "Fix crash when loading image without metadata.",                                # Bug Fix
    "Connect two microservices and pass JSON messages between them.",                # Data Pipeline
    "Refactor controller logic into separate strategy classes.",                     # Refactor + Design
    "Build a documentation generator for Python functions.",                         # Doc Generator
    "Test if planner output matches final execution trace."                          # Integration Test
]

# ===============================
# Gradio interface
# ===============================
iface = gr.Interface(
    fn=route_prompt,
    inputs=gr.Textbox(lines=3, label="Enter programming prompt"),
    outputs=[
        gr.Textbox(label="Predicted Strategy"),
        gr.Textbox(label="Confidence Score"),
        gr.Textbox(label="Routing Decision"),
        gr.Textbox(label="🔍 Top-5 Predictions")
    ],
    examples=examples,
    title="🧠 Strategy Classifier + Router",
    description="Classifies code prompts into strategy labels and decides if agent routing is safe. Includes entropy & margin checks for better fallback handling."
)

iface.launch(debug=True)


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://98f64c0c81cdc3aaa2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://98f64c0c81cdc3aaa2.gradio.live


