In [None]:
# Run this cell once to install packages on the Kaggle runtime
!pip install -q sentence-transformers faiss-cpu ipywidgets


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m76.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m40.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import json
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
from datetime import datetime
import os
from pathlib import Path

# ipywidgets for interactive notebook UI
import ipywidgets as widgets
from IPython.display import display, clear_output

# Confirm the imports succeeded and show the directory relative paths resolve against
print("Packages loaded.", f"Working dir: {os.getcwd()}", sep="\n")


Packages loaded.
Working dir: /content


In [None]:
# Path to your intents.json — place the file in the notebook working directory or a dataset
INTENTS_PATH = "intents.json"

if not Path(INTENTS_PATH).exists():
    raise FileNotFoundError(
        f"{INTENTS_PATH} not found. Upload the file to the notebook (Kaggle: Add data/upload) "
        "or change INTENTS_PATH to point to your file."
    )

with open(INTENTS_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

# The code below reads data["intents"], so reject any other JSON root with a clear
# message instead of an AttributeError on `.get`.
if not isinstance(data, dict):
    raise ValueError(
        f"{INTENTS_PATH} must contain a JSON object with an 'intents' list, "
        f"got {type(data).__name__}."
    )

intents = data.get("intents", [])

# Every response becomes one retrievable document; doc_metadata[i] describes documents[i].
documents = []
doc_metadata = []
for intent in intents:
    for response in intent.get("responses", []):
        documents.append(response)
        doc_metadata.append({
            "tag": intent.get("tag", ""),
            "patterns": intent.get("patterns", [])
        })

# Fail here rather than later: the embedding/index cell cannot work with an empty corpus.
if not documents:
    raise ValueError(
        f"No responses found in {INTENTS_PATH}; expected at least one intent "
        "with a non-empty 'responses' list."
    )

print(f"Loaded {len(documents)} documents from {len(intents)} intents.")


Loaded 831 documents from 831 intents.


In [None]:
# Sentence-embedding model (all-MiniLM-L6-v2 is compact and fast for Kaggle)
MODEL_NAME = "all-MiniLM-L6-v2"

print("Loading model:", MODEL_NAME)
model = SentenceTransformer(MODEL_NAME)

print("Encoding documents...")
# Encode every document and cast to float32 before handing the vectors to FAISS
embeddings = model.encode(
    documents,
    convert_to_numpy=True,
    show_progress_bar=True,
).astype(np.float32)

# Second axis of the embedding matrix is the vector size the index is created with
dim = embeddings.shape[1]
print("Embedding dimension:", dim)

print("Building FAISS index (L2)...")
index = faiss.IndexFlatL2(dim)
index.add(embeddings)
print("FAISS index built. Total vectors:", index.ntotal)


Loading model: all-MiniLM-L6-v2
Encoding documents...


Batches:   0%|          | 0/26 [00:00<?, ?it/s]

Embedding dimension: 384
Building FAISS index (L2)...
FAISS index built. Total vectors: 831


In [None]:
def retrieve_documents(query, k=3):
    """Return up to ``k`` stored documents closest to ``query`` in the index.

    Args:
        query: Text to embed with the module-level ``model``.
        k: Maximum number of neighbours to request. Values below 1 yield an
            empty list; values above the number of documents are clamped to it.

    Returns:
        A list of dicts in the order returned by ``index.search``. Each dict has
        ``"content"`` (the document text), ``"distance"`` (the score reported by
        the index, as a Python float) and ``"metadata"`` (the matching
        ``doc_metadata`` entry).
    """
    # FAISS asserts k > 0, and asking for more neighbours than there are
    # documents only produces padding entries, so normalise k up front.
    k = min(int(k), len(documents))
    if k < 1:
        return []

    q_emb = model.encode([query], convert_to_numpy=True).astype('float32')
    distances, indices = index.search(q_emb, k)

    results = []
    for dist, idx in zip(distances[0], indices[0]):
        idx = int(idx)
        # Ignore ids that do not map to a stored document (e.g. negative padding ids).
        if idx < 0 or idx >= len(documents):
            continue
        results.append({
            "content": documents[idx],
            "distance": float(dist),
            "metadata": doc_metadata[idx]
        })
    return results

def generate_response(query, retrieved_docs, distance_threshold_best=1.5, related_threshold=2.0):
    """Pick an answer from retrieved documents and collect related suggestions.

    Args:
        query: The user's question (not used by the selection logic).
        retrieved_docs: Retrieval results; the first entry is treated as the best
            match. Each entry needs ``"content"``, ``"distance"`` and
            ``"metadata"`` (with a ``"tag"`` key).
        distance_threshold_best: The first entry is rejected when its distance is
            greater than this value.
        related_threshold: Remaining entries are listed as related only when
            their distance is strictly below this value.

    Returns:
        A ``(response_text, related)`` tuple. ``related`` is a list of dicts with
        ``"content"``, ``"tag"`` and ``"relevance"`` (``1 / (1 + distance)``); it
        is empty whenever a fallback message is returned.
    """
    if not retrieved_docs:
        return "I don't have enough information to answer that question.", []

    top_hit = retrieved_docs[0]
    if top_hit["distance"] > distance_threshold_best:
        return "I'm not confident about that. Please rephrase or provide more detail.", []

    answer = top_hit["content"]
    # Everything after the first hit is a candidate "related" suggestion.
    suggestions = [
        {
            "content": hit["content"],
            "tag": hit["metadata"]["tag"],
            "relevance": 1.0 / (1.0 + hit["distance"]),
        }
        for hit in retrieved_docs[1:]
        if hit["distance"] < related_threshold
    ]
    return answer, suggestions


In [None]:
# Widgets that make up the chat interface
input_box = widgets.Text(
    description="Question:",
    placeholder="Type your question about AI, ML, Deep Learning, etc.",
    value="",
    layout=widgets.Layout(width="70%"),
)
ask_button = widgets.Button(description="Ask", button_style="primary")
clear_button = widgets.Button(description="Clear", button_style="")
# Number of nearest neighbours requested for each question
k_slider = widgets.IntSlider(description="k", min=1, max=5, value=3)
# Bordered, fixed-height, scrollable area the conversation is printed into
out = widgets.Output(
    layout={
        'border': '1px solid lightgray',
        'height': '400px',
        'overflow_y': 'auto',
    }
)

# Running log of (role, text, timestamp) tuples
chat_history = []

def display_message(role, text, related=None, timestamp=None):
    """Print one chat message, plus optional related topics, into the ``out`` widget.

    Args:
        role: Speaker label shown in the header line.
        text: Message body.
        related: Optional iterable of dicts with ``"tag"``, ``"relevance"`` and
            ``"content"`` keys; skipped when empty or ``None``.
        timestamp: Preformatted time string; the current time (``HH:MM:SS``) is
            used when it is missing or empty.
    """
    stamp = timestamp if timestamp else datetime.now().strftime("%H:%M:%S")
    with out:
        # Header line followed by the message body
        print(f"[{stamp}] {role}:", text, sep="\n")
        if related:
            print("\nRelated:")
            for entry in related:
                print(f"  - Topic: {entry['tag']} (relevance: {entry['relevance']:.2%})")
                print(f"    {entry['content']}")
        # Separator between consecutive messages
        print("-" * 80)

def on_ask_clicked(b):
    """Handle the "Ask" action: log the question, retrieve an answer, show both.

    Blank input is ignored. Otherwise the question and the generated response are
    appended to ``chat_history`` as ``(role, text, timestamp)`` tuples, rendered
    through ``display_message``, and the input box is cleared.

    Args:
        b: The widget that triggered the callback; unused, so callers may pass
            ``None`` to invoke the handler programmatically.
    """
    query = input_box.value.strip()
    if not query:
        return

    # Take each timestamp once so the stored history entry and the rendered
    # message always show the same time.
    asked_at = datetime.now().strftime("%H:%M:%S")
    chat_history.append(("User", query, asked_at))
    # display_message enters the `out` widget itself, so no extra `with out:` is needed.
    display_message("User", query, timestamp=asked_at)

    # Retrieve with the neighbour count currently selected on the slider.
    retrieved = retrieve_documents(query, k=k_slider.value)
    response, related = generate_response(query, retrieved)

    answered_at = datetime.now().strftime("%H:%M:%S")
    chat_history.append(("Assistant", response, answered_at))
    display_message("Assistant", response, related, answered_at)

    input_box.value = ""

def on_clear_clicked(b):
    """Handle the "Clear" action: wipe the output area and forget the stored history.

    Args:
        b: The widget that triggered the callback (unused).
    """
    out.clear_output()
    # Empty the list in place so other references to chat_history see the reset too
    del chat_history[:]

# Connect the buttons to their callbacks
ask_button.on_click(on_ask_clicked)
clear_button.on_click(on_clear_clicked)

# Layout: one row of controls stacked above the output area
controls = widgets.HBox(children=[input_box, ask_button, clear_button, k_slider])
ui = widgets.VBox(children=[controls, out])

display(ui)

# Greet the user inside the output area
with out:
    print(
        "Welcome — ask a question and press Ask.",
        "This notebook uses sentence-transformers + FAISS for retrieval.",
        "-" * 80,
        sep="\n",
    )


VBox(children=(HBox(children=(Text(value='', description='Question:', layout=Layout(width='70%'), placeholder=…

In [None]:
# Optional: one-click example questions
examples = [
    "What is machine learning?",
    "Explain neural networks",
    "What is a database?",
    "What is deep learning?",
    "Explain cloud computing",
    "What is encryption?",
]

# One button per example, labelled with the question and sized to fit its text
example_buttons = [
    widgets.Button(description=question, layout=widgets.Layout(width='auto'))
    for question in examples
]

def make_example_handler(q):
    """Build a click callback that submits the example question ``q``.

    Args:
        q: Question text the returned callback places in ``input_box``.

    Returns:
        A one-argument callable (the argument is ignored) that sets
        ``input_box.value`` to ``q`` and then calls ``on_ask_clicked(None)``.
    """
    def _submit_example(_button):
        # Fill the input box first: on_ask_clicked reads the question from it
        input_box.value = q
        on_ask_clicked(None)

    return _submit_example

# Give every example button a callback bound to its own question
for btn, ex in zip(example_buttons, examples):
    submit_example = make_example_handler(ex)
    btn.on_click(submit_example)

# Render the example buttons side by side
display(widgets.HBox(children=example_buttons))


HBox(children=(Button(description='What is machine learning?', layout=Layout(width='auto'), style=ButtonStyle(…