In [1]:
# ===============================
# üì¶ INSTALL DEPENDENCIES
# ===============================
!pip install transformers sentence-transformers faiss-cpu openai

# ===============================
# 🔧 IMPORTS
# ===============================
import os

import torch
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# ===============================
# 🧭 1. RETRIEVAL-BASED CHATBOT
# ===============================

# Tiny FAQ-style knowledge base: one {"question", "answer"} record per entry,
# built from (question, answer) pairs to keep the data easy to scan.
knowledge_base = [
    {"question": faq_question, "answer": faq_answer}
    for faq_question, faq_answer in (
        ("What is the capital of France?", "The capital of France is Paris."),
        ("Who is the CEO of Tesla?", "Elon Musk is the CEO of Tesla."),
        ("What is machine learning?", "Machine learning is a field of AI that enables systems to learn from data."),
        ("What is the largest planet?", "Jupiter is the largest planet in our solar system."),
        ("Who wrote Hamlet?", "Hamlet was written by William Shakespeare."),
    )
]

# Sentence-embedding model used to compare questions by meaning
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every knowledge-base question once, up front, so that answering a
# query only requires encoding the query itself
kb_embeddings = embedder.encode(
    [entry["question"] for entry in knowledge_base],
    convert_to_tensor=True,
)

def retrieval_based_chatbot(user_query, min_score=None,
                            fallback_answer="Sorry, I don't have an answer for that."):
    """Answer ``user_query`` with the stored answer of the most similar FAQ question.

    The query is embedded with ``embedder`` and compared against the
    precomputed ``kb_embeddings`` via ``util.semantic_search``.

    Args:
        user_query: Free-text question from the user.
        min_score: Optional similarity threshold. When set and the best match
            scores below it, ``fallback_answer`` is returned instead of an
            unrelated FAQ answer. ``None`` (the default) always returns the
            best match, whatever its score.
        fallback_answer: Text returned when no acceptable match exists.

    Returns:
        A ``(answer, score)`` tuple, where ``score`` is the similarity reported
        by ``util.semantic_search`` for the best match (cosine similarity with
        the library's default scoring), or ``0.0`` if the search returned no hits.
    """
    # Encode the user query
    query_embedding = embedder.encode(user_query, convert_to_tensor=True)

    # semantic_search returns one hit list per query; we pass a single query
    hits = util.semantic_search(query_embedding, kb_embeddings, top_k=1)
    if not hits or not hits[0]:
        # Nothing to match against (e.g. an empty knowledge base)
        return fallback_answer, 0.0

    best_match = hits[0][0]
    best_score = best_match["score"]

    # Optionally refuse to answer when even the closest question is a poor match
    if min_score is not None and best_score < min_score:
        return fallback_answer, best_score

    return knowledge_base[best_match["corpus_id"]]["answer"], best_score

# ===============================
# 🧠 2. GENERATIVE CHATBOT (GPT)
# ===============================

# ⚠️ The generative chatbot calls the OpenRouter API, so it needs an OpenRouter API key (configured below)
import requests
import json

# 🔑 OpenRouter API key, read from the environment so the secret never lives in
# the notebook. Set OPENROUTER_API_KEY before starting the kernel; it is an empty
# string when the variable is unset.
# NOTE(review): a key was previously committed on this line; revoke it.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

def generative_chatbot(user_query, model="gpt-3.5-turbo", timeout=30):
    """Answer ``user_query`` with a chat model served through the OpenRouter API.

    Args:
        user_query: The question to send to the model.
        model: Model identifier sent to OpenRouter, e.g. "gpt-4-turbo" or
            "meta-llama/llama-3.1-70b-instruct".
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The model's reply with surrounding whitespace stripped. On any failure
        (missing key, network error, non-200 status, unexpected response body)
        a string starting with "⚠️ Error:" is returned instead of raising.
    """
    if not OPENROUTER_API_KEY:
        return "⚠️ Error: OPENROUTER_API_KEY is not set"

    prompt = f"You are a helpful assistant. Answer this question clearly: {user_query}"

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a concise and factual question-answering assistant."},
            {"role": "user", "content": prompt}
        ],
    }

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,  # requests serialises the body to JSON itself
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors, timeouts, etc. are reported like API errors
        return f"⚠️ Error: request failed, {exc}"

    if response.status_code != 200:
        return f"⚠️ Error: {response.status_code}, {response.text}"

    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        # Body was not JSON or did not have the expected chat-completion shape
        return f"⚠️ Error: unexpected response body, {response.text}"

    # `content` can be null in the JSON; treat that as an empty reply
    return (content or "").strip()

# ===============================
# 🔍 3. TEST BOTH CHATBOTS
# ===============================
# Sample questions: some match a knowledge-base question verbatim, others are paraphrases
user_queries = [
    "Who is the CEO of Tesla?",
    "Tell me about the capital of France",
    "Which planet is the biggest?",
    "Who wrote Hamlet?",
    "Explain machine learning in simple words.",
]

# Parallel result lists, one entry per query in `user_queries` order
retrieval_results = []   # (query, matched answer, similarity rounded to 3 decimals)
generative_results = []  # (query, generated answer)

for query in user_queries:
    # Ask both chatbots first, then record the two answers
    matched_answer, similarity = retrieval_based_chatbot(query)
    generated_answer = generative_chatbot(query)
    retrieval_results.append((query, matched_answer, round(similarity, 3)))
    generative_results.append((query, generated_answer))

# ===============================
# 📊 4. VISUAL COMPARISON
# ===============================
import pandas as pd
from IPython.display import display, HTML

# One row per query, pairing the retrieval answer (with its similarity score)
# and the generative answer. zip() walks the three parallel sequences together
# instead of indexing each one by position.
data = [
    {
        "User Question": question,
        "Retrieval-Based Answer": retrieval_answer,
        "Similarity Score": similarity,
        "Generative Answer": generative_answer,
    }
    for question, (_, retrieval_answer, similarity), (_, generative_answer)
    in zip(user_queries, retrieval_results, generative_results)
]

df = pd.DataFrame(data)
# The generative answers are untrusted model output, so keep HTML escaping on:
# any markup in a reply is shown as text rather than injected into the page.
display(HTML(df.to_html(escape=True)))


Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m31.4/31.4 MB[0m [31m55.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Unnamed: 0,User Question,Retrieval-Based Answer,Similarity Score,Generative Answer
0,Who is the CEO of Tesla?,Elon Musk is the CEO of Tesla.,1.0,Elon Musk is the CEO of Tesla.
1,Tell me about the capital of France,The capital of France is Paris.,0.928,"The capital of France is Paris. It is known for its rich history, iconic landmarks such as the Eiffel Tower and the Louvre Museum, as well as its renowned cuisine and fashion industry."
2,Which planet is the biggest?,Jupiter is the largest planet in our solar system.,0.95,Jupiter is the largest planet in our solar system.
3,Who wrote Hamlet?,Hamlet was written by William Shakespeare.,1.0,"The play ""Hamlet"" was written by William Shakespeare."
4,Explain machine learning in simple words.,Machine learning is a field of AI that enables systems to learn from data.,0.78,"Machine learning is a type of artificial intelligence that teaches computers to learn and make decisions without being explicitly programmed. Essentially, it involves creating algorithms and models that allow machines to improve their performance on a task over time by learning from data."
