<a href="https://colab.research.google.com/github/jeevisha19/DATA-SCIENCE/blob/main/Transformer_Chatbot_Version2_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
!pip install sentence-transformers faiss-cpu langchain

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m60.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.2


In [22]:
documents = [
    "Natural Language Processing is a branch of artificial intelligence that focuses on the interaction between computers and human language.",
    "Transformers are deep learning models based on self-attention mechanisms and are widely used in NLP tasks.",
    "Machine learning allows systems to learn patterns from data without explicit programming.",
    "Deep learning uses neural networks with multiple layers to learn complex representations."
]

In [23]:
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer('all-MiniLM-L6-v2')

doc_embeddings = embedder.encode(documents)

In [24]:
import faiss
import numpy as np

dimension = doc_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(np.array(doc_embeddings))

In [26]:
def retrieve_docs(query, top_k=2):
    query_embedding = embedder.encode([query])
    distances, indices = index.search(np.array(query_embedding), top_k)
    return [documents[i] for i in indices[0]]

In [33]:
def rag_chat(query):
    # 1. Retrieve relevant documents
    retrieved_docs = retrieve_docs(query, top_k=2)

    # 2. Build short context
    context = " ".join(retrieved_docs)

    # 3. Augment input
    augmented_input = context + "\nQuestion: " + query + "\nAnswer:"

    # 4. Tokenize
    input_ids = tokenizer.encode(
        augmented_input,
        return_tensors="pt",
        truncation=True,
        max_length=512
    )

    # 5. Generate controlled response
    output_ids = model.generate(
        input_ids,
        max_new_tokens=80,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        repetition_penalty=1.2,
        pad_token_id=tokenizer.eos_token_id
    )

    # 6. Decode and clean output
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    response = response.split("Answer:")[-1].strip()

    return response


In [35]:
while True:
    query = input("You: ")
    if query.lower() in ["exit", "quit", "bye"]:
        print("RAG Bot: Goodbye!")
        break
    print("RAG Bot:", rag_chat(query))
    print("-" * 50)


You: bye
RAG Bot: Goodbye!


Evaluation for RAG


EVALUATE RETRIEVAL QUALITY (RECALL@K)

In [36]:
test_data = [
    {
        "question": "What is natural language processing?",
        "ground_truth": "Natural language processing is a branch of artificial intelligence that focuses on the interaction between computers and human language."
    },
    {
        "question": "What are transformers?",
        "ground_truth": "Transformers are deep learning models based on self-attention mechanisms used in NLP."
    },
    {
        "question": "What is deep learning?",
        "ground_truth": "Deep learning uses neural networks with multiple layers to learn complex representations."
    }
]

In [37]:
def retrieval_recall_at_k(test_data, k=2):
    correct = 0

    for item in test_data:
        retrieved_docs = retrieve_docs(item["question"], top_k=k)
        if any(item["ground_truth"].lower() in doc.lower() for doc in retrieved_docs):
            correct += 1

    return correct / len(test_data)


In [38]:
recall_k = retrieval_recall_at_k(test_data, k=2)
print("Retrieval Recall@2:", recall_k)


Retrieval Recall@2: 0.6666666666666666


EVALUATE ANSWER QUALITY


In [39]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def semantic_similarity_score(test_data):
    scores = []

    for item in test_data:
        response = rag_chat(item["question"])

        emb_response = embedder.encode([response])
        emb_truth = embedder.encode([item["ground_truth"]])

        sim = cosine_similarity(emb_response, emb_truth)[0][0]
        scores.append(sim)

    return np.mean(scores)


In [40]:
similarity_score = semantic_similarity_score(test_data)
print("Average Semantic Similarity:", similarity_score)


Average Semantic Similarity: 0.61590594


Comparision with NO RAG version

Create a baseline function:

In [28]:
def no_rag_chat(query):
    input_ids = tokenizer.encode(
        query + tokenizer.eos_token,
        return_tensors="pt"
    )

    output_ids = model.generate(
        input_ids,
        max_length=300,
        pad_token_id=tokenizer.eos_token_id
    )

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

Compare Similarity:

In [41]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def compare_rag_vs_no_rag(test_data):
    rag_scores = []
    no_rag_scores = []

    for item in test_data:
        rag_resp = rag_chat(item["question"])
        no_rag_resp = no_rag_chat(item["question"])

        emb_truth = embedder.encode([item["ground_truth"]])

        rag_emb = embedder.encode([rag_resp])
        no_rag_emb = embedder.encode([no_rag_resp])

        rag_scores.append(cosine_similarity(rag_emb, emb_truth)[0][0])
        no_rag_scores.append(cosine_similarity(no_rag_emb, emb_truth)[0][0])

    return np.mean(no_rag_scores), np.mean(rag_scores)

In [42]:
no_rag, rag = compare_rag_vs_no_rag(test_data)

print("No-RAG Similarity:", no_rag)
print("RAG Similarity:", rag)


No-RAG Similarity: 0.27347732
RAG Similarity: 0.55999523
