In [None]:
pip install langchain-community transformers datasets faiss-cpu sentence-transformers Rouge


Collecting Rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: Rouge
Successfully installed Rouge-1.0.1


In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from datasets import load_dataset
from rouge import Rouge

dataset = load_dataset("squad", split="train[:1000]")  # Using first 1000 examples for demonstration


documents = [
    {"content": context, "metadata": {"title": title}}
    for context, title in zip(dataset["context"], dataset["title"])
]


text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = text_splitter.create_documents([doc["content"] for doc in documents])



embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")


vectorstore = FAISS.from_documents(texts, embeddings)




In [None]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

generator = pipeline("text2text-generation", model="HuggingFaceTB/cosmo-1b", device= 0 , max_new_tokens = 50 )
llm = HuggingFacePipeline(pipeline=generator)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The model 'LlamaForCausalLM' is not supported for text2text-generation. Supported models are ['BartForConditionalGeneration', 'BigBirdPegasusForConditionalGeneration', 'BlenderbotForConditionalGeneration', 'BlenderbotSmallForConditionalGeneration', 'EncoderDecoderModel', 'FSMTForConditionalGeneration', 'GPTSanJapaneseForConditionalGeneration', 'LEDForConditionalGeneration', 'LongT5ForConditionalGeneration', 'M2M100ForConditionalGeneration', 'MarianMTModel', 'MBartForConditionalGeneration', 'MT5ForConditionalGeneration', 'MvpForConditionalGeneration', 'NllbMoeForConditionalGeneration', 'PegasusForConditionalGeneration', 'PegasusXForConditionalGeneration', 'PLBartForConditionalGeneration', 'ProphetNetForConditionalGeneration', 'SeamlessM4TForTextToText', 'SeamlessM4Tv2ForTextToText', 'SwitchTransformersForConditionalGeneration', 'T5ForConditionalGeneration', 'UMT5ForConditionalGeneration', 'XLMProphetNetForConditionalGeneration'].


In [None]:
from langchain.chains.base import Chain
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from typing import Dict, List, Any
from langchain.chains.question_answering import load_qa_chain


prompt_template = """You question-answering task assistant.
Answer the question only based on your context and knowledge.
If you do not know the answer with given context, say that you do not know.
Use the following context:
{context}
Answer the following question: {question}
Answer: """

PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])


retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})  # Retrieve top 3 documents


def custom_qa_chain(llm, prompt, retriever):
    qa = load_qa_chain(llm, chain_type="stuff", prompt=prompt)

    def _run(query: str):
        docs = retriever.get_relevant_documents(query)
        result = qa({"input_documents": docs, "question": query}, return_only_outputs=True)
        return {"result": result["output_text"], "source_documents": docs}

    return _run


qa_chain = custom_qa_chain(llm, PROMPT, retriever)


In [None]:

dataset = load_dataset("squad", split="validation[:90]")  # Using first 100 examples for brevity


rouge = Rouge()

def evaluate_rag(qa_chain, dataset):
    rouge_scores = []
    retrieval_precision_scores = []

    for example in tqdm(dataset):
        question = example['question']
        ground_truth = example['answers']['text'][0]


        rag_response = qa_chain(question)
        generated_answer = rag_response['result']
        generated_answer = re.split('Answer:',generated_answer)[-1]
        generated_answer= re.split(r'\n\n', generated_answer)[0]  # Truncate at the first \n\n
        retrieved_docs = rag_response['source_documents']


        rouge_score = rouge.get_scores(generated_answer, ground_truth)[0]
        rouge_scores.append(rouge_score['rouge-l']['f'])


        relevant_docs = [doc for doc in retrieved_docs if ground_truth.lower() in doc.page_content.lower()]
        retrieval_precision = len(relevant_docs) / len(retrieved_docs) if retrieved_docs else 0
        retrieval_precision_scores.append(retrieval_precision)


    avg_rouge = np.mean(rouge_scores)
    avg_retrieval_precision = np.mean(retrieval_precision_scores)

    return {
        "avg_rouge_l_f1": avg_rouge,
        "avg_retrieval_precision": avg_retrieval_precision
    }

In [None]:
from tqdm import tqdm
import numpy as np

evaluation_results = evaluate_rag(qa_chain, dataset)

print("Evaluation Results:")
print(f"Average ROUGE-L F1: {evaluation_results['avg_rouge_l_f1']:.4f}")
print(f"Average Retrieval Precision: {evaluation_results['avg_retrieval_precision']:.4f}")

  0%|          | 0/90 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  1%|          | 1/90 [00:01<02:39,  1.79s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  2%|▏         | 2/90 [00:03<02:34,  1.76s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  3%|▎         | 3/90 [00:05<02:32,  1.75s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  4%|▍         | 4/90 [00:07<02:33,  1.79s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  6%|▌         | 5/90 [00:08<02:33,  1.81s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  7%|▋         | 6/90 [00:10<02:29,  1.78s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  8%|▊         | 7/90 [00:12<02:35,  1.88s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
  9%|▉         | 8/90 [00:14<02:40,  1.96s/it]Setting `pad_token_id` to `eos_token_id`:2 for ope

Evaluation Results:
Average ROUGE-L F1: 0.0302
Average Retrieval Precision: 0.0444





In [None]:
import re

question = "Where are Tussauds Wax Museums?"
answer = qa_chain(question)
answer['result'] = re.split('Answer:',answer['result'])[-1]
answer['result']= re.split(r'\n\n', answer['result'])[0]  # Truncate at the first \n\n
print(answer)
print("\nSource Documents:")
for i, doc in enumerate(answer["source_documents"], 1):
    print(f"{i}. {doc.page_content[:100]}...")


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'result': ' 1. New York, Washington, D.C., Amsterdam, Bangkok, Hollywood and Sydney.', 'source_documents': [Document(metadata={}, page_content='Tussauds Wax Museums in major cities around the world, including New York, Washington, D.C., Amsterdam, Bangkok, Hollywood and Sydney.'), Document(metadata={}, page_content='Tussauds Wax Museums in major cities around the world, including New York, Washington, D.C., Amsterdam, Bangkok, Hollywood and Sydney.'), Document(metadata={}, page_content='Tussauds Wax Museums in major cities around the world, including New York, Washington, D.C., Amsterdam, Bangkok, Hollywood and Sydney.')]}

Source Documents:
1. Tussauds Wax Museums in major cities around the world, including New York, Washington, D.C., Amsterd...
2. Tussauds Wax Museums in major cities around the world, including New York, Washington, D.C., Amsterd...
3. Tussauds Wax Museums in major cities around the world, including New York, Washington, D.C., Amsterd...
