In [2]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

loader = PyPDFDirectoryLoader('data')
docs = loader.load()

len(docs)

3

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 600, chunk_overlap = 60)
chunk_docs = text_splitter.split_documents(docs)

len(chunk_docs)

16

In [4]:
import os
from dotenv import load_dotenv
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")
openai_api_key

'sk-proj-HLBdBn5jdvVr7o6L7vt6T3BlbkFJnqZAHgt1fnkgUQVOx5PO'

In [5]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(api_key=openai_api_key)

In [6]:
from pinecone import Pinecone

pc = Pinecone(api_key=pinecone_api_key)
index_name = "compression"

  from tqdm.autonotebook import tqdm


In [7]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore.from_documents(
    chunk_docs,
    embeddings,
    index_name = index_name
)

In [47]:
query = "I want to hike in the summer. Could you suggest what are the best destinations for hiking? Im also interested in visiting towns"

retriever = vector_store.as_retriever()
relevant_docs = retriever.get_relevant_documents(query)

for doc in relevant_docs:
    print("Relevant docs")
    print(doc)
    print('\n')

Relevant docs
page_content='to visit is during the spring and summer months when the weather is mild, and the landscape is 
alive with color. Hikers should wear sturdy hiking boots, weather -appropriate clothing, and bring 
plenty of water and snacks for longer treks.  
2. Mourne  Mountains:  
Venture north to County Down, and you'll discover the Mourne Mountains, a landscape of rugged 
granite peaks that rise majestically above the Irish Sea. Slieve Donard, Northern Ireland's highest 
peak, beckons intrepid hikers to its summit, offering breathtaking panoramic views of the' metadata={'page': 0.0, 'source': "data\\A Hiker's Paradise.pdf"}


Relevant docs
page_content='round trip, depending on the chosen route and hiking pace. The best time to visit is during the 
summer months when the weather is mild, and the days are long. Hikers should wear sturdy hiking 
boots with good grip, layers to protect against wind and rain, and bring a camera to capture the 
breathtaking views.  
6. Carrau

In [11]:
#compressed retriever
from langchain_openai import ChatOpenAI
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers import ContextualCompressionRetriever

base_retriever = vector_store.as_retriever(search_type = "similarity", search_kwargs={"k": 5})
llm = ChatOpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=base_retriever
)

In [46]:
#Relevant docs based on context compression
compressed_docs= compression_retriever.get_relevant_documents(query)

for doc in compressed_docs:
    print("Compressed Document:")
    print(doc)
    print("\n")

Compressed Document:
page_content='- "during the spring and summer months"
- "Hikers should wear sturdy hiking boots, weather -appropriate clothing, and bring plenty of water and snacks for longer treks."
- "Mourne Mountains"
- "Venture north to County Down"
- "Slieve Donard, Northern Ireland's highest peak"' metadata={'page': 0.0, 'source': "data\\A Hiker's Paradise.pdf"}


Compressed Document:
page_content='- The best time to visit is during the summer months when the weather is mild, and the days are long.
- Hikers should wear sturdy hiking boots with good grip, layers to protect against wind and rain, and bring a camera to capture the breathtaking views.
- Carrauntoohil: In the heart of County Kerry lies Carrauntoohil, Ireland's highest peak and a magnet for hikers seeking a challenge.' metadata={'page': 1.0, 'source': "data\\A Hiker's Paradise.pdf"}


Compressed Document:
page_content='- The best time to visit is during the spring and summer months when the weather is mild, and th

In [16]:
#optimise query
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
optimisation_template = """Given the following user query, optimise it for the retrieval by identifying key concepts and expanding on them:

User query:{query}

Oprimised query:"""

optimisation_prompt = PromptTemplate(template=optimisation_template, input_variables=['query'])
optimisation_chain = LLMChain(llm=llm, prompt = optimisation_prompt, verbose=True)
optimised_query = optimisation_chain.run(query)
optimised_query



[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following user query, optimise it for the retrieveal by identifying key concepts and expanding on them:

User query:I want to hike in the summer. Could you suggest what are the best destinations for hiking? Im also interested in visiting towns

Oprimised query:[0m

[1m> Finished chain.[0m


'I am looking for the best hiking destinations for the summer. Additionally, I am interested in visiting towns during my trip. Can you suggest some ideal locations for hiking and exploring towns during the summer months?'

In [33]:
#RAG Pipeline
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff",
    retriever = compression_retriever,
    return_source_documents = True,
    chain_type_kwargs={
        "prompt": PromptTemplate(
            template=""" Use the following pieces of context to answer the question at the end.
            If you don't know the answer, just say that you don't know. Do not try to make up an answer.
            
            {context}
            
            Question: {question}
            Answer:""",
            input_variables=['context', 'question']
        ), 
    },
    verbose =  True
)

result = qa_chain({'query': optimised_query})
result



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


{'query': 'I am looking for the best hiking destinations for the summer. Additionally, I am interested in visiting towns during my trip. Can you suggest some ideal locations for hiking and exploring towns during the summer months?',
 'result': 'One ideal location for hiking and exploring towns during the summer months is the Wicklow Mountains in Ireland. The mild weather and long days make it a perfect destination for outdoor activities. The extensive network of trails offers a variety of options for hikers of all levels, and the nearby towns provide opportunities for exploring local culture and history.',
 'source_documents': [Document(metadata={'page': 1.0, 'source': "data\\A Hiker's Paradise.pdf"}, page_content='summer months when the weather is mild, and the days are long.'),
  Document(metadata={'page': 0.0, 'source': "data\\A Hiker's Paradise.pdf"}, page_content='spring and summer months when the weather is mild, and the landscape is alive with color.'),
  Document(metadata={'pag

In [None]:
# Display the flow of data
print("Data Flow in the RAG Pipeline:")
print(f"Initial Query: {optimised_query['query']}")
print(f"Optimized Query: {optimised_query['optimized_query']}")
print(f"Prompt Context: {qa_chain.chain_type_kwargs['prompt'].template}")

In [49]:
# Print results
print(f"Original query: {query}")
print(f"Optimized query: {optimised_query}")
print(f"Answer: {result['result']}")
print("\nSource documents:")
for doc in result['source_documents']:
    print(f"- {doc.page_content}...")

Original query: I want to hike in the summer. Could you suggest what are the best destinations for hiking? Im also interested in visiting towns
Optimized query: I am looking for the best hiking destinations for the summer. Additionally, I am interested in visiting towns during my trip. Can you suggest some ideal locations for hiking and exploring towns during the summer months?
Answer: Some ideal locations for hiking and exploring towns during the summer months in Ireland could be the Mourne Mountains in County Down and the Wicklow Mountains. Both offer great hiking opportunities with stunning landscapes and are located near towns where you can explore and experience local culture.


In [37]:
# Run the RetrievalQA pipeline without context compression
retriever_qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever = retriever,
    return_source_documents=True,
    chain_type_kwargs={
        "prompt": PromptTemplate(
            template="""Use the following pieces of context to answer the question at the end. 
            If you don't know the answer, just say that you don't know, don't try to make up an answer.

            {context}

            Question: {question}
            Answer:""",
            input_variables=["context", "question"],
        ),
    },
)

In [48]:
# Execute the pipeline without context compression
result = retriever_qa_chain({"query": optimised_query})

# Print the results without context compression
print(f"Original query: {query}")
print(f"Optimized query: {optimised_query}")
print(f"Answer: {result['result']}")



Original query: I want to hike in the summer. Could you suggest what are the best destinations for hiking? Im also interested in visiting towns
Optimized query: I am looking for the best hiking destinations for the summer. Additionally, I am interested in visiting towns during my trip. Can you suggest some ideal locations for hiking and exploring towns during the summer months?
Answer: Some ideal locations for hiking and exploring towns during the summer months in Ireland could be the Mourne Mountains in County Down and the Wicklow Mountains. Both offer great hiking opportunities with stunning landscapes and are located near towns where you can explore and experience local culture.
