In [32]:
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from langchain import hub
from langchain_community.document_transformers import (
    EmbeddingsRedundantFilter,
    LongContextReorder,
)
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors.base import DocumentCompressorPipeline
from langchain.retrievers.document_compressors import LLMChainExtractor, LLMChainFilter, EmbeddingsFilter
from langchain.retrievers.merger_retriever import MergerRetriever
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import SystemMessage
from pprint import pprint

import os

In [15]:
# Load variables from a local .env file (if one exists) into the process
# environment before the clients are built. Variables that are already set
# are left untouched, so this is a no-op when the key is exported in the shell.
load_dotenv()

# Embedding client used both to index the PDF chunks and to embed queries.
# NOTE(review): expects the Google API key in the environment - confirm the
# variable name against the langchain_google_genai documentation.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# Local Ollama model used by the LLM-based document compressors.
model = OllamaLLM(model="llama3.2")

In [5]:

# Source PDF to index (absolute path on the author's machine).
file_path = "/home/bishwayansaha99/langchain/docs/ww1.pdf"

# Load the PDF and split it into chunks of up to 1000 characters with a
# 100-character overlap between neighbouring chunks.
loader = PyPDFLoader(file_path)
documents = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
doc_chunks = splitter.split_documents(documents)

print("===== Creating New Vector Database =====")
# Embed every chunk and build the FAISS vector store from the results.
db = FAISS.from_documents(
    documents=doc_chunks,
    embedding=embedding,
)

# The vector store already holds the embedding it was built with, and
# `as_retriever` only takes retriever options (e.g. search_type,
# search_kwargs), so no `embedding` argument is passed here.
retriever = db.as_retriever()

===== Creating New Vector Database =====


In [13]:
def print_documents(docs):
    """Pretty-print each document's metadata and text to stdout.

    For every item in ``docs`` this writes a 1-based header line containing the
    document's ``metadata``, a blank line, the ``pprint`` rendering of its
    ``page_content``, and finally a 150-character ``=`` separator line.

    Args:
        docs: Iterable of objects exposing ``metadata`` and ``page_content``.
    """
    separator = "=" * 150
    for position, document in enumerate(docs, start=1):
        print(f"Document: {position}, Metadata: {document.metadata}\n")
        pprint(document.page_content)
        print(separator)

In [22]:
# Baseline: query the plain vector-store retriever with no compression.
# `invoke` is the retriever entry point; `get_relevant_documents` is
# deprecated in langchain-core and scheduled for removal.
normal_docs = retriever.invoke("What changed after world war 1?")
print_documents(normal_docs)

Document: 1, Metadata: {'source': '/home/bishwayansaha99/langchain/docs/ww1.pdf', 'page': 3}

('The war also had major consequences for the European socialist and labour '
 'movement. \n'
 'Although well organised in many countries, including Britain , France  and '
 'Germany, the \n'
 'socialist movement failed to stop the war in 1 914. Initially skilled '
 'workers in the armaments \n'
 'industry were not only exempted from military service but also enjoyed '
 'higher wages and \n'
 'better food in return for the banning of strike action. But as the war '
 'continued living and \n'
 'working conditions for facto ry workers gradually declined. Socialist groups '
 'began to agitate \n'
 'for peace, a process that received a boost as a result of the 1917 Russian '
 'revolution. At the \n'
 'end of the war in 1918 the socialist and trade union movement was much '
 'stronger than in \n'
 '1914. \n'
 'The Great War also saw the introduction of the planned economy and a much '
 'bigger role

In [17]:
# Compressor built from the LLM; wrapped around the base retriever so that
# retrieved documents pass through it before being returned.
llm_chain_extractor = LLMChainExtractor.from_llm(model)
chain_extractor_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=llm_chain_extractor,
)

In [21]:
# Same query through the LLM-extractor compression retriever.
# `invoke` replaces the deprecated `get_relevant_documents` call.
compressed_docs_1 = chain_extractor_retriever.invoke("What changed after world war 1?")
print_documents(compressed_docs_1)

Document: 1, Metadata: {'source': '/home/bishwayansaha99/langchain/docs/ww1.pdf', 'page': 2}

('> The changes resulting from the First World War \n'
 '>> \n'
 'The impact of the First World War was hugely destructive it also produced '
 'many \n'
 'new developments in medicine, warfare, politics and social attitudes.\n'
 ' \n'
 'Technologies such as airplanes, submarines, tanks all pla ying important new '
 'roles.')


In [28]:
# LLM-backed filter used as the compressor for a second compression retriever
# over the same base retriever.
llm_chain_filter = LLMChainFilter.from_llm(model)
chain_filter_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=llm_chain_filter,
)

In [29]:
# Same query through the LLM-filter compression retriever.
# `invoke` replaces the deprecated `get_relevant_documents` call.
compressed_doc_2 = chain_filter_retriever.invoke("What changed after world war 1?")
print_documents(compressed_doc_2)

Document: 1, Metadata: {'source': '/home/bishwayansaha99/langchain/docs/ww1.pdf', 'page': 3}

('The war also had major consequences for the European socialist and labour '
 'movement. \n'
 'Although well organised in many countries, including Britain , France  and '
 'Germany, the \n'
 'socialist movement failed to stop the war in 1 914. Initially skilled '
 'workers in the armaments \n'
 'industry were not only exempted from military service but also enjoyed '
 'higher wages and \n'
 'better food in return for the banning of strike action. But as the war '
 'continued living and \n'
 'working conditions for facto ry workers gradually declined. Socialist groups '
 'began to agitate \n'
 'for peace, a process that received a boost as a result of the 1917 Russian '
 'revolution. At the \n'
 'end of the war in 1918 the socialist and trade union movement was much '
 'stronger than in \n'
 '1914. \n'
 'The Great War also saw the introduction of the planned economy and a much '
 'bigger role

In [30]:
def get_content_length(docs):
    """Print the total character count of the documents' text.

    The ``page_content`` of every item in ``docs`` is joined with a single
    newline and the length of the joined string is printed, so the separating
    newlines count toward the total. An empty ``docs`` prints a length of 0.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.
    """
    # Build the joined text outside the f-string: reusing double quotes and a
    # backslash inside an f-string replacement field is a SyntaxError on
    # Python versions before 3.12.
    combined_text = "\n".join(doc.page_content for doc in docs)
    print(f"Content length: {len(combined_text)}")

In [31]:
# Report the combined text size of the baseline, LLM-extractor and LLM-filter
# results, in that order.
for retrieved_docs in (normal_docs, compressed_docs_1, compressed_doc_2):
    get_content_length(retrieved_docs)

Content length: 3586
Content length: 289
Content length: 3586


In [33]:
# Embedding-based filter (similarity threshold 0.50) used as the compressor
# for a third compression retriever over the same base retriever.
embedding_filter = EmbeddingsFilter(
    embeddings=embedding,
    similarity_threshold=0.50,
)
embedding_filter_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=embedding_filter,
)

In [34]:
# Same query through the embeddings-filter compression retriever.
# `invoke` replaces the deprecated `get_relevant_documents` call.
compressed_doc_3 = embedding_filter_retriever.invoke("What changed after world war 1?")
print_documents(compressed_doc_3)

Document: 1, Metadata: {'source': '/home/bishwayansaha99/langchain/docs/ww1.pdf', 'page': 3}

('The war also had major consequences for the European socialist and labour '
 'movement. \n'
 'Although well organised in many countries, including Britain , France  and '
 'Germany, the \n'
 'socialist movement failed to stop the war in 1 914. Initially skilled '
 'workers in the armaments \n'
 'industry were not only exempted from military service but also enjoyed '
 'higher wages and \n'
 'better food in return for the banning of strike action. But as the war '
 'continued living and \n'
 'working conditions for facto ry workers gradually declined. Socialist groups '
 'began to agitate \n'
 'for peace, a process that received a boost as a result of the 1917 Russian '
 'revolution. At the \n'
 'end of the war in 1918 the socialist and trade union movement was much '
 'stronger than in \n'
 '1914. \n'
 'The Great War also saw the introduction of the planned economy and a much '
 'bigger role

In [35]:
# Print the combined text size of the embeddings-filter results.
get_content_length(compressed_doc_3)

Content length: 3586
