# Contextual compression

In [4]:
import pinecone
import openai
import logging
import os
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Pinecone

# Create the embedding client and the LLM.
# setdefault keeps an OPENAI_API_KEY that is already exported in the
# environment instead of overwriting it with the placeholder below.
os.environ.setdefault("OPENAI_API_KEY", "{YOUR_OPENAI_KEY}")
embedding = OpenAIEmbeddings()
llm = OpenAI()

# Connect to the existing Pinecone index.
# The API key is read from PINECONE_API_KEY when set; the placeholder is
# only a fallback so the cell still documents where the key goes.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY", "{YOUR_PINECONE_APIKEY}"),
    environment="gcp-starter",
)
index = pinecone.Index("terry-wiki")
text_field = "text"  # passed to the vector store as its text key
vectordb = Pinecone(index, embedding.embed_query, text_field)

# Wrap the vector-store retriever with an LLM-based extractor compressor.
# The number of documents fetched from the index (top-k) is configured on
# the base retriever through search_kwargs; a `k` keyword handed to
# get_relevant_documents() is not a documented way to set it.
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(search_kwargs={"k": 2}),
)

query = "Where is the locaction for vacation?"
docs = compression_retriever.get_relevant_documents(query)
for doc in docs:
    print(doc)
    print("\n")




page_content='Austria (, ;  ), officially the\xa0Republic of Austria ( ), is a country in Central Europe. Around Austria there are the countries of Germany, Czech Republic, Slovakia, Hungary, Slovenia, Italy, Switzerland, and Liechtenstein. The people in Austria speak German, a few also speak Hungarian, Slovenian and Croatian. The capital of Austria is Vienna (Wien).' metadata={'chunk': 10.0, 'source': 'https://simple.wikipedia.org/wiki/Austria', 'title': 'Austria', 'wiki-id': '55'}


page_content='The sizes of cities can be very different. This depends on the type of city. Cities built hundreds of years ago and which have not changed much are much smaller than modern cities. There are two main reasons. One reason is that old cities often have a city wall, and most of the city is inside it. Another important reason is that the streets in old cities are often narrow. If the city got too big, it was hard for a cart carrying food to get to the marketplace. People in cities need food, and 

# Compression with LLMChainFilter

In [6]:
import pinecone
import openai
import logging
import os
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainFilter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Pinecone

# Create the embedding client and the LLM.
# setdefault keeps an OPENAI_API_KEY that is already exported in the
# environment instead of overwriting it with the placeholder below.
os.environ.setdefault("OPENAI_API_KEY", "{YOUR_OPENAI_KEY}")
embedding = OpenAIEmbeddings()
llm = OpenAI()

# Connect to the existing Pinecone index.
# The API key is read from PINECONE_API_KEY when set; the placeholder is
# only a fallback so the cell still documents where the key goes.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY", "{YOUR_PINECONE_APIKEY}"),
    environment="gcp-starter",
)
index = pinecone.Index("terry-wiki")
text_field = "text"  # passed to the vector store as its text key
vectordb = Pinecone(index, embedding.embed_query, text_field)

# Wrap the vector-store retriever with an LLM-based document filter.
# Named `llm_filter` rather than `filter` so the Python builtin is not
# shadowed for the rest of the kernel session.
# ContextualCompressionRetriever declares no `k` field, so top-k is set on
# the base retriever through search_kwargs instead.
llm_filter = LLMChainFilter.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=llm_filter,
    base_retriever=vectordb.as_retriever(search_kwargs={"k": 2}),
)

query = "Where is the locaction for vacation?"
docs = compression_retriever.get_relevant_documents(query)
for doc in docs:
    print(doc)
    print("\n")
    



page_content='Austria (, ;  ), officially the\xa0Republic of Austria ( ), is a country in Central Europe. Around Austria there are the countries of Germany, Czech Republic, Slovakia, Hungary, Slovenia, Italy, Switzerland, and Liechtenstein. Currently, the chancellor is Karl Nehammer The previous chancellor was Alexander Schallenberg (2021). Austria has been a member-state of the United Nations since 1955 the European Union since 1995 and OPEC since 2019.\n\nThe people in Austria speak German, a few also speak Hungarian, Slovenian and Croatian. The capital of Austria is Vienna (Wien).\n\nAustria is more than a thousand years old. Its history can be followed to the ninth century. At that time the first people moved to the land now known as Austria. The name "Ostarrichi" is first written in an official document from 996. Since then this word has developed into the Modern German word Österreich, which literally means "East Empire."\n\nPolitics \nAustria is a democratic republic. It is a ne

# Compression with pipeline

In [8]:
from langchain.llms import OpenAI
import pinecone
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainFilter
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter

# Create the LLM and the embedding client.
# setdefault keeps an OPENAI_API_KEY that is already exported in the
# environment instead of overwriting it with the placeholder below.
os.environ.setdefault("OPENAI_API_KEY", "{YOUR_OPENAI_KEY}")
llm = OpenAI(temperature=0)
embedding = OpenAIEmbeddings()

# Connect to the existing Pinecone index.
# The API key is read from PINECONE_API_KEY when set; the placeholder is
# only a fallback so the cell still documents where the key goes.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY", "{YOUR_PINECONE_APIKEY}"),
    environment="gcp-starter",
)
index = pinecone.Index("terry-wiki")
text_field = "text"  # passed to the vector store as its text key
vectordb = Pinecone(index, embedding.embed_query, text_field)

# Individual pipeline stages.
llm_filter = LLMChainFilter.from_llm(llm)
llm_extractor = LLMChainExtractor.from_llm(llm)
redundant_filter = EmbeddingsRedundantFilter(embeddings=embedding)

# Stages are applied in list order: redundancy filter, extractor, LLM filter.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[redundant_filter, llm_extractor, llm_filter]
)
# Experiment notes:
# With query = "Where is the cuba? and nearest country by the Cuba?" and k=3,
# using redundant_filter yields only a single result about Cuba.
#
# With the same query and k=3 but without redundant_filter, the result is
# three copies of the same Cuba record.
#
# Extractor-only variant used for the comparison above:
# pipeline_compressor = DocumentCompressorPipeline(transformers=[llm_extractor])

# ContextualCompressionRetriever declares no `k` field, so top-k is set on
# the base retriever through search_kwargs instead.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor,
    base_retriever=vectordb.as_retriever(search_kwargs={"k": 10}),
)

# Alternative query:
# query = "Where is the best place for summer vacation?"
query = "Where is the cuba? and nearest country by the Cuba?"
docs = compression_retriever.get_relevant_documents(query)
for doc in docs:
    print(doc)
    print("\n")




page_content='Cuba is an island country in the Caribbean Sea. The country is made up of the big island of Cuba, the Isla de la Juventud island (Isle of Youth), and many smaller islands. Havana is the capital of Cuba. It is the largest city. The second largest city is Santiago de Cuba. In Spanish, the capital is called "La Habana". Cuba is near the United States, Mexico, Haiti, Jamaica and the Bahamas.' metadata={'chunk': 2.0, 'source': 'https://simple.wikipedia.org/wiki/Cuba', 'title': 'Cuba', 'wiki-id': '178'}






In [None]:
from langchain.chains import RetrievalQAWithSourcesChain

# Build a question-answering chain that also reports its sources.
# `llm`, `vectordb` and `query` come from the preceding cell. The vector
# store is named `vectordb` throughout this notebook; the previous reference
# to `vectorstore` was undefined and raised NameError.
qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
)
qa_with_sources(query)