In [4]:
#install dependencies
!pip install -q langchain langchain-openai langchain_community langchain_chroma langchainhub

In [2]:
# import necessary modules
import getpass
import os

from langchain_openai import ChatOpenAI
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Prompt for the OpenAI API key only when it is not already set, so re-running
# this cell (or running where the key is already exported) neither prompts
# again nor overwrites the existing value. getpass keeps the key out of the
# notebook source and its saved output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")



In [5]:
# Helper that turns retrieved documents into one context string for the prompt.
def format_docs(docs):
    """Join the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in iteration order, separated by a blank
        line (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

In [12]:
# Load: fetch the page to analyze. The URL is the ACL Anthology entry for the
# LLMLingua-2 paper (prompt compression, i.e. reducing the tokens sent to an LLM).
loader = WebBaseLoader(
    web_paths=("https://aclanthology.org/2024.findings-acl.57/",),
)
docs = loader.load()

# Split: break the loaded page into overlapping chunks so each piece can be
# embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Index: embed every chunk with OpenAI embeddings and store the vectors in Chroma.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

In [14]:
# Retrieve: wrap the Chroma vector store in a retriever so it can be composed
# into the chain. No arguments are passed, so the library's default search
# settings apply.
retriever = vectorstore.as_retriever()

In [15]:
# Augment: fetch the "rlm/rag-prompt" prompt from the LangChain Hub. The chain
# built later feeds it a mapping with "context" and "question" keys.
prompt = hub.pull("rlm/rag-prompt")



In [16]:
# Generate: chat model that writes the final answer.
model = ChatOpenAI(model="gpt-4o")

# Assemble the RAG chain. The incoming question is sent to the retriever (whose
# results are flattened to text by format_docs) and is also passed through
# unchanged; both values fill the prompt, the model answers, and the parser
# returns the reply as a plain string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | model
    | StrOutputParser()
)

# Ask a question about the indexed paper; the answer is shown as the cell output.
rag_chain.invoke(
    "Who is the author of LLMLingua2 and what is the main idea of the paper?"
)

'The authors of LLMLingua-2 are Zhuoshi Pan, Qianhui Wu, Huiqiang Jiang, Menglin Xia, Xufang Luo, Jue Zhang, Qingwei Lin, Victor Rühle, Yuqing Yang, Chin-Yew Lin, H. Vicky Zhao, Lili Qiu, and Dongmei Zhang. The main idea of the paper is focused on data distillation for efficient and faithful task-agnostic prompt compression.'