In [None]:
!pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai langchain-chroma bs4
!pip install -qU langchain-openai
!pip install pypdf

In [None]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
import getpass
import os

In [None]:
# Credentials: ask interactively (input is not echoed) and label each prompt so the
# two keys cannot be confused. A key that is already present in the environment is
# kept, which lets the notebook be re-run without typing the secrets again.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# Tracing is switched on via environment variable; the LangChain API key is read
# from the environment as well.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangChain API key: ")

# Tunable settings used by the cells below.
gpt_model = 'gpt-3.5-turbo'                  # chat model that writes the answer
embedding_model = 'text-embedding-3-small'   # model used to embed the PDF chunks
filename = 'userguide.pdf'                   # the file must be in the same folder

In [None]:
# Chat model that will generate the final answer.
llm = ChatOpenAI(model=gpt_model)

# Read the PDF configured in `filename` into a list of documents.
loader = PyPDFLoader(file_path=filename)
docs = loader.load()

In [None]:
# Cut the loaded documents into overlapping chunks (size 2000, overlap 200)
# so that each piece can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# Embed every chunk with the configured embedding model and index it in Chroma.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(model=embedding_model),
)

In [None]:
# Retrieval side: expose the vector store as a similarity-search retriever.
retriever = vectorstore.as_retriever(
    search_type="similarity",
)

# Generation side: fetch the "rlm/rag-prompt" prompt template from the hub.
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined in order, separated by a blank line
        (two newlines). An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)


# Assemble the RAG pipeline. The incoming question is fanned out into the two
# prompt inputs, the filled prompt goes to the chat model, and the model output
# is reduced to a plain string.
rag_chain = (
    {
        # Retrieved documents, merged into a single text block by format_docs.
        "context": retriever | format_docs,
        # The question itself, forwarded unchanged.
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Ask the chain a question about the PDF (Italian for "how can I edit an image").
# The call is the last expression of the cell, so the answer is shown as its output.
question = "come posso modificare un'immagine"
rag_chain.invoke(question)