In [1]:
from langchain_community.document_loaders import UnstructuredURLLoader
# Pages to ingest; each URL is fetched and parsed by the loader below.
urls = [
    "https://www.plutodaycare.com/",
    "https://www.plutodaycare.com/about-us",
    "https://www.plutodaycare.com/locations",
]

# Fetch the pages over the network and keep the loader's result in `data`,
# which the later cells display and split into chunks.
loader = UnstructuredURLLoader(urls=urls)
data = loader.load()

In [None]:
# Inspect what the loader returned (the value of `loader.load()` from the first cell).
data

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded pages into smaller chunks before embedding them.
# chunk_size=1000 is the only setting overridden here; every other splitter
# option is left at the library default.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
)
docs = text_splitter.split_documents(data)

# Report how many chunks the split produced.
print("Total number of documents: ", len(docs))

In [None]:
# Peek at the first chunk produced by the splitter as a sanity check.
docs[0]

## Set Up the Embeddings

In [None]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_openai import OpenAI
from dotenv import load_dotenv
# Read settings from a local .env file; override=True lets values in .env
# replace variables that are already set in the process environment.
# (Presumably this supplies the OpenAI API key -- confirm against your .env.)
load_dotenv(override=True)

# Give every chunk a stable, position-based ID. Without explicit IDs, each
# re-run of this cell in the same kernel inserts the chunks again under fresh
# random IDs, so the in-memory collection accumulates duplicates and the
# retriever's top-k results fill up with repeated chunks. With fixed IDs the
# insert overwrites the existing entries, which makes the cell safe to re-run.
# NOTE: if a later run yields fewer chunks, higher-numbered entries from the
# earlier run remain; restart the kernel for a completely clean store.
chunk_ids = [f"chunk-{position}" for position in range(len(docs))]

# Embed the chunks with OpenAI embeddings and index them in Chroma.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=OpenAIEmbeddings(),
    ids=chunk_ids,
)

## Set Up the Retriever

In [None]:
# Expose the vector store as a retriever that returns the 3 most similar
# chunks for a query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# Smoke-test the retriever with a sample question before wiring up the LLM.
retrieved_docs = retriever.invoke("What kind of services they provide?")

In [None]:
# Number of chunks returned for the sample query (the retriever was configured with k=3).
len(retrieved_docs)

In [None]:
# Show the text of the top-ranked chunk to check that retrieval looks relevant.
print(retrieved_docs[0].page_content)

## Set Up the OpenAI LLM

In [None]:
# LLM used to generate the final answers in the RAG chain.
llm = OpenAI(
    temperature=0.4,  # sampling temperature passed to the model
    max_tokens=500,   # upper bound on the length of each generated answer
)

In [6]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System message for the QA chain: one instruction sentence per literal,
# followed by a blank line and the {context} placeholder that the prompt
# template fills in at run time.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n{context}"
)

# Two-message chat prompt: the system instructions (which carry the {context}
# placeholder) followed by the user's question in {input}.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [7]:
# Chain that passes the retrieved documents into the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: retrieve documents for the input, then hand them to the
# question-answering chain above.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [10]:
# Run the RAG chain on a sample question and print only the generated answer
# from the returned mapping.
response = rag_chain.invoke(
    {"input": "Provide their address for all locations"}
)
print(response["answer"])