In [1]:
from langchain_community.document_loaders import WebBaseLoader
import os

os.environ['USER_AGENT'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [None]:
loader_multiple_pages = WebBaseLoader(["https://www.home0001.com/how-it-works", "https://www.home0001.com/legal"])
docs = loader_multiple_pages.load()
print(docs[1].page_content[:128])

In [None]:
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# set up the splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# split the docs
splits = text_splitter.split_documents(docs)

# create a vector database with the splits
vectorstore = Chroma.from_documents(
    documents=splits, 
    embedding=HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"),
    # persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
)

print(len(splits))
print(len(splits[12].page_content))
print(splits[12].metadata)

  from .autonotebook import tqdm as notebook_tqdm


99
990
{'source': 'https://www.home0001.com/legal', 'title': 'Legal Notices for 0001 homes live flexibly own your home', 'description': 'Own the perfect home.', 'language': 'en'}


In [16]:
# Retrieve and generate using the relevant snippets of the site.

retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
retrieved_docs = retriever.invoke("What is home0001?")

print(len(retrieved_docs))
print(retrieved_docs[0].page_content[:128])

5
device to retrieve, index, “scrape,” “data mine” or otherwise gather Site content, or reproduce or circumvent the navigational s


In [13]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")



In [10]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

In [14]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# print(format_docs(docs))

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

for chunk in rag_chain.stream("What is Home0001?"):
    print(chunk, end="", flush=True)


print(rag_chain.invoke("can i rent an apartment?"))

Home0001 is a peer-to-peer housing collective that allows members to buy fully furnished homes and share them with other members. It provides a platform for members to access homes in different locations for free while making their own available when not in use. Home0001 emphasizes ownership and control over the properties, enabling a streamlined home buying and sharing experience.Yes, you can rent an apartment through HOME0001, which offers fully furnished homes. You can also swap homes within their network by texting your desired dates. However, your home must meet their standards if you wish to make it available for others.
