In [29]:
import os
from dotenv import load_dotenv
# Pull settings from a local .env file (if one exists) into the process environment.
load_dotenv()

# Groq API key; this is None when GROQ_API_KEY is not defined.
groq_api_kay = os.getenv('GROQ_API_KEY')

from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_chroma import Chroma
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain


In [30]:
# Chat model (served by Groq) that writes the final answers.
llm = ChatGroq(
    api_key=groq_api_kay,
    model='llama-3.3-70b-versatile',
)

In [11]:
# Pages of the site to index, written as paths under the site root.
SITE_ROOT = "https://citylocalbiz.us"
page_paths = (
    "/",
    "/listing/charter-spectrum-cable-&-internet-portland-or-97210",
    "/buy-credit",
    "/terms-conditions",
    "/privacy-policy",
    "/blog",
    "/about",
    "/contact",
    # NOTE(review): the "#google_vignette" fragment looks like ad-overlay
    # residue from a copied address — confirm whether it is intended.
    "/sign-up#google_vignette",
    "/login",
    "/package-price",
    "/states",
    "/cities",
    "/categories",
)
urls = [SITE_ROOT + path for path in page_paths]

# Fetch the pages; load() returns them as Document objects.
loader = WebBaseLoader(web_path=urls)
docs = loader.load()

In [12]:
# Inspect the raw Document objects returned by the loader.
docs

[Document(metadata={'source': 'https://citylocalbiz.us/', 'title': 'Discover Local Businesses - City Local Biz', 'description': 'Explore top-rated local businesses in the USA on City Local Biz. Find services, restaurants, shops, health care and more. Empowering your local choices!', 'language': 'en'}, page_content="\n\n\n\n\n\nDiscover Local Businesses - City Local Biz\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLogin\n/\nRegister\n\n\n Add Listing\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHome\n\n\nLogin / Register\n\n\nBlog\n\n\n\n\n\n\n\n\n\n\n\n\nJoin Us & Explore Thousands of Local Places\nFind the place you want and search the best place everything is here.\n\n\n\n\n\n\nKeywords\n\n\n\n\n\n\n\nWhere\n\nSelect States\nAlabama (AL)\nAlaska (AK)\nAlberta (AB)\nArizona (AZ)\nArkansas (AR)\nBritish Columbia (BC)\nCalifornia (CA)\nColorado (CO)\nConnecticut (CT)\nDelaware (DE)\nDistrict 

In [13]:
# How many pages were loaded.
len(docs)

14

In [14]:
# Cut every page into chunks of at most 500 characters, with a 50-character
# overlap between neighbouring chunks so sentences spanning a boundary are
# not lost to retrieval.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
documents = text_splitter.split_documents(docs)

# Show the resulting chunks.
documents

[Document(metadata={'source': 'https://citylocalbiz.us/', 'title': 'Discover Local Businesses - City Local Biz', 'description': 'Explore top-rated local businesses in the USA on City Local Biz. Find services, restaurants, shops, health care and more. Empowering your local choices!', 'language': 'en'}, page_content='Discover Local Businesses - City Local Biz\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLogin\n/\nRegister\n\n\n Add Listing\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHome\n\n\nLogin / Register\n\n\nBlog\n\n\n\n\n\n\n\n\n\n\n\n\nJoin Us & Explore Thousands of Local Places\nFind the place you want and search the best place everything is here.\n\n\n\n\n\n\nKeywords\n\n\n\n\n\n\n\nWhere'),
 Document(metadata={'source': 'https://citylocalbiz.us/', 'title': 'Discover Local Businesses - City Local Biz', 'description': 'Explore top-rated local businesses in the USA on City Local Biz. 

In [15]:
# How many chunks the splitter produced.
len(documents)

219

#### Embedding all the documents

In [None]:
# Embedding model shared by the indexing step and the query step below.
embeddings = HuggingFaceEmbeddings(
    model='all-MiniLM-L6-v2',
)

In [None]:
# Build and persist the Chroma vector database — but only once.
# Chroma.from_documents adds to whatever collection already lives in
# persist_directory, so re-running this cell used to store every chunk again
# and similarity searches then returned duplicate hits.
# To rebuild after changing the URLs or the splitter settings, delete the
# ./chroma_db directory first and re-run this cell.
if not os.path.isdir("./chroma_db"):
    Chroma.from_documents(documents, embedding=embeddings, persist_directory="./chroma_db")

<langchain_chroma.vectorstores.Chroma at 0x19b6562b550>

In [24]:
# Re-open the persisted Chroma database. The same embedding model is passed
# in so that queries are embedded the same way as the stored chunks.
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory="./chroma_db",
)

In [25]:
# Sanity-check the index with a direct similarity search (no LLM involved).
vector_store.similarity_search("how may city in there website")

[Document(id='9b054f4e-3974-4b43-8556-ea8db063dac8', metadata={'language': 'en', 'title': 'Discover Top Businesses in any City - City Local Biz', 'description': 'Find the best best services in USA cities on City Local Biz. Explore a curated directory for services, restaurants, and more. Empower your city experience!', 'source': 'https://citylocalbiz.us/cities'}, page_content='Discover Top Businesses in any City - City Local Biz\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLogin\n/\nRegister\n\n\n Add Listing\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nHome\n\n\nLogin / Register\n\n\nBlog\n\n\n\n\n\n\n\n\n\n\n\n\n\nTop cities from United States\n\n\n\n\nHome\nTop Cities\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n                                            \n                                                New York\n                                                837'),
 Docume

In [53]:
# Expose the vector store through the retriever interface used by the chain.
retriver = vector_store.as_retriever()

# Try one query and look at the text of the best-ranked chunk.
result = retriver.invoke("how may states in there website")
top_hit = result[0]
top_hit.page_content

'Select States\nAlabama (AL)\nAlaska (AK)\nAlberta (AB)\nArizona (AZ)\nArkansas (AR)\nBritish Columbia (BC)\nCalifornia (CA)\nColorado (CO)\nConnecticut (CT)\nDelaware (DE)\nDistrict of Columbia (DC)\nFlorida (FL)\nGeorgia (GA)\nHawaii (HI)\nIdaho (ID)\nIllinois (IL)\nIndiana (IN)\nIowa (IA)\nKansas (KS)\nKentucky (KY)\nLouisiana (LA)\nMaine (ME)\nManitoba (MB)\nMaryland (MD)\nMassachusetts (MA)\nMichigan (MI)\nMinnesota (MN)\nMississippi (MS)\nMissouri (MO)\nMontana (MT)\nNebraska (NE)\nNevada (NV)\nNew Brunswick (NB)'

In [55]:
# Prompt for the answering step. {context} receives the retrieved chunks and
# {input} receives the user's question; the model is told to refuse when the
# context does not contain the answer.
QA_TEMPLATE = """
    You are a helpful and knowledgeable AI assistant for a directory website. 
    Answer the following user question using only the information provided in the context below.

    <context>
    {context}
    </context>

    Question: {input}

    If the answer is not in the context, respond with:
    "I’m sorry, I don’t have enough information to answer that."

    Answer in a clear and concise way.
    """

prompt = ChatPromptTemplate.from_template(QA_TEMPLATE)

In [56]:
# Answering step: puts the retrieved documents into the prompt and calls the LLM.
qa_chain = create_stuff_documents_chain(llm, prompt)

# Full pipeline: retrieve chunks for the question, then hand them to qa_chain.
rag_chain = create_retrieval_chain(
    retriever=retriver,
    combine_docs_chain=qa_chain,
)

In [57]:
# Ask the RAG chain a question; the user text goes under the "input" key.
question = "What is the privace and policy?"
response = rag_chain.invoke({"input": question})

In [58]:
# Full chain output: the original 'input', the retrieved 'context' documents,
# and the generated 'answer'.
response

{'input': 'What is the privace and policy?',
 'context': [Document(id='6f2cab63-279e-4e2e-8db0-d8d7d0e5cfcd', metadata={'title': 'Privacy Policy | Safeguarding Your Data - City Local Biz', 'language': 'en', 'description': 'Our Privacy Policy ensures your data security. Discover how we protect your information. Trust City Local Biz for a safe online experience', 'source': 'https://citylocalbiz.us/privacy-policy'}, page_content='Privacy Policy Updates'),
  Document(id='5d800bbe-81a4-4630-9220-45c025ce5a0a', metadata={'title': 'Privacy Policy | Safeguarding Your Data - City Local Biz', 'source': 'https://citylocalbiz.us/privacy-policy', 'language': 'en', 'description': 'Our Privacy Policy ensures your data security. Discover how we protect your information. Trust City Local Biz for a safe online experience'}, page_content='in this Privacy Policy have the same meanings as in our Terms and Conditions, accessible at https://citylocalbiz.us. The following outlines our privacy policy.'),
  Doc

In [59]:
# Only the generated answer text.
response['answer']

'The privacy policy is outlined to inform users how their personal information is collected, used, communicated, disclosed, and utilized to provide and improve the Site.'

In [62]:
# Second question through the same chain; this overwrites the previous `response`.
question = "What is the Terms and Conditions?"
response = rag_chain.invoke({"input": question})

In [63]:
# Only the generated answer text.
response['answer']

'The Terms and Conditions of City Local Biz include agreements limiting the use of information and guidelines for collecting and using personal information, such as: \n1. Identifying the purposes for which information is being collected before or at the time of collecting personal information.\n2. Collecting and using personal information solely with the objective of fulfilling those purposes specified and for other compatible purposes, unless consent is obtained or as required by law.'

#### Add Chat history