In [1]:

from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader,PyPDFDirectoryLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.prompts import PromptTemplate,ChatPromptTemplate
from langchain.chat_models import ChatOllama
from langchain.schema.output_parser import StrOutputParser 
from langchain.chains import create_retrieval_chain
from langchain_objectbox.vectorstores import ObjectBox
from langchain_community import document_loaders
from langchain_groq import ChatGroq
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain




In [2]:
import json

# Read the local secrets file; it sits one directory above the notebook.
with open("../key.json") as f:
    data = json.loads(f.read())

# The Groq API key is stored under the "groq_api" entry of that file.
groq_api_key = data["groq_api"]

# Chat model served by Groq, used by the later cells to generate answers.
llm = ChatGroq(
    model="gemma2-9b-it",
    temperature=0.2,
    api_key=groq_api_key,
)




In [3]:
# Prompt used by the document chain. `{context}` and `{input}` are the two
# template variables; the chain output further down shows the matching
# 'context' and 'input' keys.
# The context section is now closed with a proper `</context>` tag (it was
# previously a second opening `<context>` tag), so the end of the context is
# clearly delimited from the question.
prompt = ChatPromptTemplate.from_template(
"""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
</context>
Questions:{input}

"""
)

In [4]:
### Vector embedding and ObjectBox vector store

# Tunables for this cell, gathered in one place so they are easy to change.
PDF_DIR = "./us_census"
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
EMBEDDING_MODEL = "nomic-embed-text"
OLLAMA_BASE_URL = "http://host.docker.internal:11434"
# Passed to ObjectBox as `embedding_dimensions`; presumably the vector length
# produced by EMBEDDING_MODEL -- confirm before switching to another model.
EMBEDDING_DIMENSIONS = 768
# Forwarded to the retriever as search_kwargs["k"].
TOP_K = 2

## Loading documents

loader = PyPDFDirectoryLoader(PDF_DIR)
raw_documents = loader.load()
# Fail fast with a readable message instead of handing an empty corpus to the
# embedding / vector-store step below (e.g. when PDF_DIR is wrong or empty).
if not raw_documents:
    raise ValueError(f"No PDF pages were loaded from {PDF_DIR!r}")

## Splitting into chunks

text_splitter = CharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
# `documents` keeps holding the split chunks, as before; the unsplit pages
# remain available separately as `raw_documents`.
documents = text_splitter.split_documents(raw_documents)
if not documents:
    raise ValueError(f"Splitting the pages from {PDF_DIR!r} produced no chunks")

## Embedding and vector store

embedding = OllamaEmbeddings(model=EMBEDDING_MODEL, base_url=OLLAMA_BASE_URL)
vectors = ObjectBox.from_documents(
    documents,
    embedding,
    embedding_dimensions=EMBEDDING_DIMENSIONS,
)
retriever = vectors.as_retriever(search_kwargs={"k": TOP_K})

## Answer-generation chain

# Independent of the loading/embedding steps above: it only combines the
# `llm` and `prompt` objects created in the earlier cells.
document_chain = create_stuff_documents_chain(llm, prompt)


  embedding=OllamaEmbeddings(model="nomic-embed-text",base_url="http://host.docker.internal:11434")


In [5]:
# Combine the vector-store retriever with the document chain into a single
# retrieval chain that can be invoked with a question.
retriever_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [6]:
# Ask a sample question. The call is left as the cell's last expression so the
# returned dict (it includes the 'input' and 'context' entries) is displayed.
retriever_chain.invoke(
    {"input": "Household income in usa?"}
)


{'input': 'Household income in usa?',
 'context': [Document(metadata={'author': 'U.S. Census Bureau', 'creationdate': '2023-11-29T08:35:27-05:00', 'creator': 'Adobe InDesign 18.2 (Windows)', 'keywords': 'acsbr-016', 'moddate': '2023-11-29T08:44:32-05:00', 'page': 9, 'page_label': '10', 'producer': 'Adobe PDF Library 17.0', 'source': 'us_census/acsbr-016.pdf', 'subject': 'American Community Survey Briefs', 'title': 'Poverty in States and Metropolitan Areas: 2022', 'total_pages': 15, 'trapped': '/False'}, page_content='10 U.S. Census Bureau\nratios below 50 percent.17 The \nHouston (6.8 percent), Detroit (6.7 \npercent), New York (6.4 percent), \nLos Angeles (6.3 percent), and \nSan Antonio (6.2 percent) MSAs all \nwere among the highest rates of \nindividuals with income-to-poverty \nratios below 50 percent.18 \nThe share of people with income \nbelow 50 percent of their poverty \nthreshold decreased in 4 of the 25 \nmost populous metropolitan areas \nfrom 2021 to 2022, while none of \n