In [1]:
import os

# Credentials and LangSmith tracing configuration.
# API keys are read from the environment so nothing secret lives in the notebook.
# Each value is None when the corresponding variable is unset.
openai_api_key = os.getenv('OPENAI_API_KEY')
pinecone_api_key = os.getenv('PINECONE_API_KEY')

# Tracing is configured only when a LangSmith key is actually available.
# os.environ accepts only str values, so writing the result of os.getenv()
# straight back into it fails with "TypeError: str expected, not NoneType"
# when LANGCHAIN_API_KEY is unset; when it is set, the key is already in the
# environment and does not need to be re-assigned.
langchain_api_key = os.getenv('LANGCHAIN_API_KEY')
if langchain_api_key:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    # NOTE(review): confirm this endpoint is still the one the LangSmith
    # account expects; it is kept unchanged here.
    os.environ["LANGCHAIN_ENDPOINT"] = "https://api.langchain.plus"
    os.environ["LANGCHAIN_PROJECT"] = "rag-assignment"
else:
    print("LANGCHAIN_API_KEY is not set; LangSmith tracing was not configured.")

In [48]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pinecone import Pinecone
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [41]:
# Chat model shared by the QA chains built further down.
# temperature=0.0 is passed so sampling randomness is minimised.
llm = ChatOpenAI(temperature=0.0, model_name='gpt-3.5-turbo', openai_api_key=openai_api_key)

In [6]:
# Name of the Pinecone index; used below to open the index handle and to
# configure the vector store.
index_name = "rag-assignment"

In [20]:
# Pinecone client authenticated with the key read from the environment.
pc = Pinecone(api_key=pinecone_api_key)
# Handle to the index named by `index_name`; used below to print index stats.
index = pc.Index(index_name)

In [7]:
# Embedding model used by the vector store to embed queries.
model_name = "text-embedding-ada-002"
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model=model_name)

In [36]:
# LangChain vector-store wrapper around the Pinecone index named above.
# NOTE(review): text_key is an unusual value; presumably the index was
# populated with the chunk text stored under this metadata field - confirm
# against whatever ingested the vectors.
vectorstore = PineconeVectorStore(
    embedding=embeddings,
    index_name=index_name,
    text_key='nepal-constitution-2072',
    pinecone_api_key=pinecone_api_key,
)

In [9]:
# Path to the source PDF that is loaded and split in the next cell.
# It is a relative path, so it is resolved against the kernel's working directory.
file = 'nepal-constitution-2072.pdf'

In [12]:
# Configure the chunker: chunk_size=1000 with chunk_overlap=150.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=150,
    chunk_size=1000,
)

# Read the PDF into documents, then cut those documents into chunks.
# NOTE(review): none of the visible cells add `docs` to the vector store; the
# index appears to have been populated elsewhere - confirm.
loader = PyPDFLoader(file)
documents = loader.load()
docs = text_splitter.split_documents(documents)


In [22]:
# Print the index statistics; the recorded output below shows the vector
# dimension and the per-namespace / total vector counts.
print(index.describe_index_stats())



{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 936}},
 'total_vector_count': 936}


In [46]:
# Retrieval smoke test: ask for the 3 best matches to a broad query using
# maximal-marginal-relevance search. The call is the cell's last expression,
# so the returned documents are displayed as the cell output.
query = "nepal constitution"
vectorstore.max_marginal_relevance_search(query=query, k=3)

[Document(metadata={'page': 0.0, 'source': '/tmp/tmpp7o83c8n/tmp.pdf'}, page_content='1 \n  \n \n \n \nTHE CONSTITUTION OF NEPAL'),
 Document(metadata={'page': 197.0, 'source': '/tmp/tmpp7o83c8n/tmp.pdf'}, page_content='198 \n until election to the President or Vice -President is held and he or she assumes \noffice.  \n281. Appraisal and review of special rights : The Government of Nepal shall make \nappraisal  and review of the implementation of special rights of the women and \nDalit  community and impacts thereof, on the basis of human development \nindex, concurrently with a national census to be held in every ten years.  \n282. Ambassadors and special emissaries : (1) The President may, on the basis of the \nprinciple of inclusion, appoint Nepalese ambassadors, and special emissaries \nfor any specific purposes.  \n   (2) The President shall receive letters of credentials from foreign \nambassadors and diplomatic representat ives.  \n283. Appointments to be made  in accordance wit

In [47]:
# Single-turn QA chain that also reports which sources the answer came from.
# It uses the "stuff" chain type and the vector store's default retriever.
qa_with_sources = RetrievalQAWithSourcesChain.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

# Run the chain on the query defined above; the result dict (question,
# answer, sources) is displayed as the cell output.
qa_with_sources.invoke({"question": query})

{'question': 'nepal constitution',
 'answer': "The Constitution of Nepal was published on September 20, 2015, in the Nepal Gazette. It internalizes the people's sovereign right and right to autonomy and self-rule, while maintaining freedom, sovereignty, territorial integrity, national unity, independence, and dignity of Nepal. It also aims to end all forms of discrimination and oppression created by the feudalistic, autocratic, centralized, unitary system of governance, and promote social and cultural solidarity, tolerance, harmony, and unity in diversity.\n",
 'sources': '/tmp/tmpp7o83c8n/tmp.pdf'}

In [87]:
# Conversation memory for the conversational chain built in the next cell.
# History is kept under "chat_history" as message objects. output_key names
# the chain output to record, since that chain returns more than one output
# (source documents and the generated question are requested as well).
memory = ConversationBufferMemory(output_key='answer', return_messages=True, memory_key="chat_history")

In [88]:
# Retriever using maximal-marginal-relevance search, 4 documents per query.
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 4},
)

# Conversational RAG chain: uses `memory` for chat history and returns the
# source documents and the generated question alongside the answer.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    memory=memory,
    return_generated_question=True,
    return_source_documents=True,
)

In [77]:
# Reset the conversation memory so the next question starts from an empty history.
# NOTE(review): this cell's recorded execution count (77) is lower than the
# cells that create `memory` and `qa` (87, 88), so it was not run in file
# order - confirm it is still wanted at this position.
memory.clear()

In [89]:
# Ask the conversational chain a question and keep the full result dict.
# This question has no antecedent, so the answer depends entirely on whatever
# is already in the chat history.
question = "who is he?"
result = qa.invoke(dict(question=question))

In [None]:
# Print every field of the chain result as "name: value", followed by blank
# lines to separate the fields.
for key, value in result.items():
    print(f'{key}: {value}')
    print('\n')

question: who is he?


chat_history: [HumanMessage(content='who is he?'), AIMessage(content='I\'m sorry, but based on the context provided, I cannot determine who "he" is referring to. If you can provide more information or context, I may be able to help you better.'), HumanMessage(content='who is he?'), AIMessage(content="I don't have enough context to determine who specifically is being referred to in your question.")]


answer: I don't have enough context to determine who specifically is being referred to in your question.


source_documents: [Document(metadata={'page': 235.0, 'source': '/tmp/tmpp7o83c8n/tmp.pdf'}, page_content='236'), Document(metadata={'page': 16.0, 'source': '/tmp/tmpp7o83c8n/tmp.pdf'}, page_content='any person who is authorized by law to represent any person in any court.  \n(3) Any person who is arrested shall be produced before the \nadjudicating authority within a period of twenty -four hours of such arrest , \nexcluding the time necessary for the journey fro