In [1]:
!pip install langchain-openai langchain_community langchain-pinecone unstructured langchain-text-splitters


Collecting langchain-openai
  Downloading langchain_openai-0.2.2-py3-none-any.whl.metadata (2.6 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.2-py3-none-any.whl.metadata (2.8 kB)
Collecting langchain-pinecone
  Downloading langchain_pinecone-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting unstructured
  Downloading unstructured-0.16.0-py3-none-any.whl.metadata (24 kB)
Collecting langchain-text-splitters
  Downloading langchain_text_splitters-0.3.0-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain-core<0.4.0,>=0.3.9 (from langchain-openai)
  Downloading langchain_core-0.3.12-py3-none-any.whl.metadata (6.3 kB)
Collecting openai<2.0.0,>=1.40.0 (from langchain-openai)
  Downloading openai-1.52.0-py3-none-any.whl.metadata (24 kB)
Collecting tiktoken<1,>=0.7 (from langchain-openai)
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  D

In [28]:
from langchain_pinecone import PineconeVectorStore
from langchain_community.document_loaders import DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.document_loaders import PyPDFDirectoryLoader
import os


In [29]:
import os

# Create a directory named "Data" if it doesn't exist
if not os.path.exists("Data"):
    os.makedirs("Data")

print("Directory 'Data' is created or already exists.")

Directory 'Data' is created or already exists.


In [30]:
loader = PyPDFDirectoryLoader('/content/Data')

In [31]:
docs=loader.load()

In [32]:
docs[0]

Document(metadata={'source': '/content/Data/WebAdministration.pdf', 'page': 0}, page_content=' \n \n  \n      e-Vidhan  \nWeb Administrator & Super \nAdministrator Module  \nUser Manual Version 1.0  \nHimachal Pradesh Vidhan Sabha  \nShimla  ')

In [33]:
from google.colab import userdata

os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
os.environ['PINECONE_API_KEY'] = userdata.get('PINECONE_API_KEY')


In [34]:
embeddings= OpenAIEmbeddings(
    model='text-embedding-3-small'
)

index_name = "qna-bot"

In [35]:
#splitting document into chunks
text_splitter = RecursiveCharacterTextSplitter
text_splitter=RecursiveCharacterTextSplitter(chunk_size=800,chunk_overlap=50)
split_docs=text_splitter.split_documents(docs)

In [36]:
split_docs[4]

Document(metadata={'source': '/content/Data/WebAdministration.pdf', 'page': 3}, page_content='e-Vidhan  \n \n4                                                                                   Web Administrator & Super Administrator Module  Logging in to the e - Vidhan Application  \nTo login to the dashboard, the employee of the Vidhan Sabha has to open the e - \nVidhan website at web address "https://secure.evidhan.nic.in ". \nThe following login screen appears:  \n                                         \n \n \nOn the login screen the staff login through credentials given.  \nAfter the login, navigate to the  following dashboard.')

In [37]:
# creating vector store
vectorstore = PineconeVectorStore.from_documents(
    documents=split_docs,
    embedding=embeddings,
    index_name=index_name
)

In [38]:
from langchain_pinecone import PineconeVectorStore

In [39]:
query = "Lgin to e-Vidhaan Application"
similarity_docs = vectorstore.similarity_search(query)

In [40]:
similarity_docs

[Document(id='9f255ef2-c528-4745-aa1e-cf4d12778a37', metadata={'page': 3.0, 'source': '/content/Data/WebAdministration.pdf'}, page_content='e-Vidhan  \n \n4                                                                                   Web Administrator & Super Administrator Module  Logging in to the e - Vidhan Application  \nTo login to the dashboard, the employee of the Vidhan Sabha has to open the e - \nVidhan website at web address "https://secure.evidhan.nic.in ". \nThe following login screen appears:  \n                                         \n \n \nOn the login screen the staff login through credentials given.  \nAfter the login, navigate to the  following dashboard.'),
 Document(id='25fd81a1-2f1d-429a-967e-f55727443193', metadata={'page': 3.0, 'source': '/content/Data/WebAdministration.pdf'}, page_content='e-Vidhan  \n \n4                                                                                   Web Administrator & Super Administrator Module  Logging in to the e - 

In [41]:
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA

llm= ChatOpenAI(
    model="gpt-4o",
    temperature=0
    )

In [42]:
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains.combine_documents import create_stuff_documents_chain

retriever = vectorstore.as_retriever()


contextualize_q_system_prompt = (
    "Given a chat history and latest user question, "
    "which might reference context in chat history, "
    "reformulate it into a standalone question that can be understood "
    "without that chat history. Do not answer the question, just reformulate it "
    "if needed and otherwise return it as is."
)

# Create the contextualize QA ChatPromptTemplate
contextualize_q_system_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)


history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_system_prompt)

# Define system prompt for answering questions
system_prompt = (
    "You are a RAG chatbot designed to provide moderately long, informative, and engaging responses to user queries. "
    "Your task is to synthesize information from retrieved documents and generate coherent, contextually relevant answers. "
    "Follow these guidelines:\n\n"
    "Response Length: Aim for responses that are moderately long, typically between 100-200 words. Ensure the information is comprehensive yet concise.\n"
    "Information Synthesis: Combine information from multiple sources to provide a well-rounded answer. Cite sources using numeric references (e.g., [1], [2]) within the text.\n"
    "Formatting: Use Markdown elements to enhance readability. This includes:\n"
    "- **Bold** for emphasis\n"
    "- *Italics* for highlighting\n"
    "- Lists for clarity\n"
    "- Code blocks for technical content\n"
    "Tone and Style: Maintain a friendly, approachable, and professional tone. Adapt your language to suit the user’s query, ensuring clarity and engagement.\n"
    "Source Attribution: Always attribute information to its sources accurately. Use numeric references that correspond to a list of sources at the end of the response.\n"
    "Safety and Compliance: Adhere to copyright laws and avoid sharing verbatim content from copyrighted sources. Provide brief summaries instead.\n\n"
    "{context}"
)

# Create the QA ChatPromptTemplate
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


In [43]:
# Set up the infinite loop for user interaction
while True:
    user_question = input("Ask your question (or type 'exit' to quit): ")

    if user_question.lower() == 'exit':
        break

    response = rag_chain.invoke(
        {
            "input": user_question,
            "chat_history": []
        }
    )

    # Print the response with clear demarcations between each question
    print("\n" + "="*50)
    print("Question: ", user_question)
    print("Answer: ", response['answer'])
    print("="*50 + "\n")


Ask your question (or type 'exit' to quit): what is the process of admin login in e-vidhaan?

Question:  what is the process of admin login in e-vidhaan?
Answer:  The process of logging in as an admin to the e-Vidhan application involves several straightforward steps. Here's a concise guide to help you navigate the login procedure:

1. **Access the Website**: Begin by opening the e-Vidhan website using the web address: [https://secure.evidhan.nic.in](https://secure.evidhan.nic.in) [1][2].

2. **Login Screen**: Once the website loads, you will be presented with a login screen. This is where you will enter your credentials.

3. **Enter Credentials**: Use the credentials provided to you by the Vidhan Sabha administration. These typically include a username and password specific to your role as an admin.

4. **Dashboard Navigation**: After successfully logging in, you will be directed to the admin dashboard. This section includes various functionalities such as the "My Dashboard" section a