# Installing required modules

In [None]:
!pip install groq
!pip install gradio
!pip install langchain-groq
!pip install langchain
!pip install langchain-community
!pip install faiss-gpu
!pip install faiss-cpu
!pip install pypdf
!pip install sentence_transformers

Collecting groq
  Downloading groq-0.15.0-py3-none-any.whl.metadata (14 kB)
Downloading groq-0.15.0-py3-none-any.whl (109 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.6/109.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.15.0
Collecting gradio
  Downloading gradio-5.13.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.7-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.6.0 (from gradio)
  Downloading gradio_client-1.6.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (f

# Importing Modules

In [None]:
import os
from getpass import getpass

import gradio as gr
import numpy as np
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import PyPDFDirectoryLoader, PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Connector

In [None]:
# Read the Groq API key from the environment so the secret is never saved in
# the notebook; fall back to a hidden interactive prompt when it is not set.
groq_api = os.environ.get("GROQ_API_KEY") or getpass("Groq API key: ")

# Chat model (served by Groq) that generates the final answers.
# NOTE(review): confirm this model id is still offered by Groq.
llm = ChatGroq(
    api_key=groq_api,
    model="Llama3-8b-8192",
)

# Prompt for the question-answering chain: {context} is filled with the
# retrieved document chunks and {input} with the user's question.
# The context block is now closed with </context> (it was a second opening tag).
prompt = ChatPromptTemplate.from_template(
"""
Answer the questions based on the provided context only.
Please provide the most accurate response based on the question
<context>
{context}
</context>
Questions:{input}

"""
)

# Loading the PDF and building the vector store

In [None]:
# Source document and chunking parameters, gathered here so they are easy to tune.
# NOTE(review): the path points at the filesystem root - confirm where the PDF lives.
PDF_PATH = "/basic-laws-book-2016.pdf"
CHUNK_SIZE = 1000     # maximum characters per chunk
CHUNK_OVERLAP = 200   # characters shared between consecutive chunks

# Load the PDF file
loader = PyPDFLoader(PDF_PATH)
docs = loader.load()

# Fail fast with a clear message instead of an obscure error when the index is built.
if not docs:
    raise ValueError(f"No documents were loaded from {PDF_PATH!r}")

# Check how many documents were loaded and display the first one
print(f"Loaded {len(docs)} documents.")
print("Sample document text:", docs[0].page_content[:500])  # first 500 characters only

# Define the text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the documents into smaller chunks
final_docs = text_splitter.split_documents(docs)
if not final_docs:
    raise ValueError("Splitting produced no chunks; the PDF may contain no extractable text")

# Print the first chunk to verify the split
print(f"Number of chunks: {len(final_docs)}")
print("Sample chunk text:", final_docs[0].page_content[:500])  # first 500 characters only


Loaded 170 documents.
Sample document text: 2016  edition
 
BASIC
LAWS
and AUTHORITIES  of the NATIONAL ARCHIVES   
and RECORDS ADMINISTR ATION
Office of General Counsel
National Archives and Records Administration
Additional materials can be found on the web at: www.archives.gov
Number of chunks: 853
Sample chunk text: 2016  edition
 
BASIC
LAWS
and AUTHORITIES  of the NATIONAL ARCHIVES   
and RECORDS ADMINISTR ATION
Office of General Counsel
National Archives and Records Administration
Additional materials can be found on the web at: www.archives.gov


In [None]:
# Embedding model used to vectorise every chunk. The name is spelled out (it is
# the library's default) so the index stays reproducible if that default changes.
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

# Embed all chunks and build the FAISS index that the retriever queries.
vectors = FAISS.from_documents(final_docs, embeddings)

  embeddings = HuggingFaceEmbeddings()
  embeddings = HuggingFaceEmbeddings()


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Sanity-check the index: every chunk should have produced exactly one vector.
# (Wrapping the store in np.array only yields a 0-d object array, which says
# nothing about its contents.)
assert vectors.index.ntotal == len(final_docs), "FAISS index size does not match chunk count"
vectors.index.ntotal

array(<langchain_community.vectorstores.faiss.FAISS object at 0x7a29509879d0>,
      dtype=object)

In [None]:
def getAns(prompt1, chat_history=None):
    """Answer a question from the indexed documents and record the exchange.

    Args:
        prompt1: The user's question. A blank or whitespace-only question is
            rejected without calling the model.
        chat_history: Optional list of ``(speaker, text)`` tuples from earlier
            turns. It is only extended for display purposes; it is not sent to
            the model. The caller's list is never modified.

    Returns:
        A ``(answer, chat_history)`` tuple: the answer text and a new history
        list with this question and answer appended.
    """
    # Work on a copy so the caller's list is not mutated behind its back.
    history = list(chat_history) if chat_history else []

    # Nothing to retrieve for a blank question; skip the LLM round-trip.
    if not prompt1 or not prompt1.strip():
        return "Please enter a question.", history

    # Building the chain only wires objects together, so doing it per call keeps
    # the function in sync with whatever `llm`, `prompt` and `vectors` currently are.
    document_chain = create_stuff_documents_chain(llm, prompt)
    retriever = vectors.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    # Get the response
    response = retrieval_chain.invoke({"input": prompt1})
    answer = response["answer"]

    # Append the question and answer to the history for display continuity
    history.append(("You:", prompt1))
    history.append(("Bot:", answer))

    return answer, history


In [None]:
# Smoke-test the retrieval chain with one question. getAns returns an
# (answer, history) pair, so unpack it and show only the answer text.
sample_answer, _sample_history = getAns("what is the first president act?")
print(sample_answer)

The first President Act mentioned in the context is the "FORMER PRESIDENTS ACT" mentioned in section (3 U.S.C. § 102 note).


In [None]:
def gradio_interface(query, chat_history=None):
    """Gradio callback: answer ``query`` and render the running conversation.

    Args:
        query: The question typed into the textbox.
        chat_history: History carried in Gradio state between calls, or
            ``None`` on the first call.

    Returns:
        A ``(conversation, chat_history)`` tuple: the conversation rendered as
        one line per history entry, and the updated history to store in state.
    """
    _, chat_history = getAns(query, chat_history)

    # Each history entry is a (speaker label, text) pair; render one per line.
    transcript = "".join(
        f"{speaker} {text}\n" for speaker, text in chat_history
    )

    return transcript, chat_history

# Set up the Gradio interface, using state to carry the conversation between calls.
question_box = gr.Textbox(
    label="Ask a Legal Question",
    placeholder="Type your question here...",
)
conversation_box = gr.Textbox(label="Conversation History", interactive=False)

interface = gr.Interface(
    fn=gradio_interface,
    inputs=[question_box, gr.State()],        # State feeds the stored chat history in
    outputs=[conversation_box, gr.State()],   # State receives the updated chat history
    title="Legal Document Helper Chatbot",
    description="Ask questions related to legal documents, and the chatbot will respond based on the context of the documents you provide.",
    # `allow_flagging` is deprecated in Gradio 5; `flagging_mode` is its replacement.
    flagging_mode="never",
)

# Launch Gradio interface
interface.launch()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://4a0e54d7a82b0a143d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


