In [None]:
# Dependency install: uncomment and run once on a fresh runtime.
# Every package below is version-pinned except gradio, which resolves to
# whatever release is current at install time.
# !pip install chromadb==0.5.5 langchain-chroma==0.1.2 langchain==0.2.11 langchain-community==0.2.10 langchain-text-splitters==0.2.2 langchain-groq==0.1.6 transformers==4.43.2 sentence-transformers==3.0.1 unstructured==0.15.0 unstructured[pdf]==0.15.0 gradio

Collecting chromadb==0.5.5
  Downloading chromadb-0.5.5-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain-chroma==0.1.2
  Downloading langchain_chroma-0.1.2-py3-none-any.whl.metadata (1.3 kB)
Collecting langchain==0.2.11
  Downloading langchain-0.2.11-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-community==0.2.10
  Downloading langchain_community-0.2.10-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-text-splitters==0.2.2
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl.metadata (2.1 kB)
Collecting langchain-groq==0.1.6
  Downloading langchain_groq-0.1.6-py3-none-any.whl.metadata (2.8 kB)
Collecting transformers==4.43.2
  Downloading transformers-4.43.2-py3-none-any.whl.metadata (43 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 43.7/43.7 kB 1.2 MB/s eta 0:00:00
Collecting sentence-transformers==3.0.1
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting unstructured=

In [None]:
import os
import time
import textwrap
import gradio as gr

from dotenv import load_dotenv
from langchain.chains import RetrievalQA

from langchain.document_loaders import UnstructuredFileLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

import warnings
warnings.filterwarnings('ignore')

In [None]:
# ChatGroq reads its credentials from the GROQ_API_KEY environment variable.
# Fail fast with an actionable message when it is missing: assigning the None
# returned by os.getenv() into os.environ would otherwise raise an opaque
# "TypeError: str expected, not NoneType".
groq_api_key = (os.environ.get("GROQ_API_KEY") or "").strip()
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in the environment (or load it from "
        "your secrets store) before running this notebook."
    )
# Store the whitespace-trimmed value so a stray newline from copy/paste does
# not end up in the key.
os.environ["GROQ_API_KEY"] = groq_api_key

In [None]:
# Source PDFs (absolute paths on the notebook host's filesystem)
pdf_files = [
    "/content/About the GKS.pdf",
    "/content/GOOD FRIDAY AND EASTER.pdf",
    "/content/Was Jesus Christ hanged on a cross or tree_.pdf"
]

# Load every PDF in order. Each loader result is appended as ONE entry, so
# `documents` holds one element per file (it is not flattened); the splitting
# cell further down iterates over these per-file entries.
documents = [UnstructuredFileLoader(pdf_path).load() for pdf_path in pdf_files]

In [None]:
# Chunking configuration: chunk_size=1000 with chunk_overlap=200, so that
# neighbouring chunks share some text at their boundary.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

In [None]:
# `documents` has one entry per PDF (the value returned by loader.load() for
# that file), so each entry is handed to split_documents on its own and the
# resulting chunks are collected into a single flat list.
texts = [
    chunk
    for file_docs in documents
    for chunk in text_splitter.split_documents(file_docs)
]




In [None]:
# Preview the first five chunks to sanity-check the split (content and
# `source` metadata).
texts[:5]

[Document(metadata={'source': '/content/About the GKS.pdf'}, page_content="GOD'S KINGDOM SOCIETY (GKS)\n\nMotto: The Church of the Living God AN OVERVIEW\n\nthe The God’s Kingdom Society is purely a Christian Organisation founded by JEHOVAH, Almighty God through the instrumentality of Saint Gideon Meriodere Urhobo, of the blessed memory. Saint G. M. Urhobo, the first President of the GKS, was the instrument or vessel of honour chosen by God through Jesus Christ, according to the election of grace, to accomplish His purpose – Matthew 24:31; Acts 15:14-18; 10:9-35; James 1:18.\n\nHistory:"),
 Document(metadata={'source': '/content/About the GKS.pdf'}, page_content='History:\n\nThe history of the God’s Kingdom Society is linked with the biography of her first President, Brother G. M. Urhobo, in respect of his career as an astute preacher of the gospel. He hailed from the Urhobo ethnic group and was the second son of a kind pagan gentleman, the Late Ukoli of Otovwodo, Agbassa in Warri, who

In [None]:
# Name the embedding model explicitly instead of relying on the library
# default. The vectors are persisted to disk by the Chroma cell below, and a
# persisted index can only be queried with the model that produced it, so the
# choice must not drift silently with a library upgrade.
# "sentence-transformers/all-mpnet-base-v2" is the default used by the pinned
# langchain-community release, so existing results are unchanged.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

In [None]:
# On-disk location passed to Chroma as `persist_directory` when the vector
# store is built.
persist_directory = "/content/chroma_db"

In [None]:
# Build the Chroma index once and reuse it afterwards.
# Calling Chroma.from_documents() against an already-populated
# persist_directory appends every chunk again (fresh ids each time), so each
# re-run of this cell would duplicate the corpus and make the retriever return
# repeated passages. Delete `persist_directory` to force a rebuild after the
# source PDFs or the embedding model change.
vectordb = None
if os.path.isdir(persist_directory):
    existing_store = Chroma(
        embedding_function=embeddings,
        persist_directory=persist_directory,
    )
    # Only reuse the store if it actually contains data; an empty directory
    # left over from an interrupted run falls through to a fresh build.
    if existing_store.get(limit=1)["ids"]:
        vectordb = existing_store

if vectordb is None:
    vectordb = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        persist_directory=persist_directory
    )

In [None]:
# Expose the vector store as a retriever for the QA chain. No arguments are
# passed, so the store's default search settings apply.
retriever = vectordb.as_retriever()

In [None]:
# Chat model served by Groq (credentials come from GROQ_API_KEY).
# "llama-3.1-70b-versatile" has been decommissioned by Groq and requests for it
# are rejected; "llama-3.3-70b-versatile" is the replacement Groq recommends.
# temperature=0 is kept from the original configuration.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0
)

In [None]:
# Assemble the retrieval QA chain from the Groq LLM and the Chroma retriever.
# return_source_documents=True makes each response carry the retrieved
# documents under "source_documents" next to the answer in "result".
chain_options = dict(
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
qa_chain = RetrievalQA.from_chain_type(llm=llm, **chain_options)

In [None]:
# Smoke-test the chain with a sample question. The chain is invoked with a
# {"query": ...} mapping and the result is kept in `response` for the cells
# below.
query = "Who is Urhobo?"
response = qa_chain.invoke({"query": query})

In [None]:
# Dump the raw response object; later cells read its "result" and
# "source_documents" entries.
print(response)



In [None]:
# Show which file the first retrieved document came from (its `source`
# metadata).
print(response["source_documents"][0].metadata["source"])

/content/About the GKS.pdf


In [None]:
# Second sample question: print the answer, a separator line, and the source
# file of the first retrieved document.
query = "Was Jesus hanged on a tree or cross?"
response = qa_chain.invoke({"query": query})

separator = "*" * 30
print(response["result"])
print(separator)
print("Source Document:", response["source_documents"][0].metadata["source"])

According to the provided text, Jesus Christ was hanged on a tree, not a cross. This is stated by St. Peter, an eyewitness to the crucifixion, in Acts 5:30-32, where he says that Jesus was "slew and hanged on a tree".
******************************
Source Document: /content/Was Jesus Christ hanged on a cross or tree_.pdf


In [None]:
# Make the questions dynamic using a chat interface.

# Boilerplate lead-in seen at the start of the model's answers in this
# notebook. It is removed before display, but only when the answer really
# starts with it.
_ANSWER_PREAMBLE = "According to the provided text, "


def process_question(user_question):
    """Answer a user's question with the retrieval QA chain.

    Args:
        user_question: Question text typed into the Gradio textbox.

    Returns:
        str: The chain's answer with the boilerplate preamble removed (when
        present) and every line wrapped to 80 columns. If the question is
        blank, a short prompt is returned and the chain is not called.
    """
    question = (user_question or "").strip()
    if not question:
        # Nothing to ask: avoid spending an LLM call on empty input.
        return "Please enter a question."

    # Same {"query": ...} payload the other cells use when calling qa_chain.
    response = qa_chain.invoke({"query": question})
    answer = response["result"].strip()

    # The previous implementation always dropped the first 32 characters (the
    # length of the preamble), which cut off real content whenever the answer
    # was phrased differently. Strip the preamble only when it is there, and
    # restore the capital letter at the start of the remaining sentence.
    if answer.startswith(_ANSWER_PREAMBLE):
        answer = answer[len(_ANSWER_PREAMBLE):]
        answer = answer[:1].upper() + answer[1:]

    # Wrap line by line: textwrap.fill() on the whole text would replace
    # newlines with spaces and merge paragraphs or lists into one block.
    return "\n".join(
        textwrap.fill(line, width=80) for line in answer.splitlines()
    )

# Gradio front end: a two-line question box wired to process_question, with
# the answer shown in a plain text box.
iface = gr.Interface(
    fn=process_question,
    inputs=gr.Textbox(lines=2, placeholder="Type your question here..."),
    outputs=gr.Textbox(),
    title="Chat GKS",
    description="Ask any question about the GKS and Her doctrines.",
)

# share=True asks Gradio for a temporary public URL in addition to serving
# the app from the notebook.
iface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://e5901caed96334946f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


