# Bible ChatBot

In [1]:
# Load variables from a .env file (if present) into the process environment,
# then read the Groq API key; the value is None when the variable is not set.
import os

from dotenv import load_dotenv

load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")


## Loading Data

In [2]:
from langchain_community.document_loaders import PyPDFLoader

# Location of the source PDF, relative to this notebook
data_path = "../data/whole_bible_niv1984.pdf"

# Build the PDF loader, then read the file into a list of Document objects
loader = PyPDFLoader(file_path=data_path)
docs = loader.load()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Preview the first 10 loaded documents (page text plus PDF metadata)
docs[:10]

[Document(metadata={'producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)', 'creator': 'Microsoft® Word 2013', 'creationdate': '2018-06-28T21:57:28-07:00', 'title': 'Holy Bible - New International Version', 'author': 'Biiible', 'moddate': '2018-06-28T14:14:01+00:00', 'source': '../data/whole_bible_niv1984.pdf', 'total_pages': 1871, 'page': 0, 'page_label': '1'}, page_content='1 \n \n  \nHoly Bible \nNew International Version \n \n \n \n \n \n \n \n \nAbout the New International Version –  \nThe New International Version was undertaken by an independent \ncommittee in after a general consensus that there was a need for a \nnew, contemporary English translation of the Bible. \n \nWith the help of scholars from all over the world, and multiple reviews \nfrom a committee of multiple denominations, the New International \nVersion has earned the widespread respect of all Christians as one of \nthe best translations available.'),
 Document(metadata={'producer':

In [4]:
# Total number of loaded documents (i.e. the number of PDF pages;
# it matches the 'total_pages' value in the document metadata)
len(docs)

1871

## Splitting Data

In [5]:
# Cut every page-level document into smaller, overlapping chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,     # maximum size of a single chunk
    chunk_overlap=100,  # amount shared between neighbouring chunks
)
split_documents = splitter.split_documents(docs)

In [6]:
# Preview the first 10 chunks produced by the splitter
split_documents[:10]

[Document(metadata={'producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)', 'creator': 'Microsoft® Word 2013', 'creationdate': '2018-06-28T21:57:28-07:00', 'title': 'Holy Bible - New International Version', 'author': 'Biiible', 'moddate': '2018-06-28T14:14:01+00:00', 'source': '../data/whole_bible_niv1984.pdf', 'total_pages': 1871, 'page': 0, 'page_label': '1'}, page_content='1 \n \n  \nHoly Bible \nNew International Version \n \n \n \n \n \n \n \n \nAbout the New International Version –  \nThe New International Version was undertaken by an independent \ncommittee in after a general consensus that there was a need for a \nnew, contemporary English translation of the Bible. \n \nWith the help of scholars from all over the world, and multiple reviews \nfrom a committee of multiple denominations, the New International \nVersion has earned the widespread respect of all Christians as one of'),
 Document(metadata={'producer': '3-Heights(TM) PDF Optimization Sh

## Creating Embeddings

In [7]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model wrapper that is handed to the vector store
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

## Creating Vector Stores

In [8]:
from langchain_chroma import Chroma

# Largest number of documents sent to the store in one call; inserting the
# whole corpus at once raised an error, hence the batching below.
BATCH_SIZE = 5461  # must be <= 5461

# Open (or create) the persisted collection WITHOUT ingesting anything yet.
# The previous version called Chroma.from_documents(documents=split_documents, ...)
# here, which (a) attempted the oversized single insert that errors out and
# (b) when it did succeed, stored every chunk once before the loop below
# stored all of them again, leaving each chunk in the collection twice.
# NOTE(review): the collection is named "kjv_vectorstore" although the source
# PDF is the NIV; the name is kept so existing persisted data still resolves.
vector_store = Chroma(
    collection_name="kjv_vectorstore",
    embedding_function=embeddings,
    persist_directory="../bible_vectorstore",
)

# Ingest every chunk exactly once, BATCH_SIZE documents at a time.
# NOTE(review): the store is persisted on disk, so re-running this cell adds
# the chunks again; clear "../bible_vectorstore" before a full re-ingest.
for i in range(0, len(split_documents), BATCH_SIZE):
    batch = split_documents[i : i + BATCH_SIZE]
    vector_store.add_documents(batch)


In [9]:
# Smoke-test retrieval with a sample question.
# NOTE(review): the first hit in the recorded output describes Abraham as the
# father of Isaac rather than naming Abraham's own father, so retrieval
# quality for this kind of question is worth checking.
vector_store.similarity_search("Who is the father of Abraham")

[Document(id='5ce209ef-240e-4857-951c-61529d5f21e7', metadata={'author': 'Biiible', 'creator': 'Microsoft® Word 2013', 'moddate': '2018-06-28T14:14:01+00:00', 'total_pages': 1871, 'page_label': '39', 'creationdate': '2018-06-28T21:57:28-07:00', 'title': 'Holy Bible - New International Version', 'producer': '3-Heights(TM) PDF Optimization Shell 4.8.25.2 (http://www.pdf-tools.com)', 'source': '../data/whole_bible_niv1984.pdf', 'page': 38}, page_content="and thirty-seven years. He breathed his \nlast and died, and he was gathered to \nhis people.  \n18His descendants settled in the area \nfrom Havilah to Shur, near the border of \nEgypt, as you go toward Asshur. And \nthey lived in hostility toward all their \nbrothers.  \n19This is the account of Abraham's son \nIsaac. Abraham became the father of \nIsaac,  \n20and Isaac was forty years old when he \nmarried Rebekah daughter of Bethuel \nthe Aramean from Paddan Aram and \nsister of Laban the Aramean."),
 Document(id='7965c243-2573-43b4-8