In [44]:
import chromadb
import pandas as pd
import pprint

In [2]:
def load_bible_csv(file_path="../../data/bible/cleaned/new american standard cleaned.csv"):
    """Read a Bible verse CSV into a DataFrame and print its summary.

    Args:
        file_path: Path handed to ``pd.read_csv``. The default points at the
            cleaned New American Standard file, relative to the notebook.

    Returns:
        The ``pandas.DataFrame`` parsed from ``file_path``.
    """
    verses_df = pd.read_csv(file_path)
    # DataFrame.info() writes the summary itself and returns None, so wrapping
    # it in print() only appended a stray "None" line to the cell output.
    verses_df.info()
    return verses_df

In [21]:
def get_chroma_bible_collection(
    db_path='/Volumes/4Tera_SSD_2022/chroma_db/bible',
    collection_name="bible_verses_v1",
):
    """Open the persistent Chroma store and return the Bible verse collection.

    Args:
        db_path: Directory passed to ``chromadb.PersistentClient``. The default
            is the original author's external volume; pass a different path
            when running on another machine.
        collection_name: Name passed to ``get_or_create_collection``, so the
            collection is created when it does not exist yet.

    Returns:
        The Chroma collection object for ``collection_name``.
    """
    client = chromadb.PersistentClient(path=db_path)
    collection = client.get_or_create_collection(name=collection_name)
    print(f"collection retrieved with count: {collection.count()}")
    # NOTE: the peek payload includes the raw embedding vectors, so this line
    # produces a very long output.
    print(f"collection items peek: {collection.peek()}")
    return collection

In [14]:
def create_documents_for_bible_verses(
    bible_verses_df,
    bible_collection,
    translation="new american standard",
    batch_size=1000,
):
    """Add every verse of a DataFrame to a Chroma collection, in batches.

    Each verse becomes one document whose id is
    ``"{translation} {book} {chapter} {verse_number}"`` and whose metadata
    carries the translation, book, chapter, verse number and testament flag.

    Args:
        bible_verses_df: DataFrame with the columns ``verse_text``, ``book``,
            ``chapter``, ``verse_number`` and ``is_new_testament``.
        bible_collection: Object exposing
            ``add(ids=..., documents=..., metadatas=...)``.
        translation: Label stored in the metadata and used as the id prefix.
        batch_size: Maximum number of verses sent per ``add`` call.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    Note:
        Ids already present in the collection are submitted again on a rerun;
        the notebook output shows Chroma logging "Add of existing embedding
        ID" for them. Ids are assumed unique within the DataFrame -- TODO
        confirm, since a batched ``add`` may reject duplicates in one call.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    if len(bible_verses_df) == 0:
        # Nothing to add; also avoids column lookups on a column-less frame.
        return

    # Pull each column out once instead of materialising a Series per row
    # with iterrows(); tolist() returns plain Python scalars for the metadata.
    verse_texts = bible_verses_df['verse_text'].tolist()
    metadatas = [
        {
            "translation": translation,
            "book": book,
            "chapter": chapter,
            "verse_number": verse_number,
            "is_new_testament": is_new_testament,
        }
        for book, chapter, verse_number, is_new_testament in zip(
            bible_verses_df['book'].tolist(),
            bible_verses_df['chapter'].tolist(),
            bible_verses_df['verse_number'].tolist(),
            bible_verses_df['is_new_testament'].tolist(),
        )
    ]
    verse_ids = [
        f"{translation} {meta['book']} {meta['chapter']} {meta['verse_number']}"
        for meta in metadatas
    ]

    # One add() per batch instead of one per verse keeps the number of
    # round-trips to the collection small.
    for start in range(0, len(verse_ids), batch_size):
        stop = start + batch_size
        bible_collection.add(
            ids=verse_ids[start:stop],
            documents=verse_texts[start:stop],
            metadatas=metadatas[start:stop],
        )

In [27]:
# Ingest step: read the verse table, open the collection, then add every verse.
# For a quick trial run, assign df.iloc[:1000] to bible_verses_df instead.
df = load_bible_csv()
bible_collection = get_chroma_bible_collection()
bible_verses_df = df
create_documents_for_bible_verses(bible_verses_df, bible_collection)

Insert of existing embedding ID: new american standard genesis 1 1
Add of existing embedding ID: new american standard genesis 1 1
Insert of existing embedding ID: new american standard genesis 1 2
Add of existing embedding ID: new american standard genesis 1 2
Insert of existing embedding ID: new american standard genesis 1 3
Add of existing embedding ID: new american standard genesis 1 3
Insert of existing embedding ID: new american standard genesis 1 4
Add of existing embedding ID: new american standard genesis 1 4
Insert of existing embedding ID: new american standard genesis 1 5
Add of existing embedding ID: new american standard genesis 1 5
Insert of existing embedding ID: new american standard genesis 1 6
Add of existing embedding ID: new american standard genesis 1 6
Insert of existing embedding ID: new american standard genesis 1 7
Add of existing embedding ID: new american standard genesis 1 7
Insert of existing embedding ID: new american standard genesis 1 8
Add of existing 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31102 entries, 0 to 31101
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   verse_text        31102 non-null  object
 1   book              31102 non-null  object
 2   chapter           31102 non-null  int64 
 3   verse_number      31102 non-null  int64 
 4   is_new_testament  31102 non-null  int64 
dtypes: int64(3), object(2)
memory usage: 1.2+ MB
None
collection retrieved with count: 1000
collection items peek: {'ids': ['new american standard genesis 1 1', 'new american standard genesis 1 2', 'new american standard genesis 1 3', 'new american standard genesis 1 4', 'new american standard genesis 1 5', 'new american standard genesis 1 6', 'new american standard genesis 1 7', 'new american standard genesis 1 8', 'new american standard genesis 1 9', 'new american standard genesis 1 10'], 'embeddings': [[0.01197346206754446, 0.06193183735013008, -0.000726767058

Insert of existing embedding ID: new american standard genesis 1 12
Add of existing embedding ID: new american standard genesis 1 12
Insert of existing embedding ID: new american standard genesis 1 13
Add of existing embedding ID: new american standard genesis 1 13
Insert of existing embedding ID: new american standard genesis 1 14
Add of existing embedding ID: new american standard genesis 1 14
Insert of existing embedding ID: new american standard genesis 1 15
Add of existing embedding ID: new american standard genesis 1 15
Insert of existing embedding ID: new american standard genesis 1 16
Add of existing embedding ID: new american standard genesis 1 16
Insert of existing embedding ID: new american standard genesis 1 17
Add of existing embedding ID: new american standard genesis 1 17
Insert of existing embedding ID: new american standard genesis 1 18
Add of existing embedding ID: new american standard genesis 1 18
Insert of existing embedding ID: new american standard genesis 1 19
A

In [72]:
def map_query_result_to_domain_model(result):
    """Flatten the first query's hits of a query result into verse dicts.

    Only index ``[0]`` of ``result['ids']``, ``result['documents']``,
    ``result['metadatas']`` and ``result['distances']`` is read, so hits for
    any additional query texts are ignored.

    Args:
        result: Mapping with the four keys above, each holding a list whose
            first element is the per-hit list for the first query.

    Returns:
        A list with one dict per hit containing ``id``, ``verse_text``,
        ``book``, ``chapter``, ``verse_number``, ``translation``,
        ``is_new_testament`` and ``distance``.
    """
    first_query_hits = zip(
        result['ids'][0],
        result['documents'][0],
        result['metadatas'][0],
        result['distances'][0],
    )

    verses = []
    for verse_id, verse_text, meta, hit_distance in first_query_hits:
        verses.append(
            {
                'id': verse_id,
                'verse_text': verse_text,
                'book': meta['book'],
                'chapter': meta['chapter'],
                'verse_number': meta['verse_number'],
                'translation': meta['translation'],
                'is_new_testament': meta['is_new_testament'],
                'distance': hit_distance,
            }
        )
    return verses

def print_bible_collection_query_result(domain_model_results):
    """Print one line per verse: ``<distance> <book> <chapter>:<verse> - <text>``.

    Args:
        domain_model_results: Iterable of dicts providing the keys
            ``distance``, ``book``, ``chapter``, ``verse_number`` and
            ``verse_text``.
    """
    for verse in domain_model_results:
        distance = verse['distance']
        book = verse['book']
        chapter = verse['chapter']
        verse_number = verse['verse_number']
        verse_text = verse['verse_text']
        print(f"{distance} {book} {chapter}:{verse_number} - {verse_text}")



In [86]:
# https://docs.trychroma.com/usage-guide
# Nearest 1000 verses to the text "jesus"; the `where` dict is left empty here.
# Filter variants that were tried and can be swapped in:
#   where={"book": "revelation"}    # case sensitive
#   where={"$and": [{"book": "revelation"}, {"is_new_testament": 1}]}
#   where_document={"$contains": "God"}
result = bible_collection.query(query_texts=["jesus"], n_results=1000, where={})

# Uncomment print(result) to inspect the raw response; otherwise the hits are
# shown one per line through the helpers.
domain_model_results = map_query_result_to_domain_model(result)
print_bible_collection_query_result(domain_model_results)


1.0166436433792114 john 11:35 - Jesus wept.
1.1058306694030762 1 corinthians 16:23 - The grace of the Lord Jesus be with you.
1.1094359159469604 2 thessalonians 3:18 - The grace of our Lord Jesus Christ be with you all.
1.1171596050262451 1 thessalonians 5:28 - The grace of our Lord Jesus Christ be with you.
1.1432687044143677 john 12:44 - And Jesus cried out and said, "He who believes in Me, does not believe in Me but in Him who sent Me.
1.1440531015396118 luke 8:28 - Seeing Jesus, he cried out and fell before Him, and said in a loud voice, "What business do we have with each other, Jesus, Son of the Most High God? I beg You, do not torment me."
1.1454205513000488 1 corinthians 16:24 - My love be with you all in Christ Jesus. Amen.
1.1765319108963013 john 3:2 - this man came to Jesus by night and said to Him, "Rabbi, we know that You have come from God as a teacher; for no one can do these signs that You do unless God is with him."
1.1804759502410889 john 1:29 - The next day he *saw J

In [76]:
# Ten nearest verses to the text "hell", limited by a metadata filter to rows
# whose `book` is "revelation"; the raw result dict is printed unformatted.
# Optional document filter that was tried: where_document={"$contains": "beginning"}
result = bible_collection.query(
    where={"book": "revelation"},
    n_results=10,
    query_texts=["hell"],
)
print(result)

{'ids': [['new american standard revelation 12 10', 'new american standard revelation 2 10', 'new american standard revelation 19 1', 'new american standard revelation 22 21', 'new american standard revelation 20 7', 'new american standard revelation 10 6', 'new american standard revelation 2 22', 'new american standard revelation 18 4', 'new american standard revelation 18 5', 'new american standard revelation 14 10']], 'distances': [[1.4831370115280151, 1.4916678667068481, 1.4990198612213135, 1.501265400368289, 1.5412180423736572, 1.5436162948608398, 1.5486841201782227, 1.560583472251892, 1.5631625652313232, 1.5634431838989258]], 'metadatas': [[{'book': 'revelation', 'chapter': 12, 'is_new_testament': 1, 'translation': 'new american standard', 'verse_number': 10}, {'book': 'revelation', 'chapter': 2, 'is_new_testament': 1, 'translation': 'new american standard', 'verse_number': 10}, {'book': 'revelation', 'chapter': 19, 'is_new_testament': 1, 'translation': 'new american standard', '