In [2]:
import chromadb
from chromadb.config import Settings

settings = Settings(chroma_db_impl="duckdb+parquet", persist_directory="db/") 

client = chromadb.Client()

In [6]:
bible_text = []  # Initialize an empty list

with open("../data/kjvdat.txt", "r", encoding="utf-8") as f:
    for line in f:  # Read the file line by line
        cleaned_line = line.strip()  # Remove extra whitespace/newlines
        if cleaned_line:  # Check if the line is not empty
            bible_text.append(cleaned_line)

# Print first 50 lines to check the result
print(bible_text[:2])


['Gen|1|1| In the beginning God created the heaven and the earth.~', 'Gen|1|2| And the earth was without form, and void; and darkness was upon the face of the deep. And the Spirit of God moved upon the face of the waters.~']


In [8]:
import ollama
embedding_model = "nomic-embed-text:latest"

def get_embedding(text):
   return ollama.embed(embedding_model, text)

In [3]:
import sqlite3

# Function to create the SQLite table
def create_bible_table():
    conn = sqlite3.connect("bible.db")
    cursor = conn.cursor()
    
    # Create the table if it doesn't exist
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS bible_verses (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            verse TEXT UNIQUE NOT NULL,
            text TEXT NOT NULL
        )
    """)
    
    conn.commit()
    conn.close()
    print("✅ SQLite table 'bible_verses' created successfully!")

# Function to insert verses into the database
def insert_verses_from_file(file_path):
    conn = sqlite3.connect("bible.db")
    cursor = conn.cursor()
    
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            cleaned_line = line.strip()  # Remove leading/trailing whitespace
            if cleaned_line:
                parts = cleaned_line.split("|", 3)  # Split into 4 parts
                
                if len(parts) == 4:
                    verse_ref = f"{parts[0]} {parts[1]}:{parts[2]}"  # Format: "Gen 1:1"
                    verse_text = parts[3].strip()  # Extract verse text

                    # Insert into database (ignore duplicates)
                    cursor.execute(
                        "INSERT OR IGNORE INTO bible_verses (verse, text) VALUES (?, ?)",
                        (verse_ref, verse_text)
                    )
    
    conn.commit()
    conn.close()
    print("✅ Bible verses successfully inserted into SQLite database!")

# Function to search for a specific verse
def search_verse(verse_reference):
    conn = sqlite3.connect("bible.db")
    cursor = conn.cursor()
    
    cursor.execute("SELECT text FROM bible_verses WHERE verse = ?", (verse_reference,))
    result = cursor.fetchone()
    
    conn.close()
    return result[0] if result else "❌ Verse not found."

create_bible_table()  # Step 1: Create the database table
insert_verses_from_file("../data/kjvdat.txt")  # Step 2: Insert Bible verses

# Step 3: Query Example
verse_text = search_verse("Gen 1:1")
print(f"📖 Genesis 1:1 - {verse_text}")


✅ SQLite table 'bible_verses' created successfully!
✅ Bible verses successfully inserted into SQLite database!
📖 Genesis 1:1 - In the beginning God created the heaven and the earth.~


In [14]:
import chromadb
import tqdm
import sqlite3
import json

# Initialize ChromaDB
client = chromadb.PersistentClient(path="literature_chroma_db")

# Create collections for Bible & Shakespeare
bible_collection = client.get_or_create_collection(name="bible_verses")
shakespeare_collection = client.get_or_create_collection(name="shakespeare_sentences")

# Load Bible verses into ChromaDB
def load_bible_into_chroma():
    conn = sqlite3.connect("bible.db")
    cursor = conn.cursor()
    cursor.execute("SELECT verse, text FROM bible_verses")
    verses = cursor.fetchall()
    
    for verse_ref, verse_text in tqdm(verses, desc="Loading Bible Verses into ChromaDB", unit="verse"):
        response = ollama.embed(embedding_model, verse_text)
        embedding = response["embeddings"]
        bible_collection.add(
            ids=[verse_ref],
            embeddings=[embedding],
            metadatas=[{"verse": verse_ref, "text": verse_text}]
        )
    
    conn.close()
    print("✅ Bible verses loaded into ChromaDB!")

# Load Shakespeare sentences into ChromaDB
def load_shakespeare_into_chroma():
    conn = sqlite3.connect("shakespeare.db")
    cursor = conn.cursor()
    cursor.execute("SELECT id, sentence FROM shakespeare_sentences")
    sentences = cursor.fetchall()
    for id_num, sentence in tqdm(sentences, desc="Loading Shakespeare Sentences into ChromaDB", unit="sentence"):
        embedding = get_embedding(sentence)
        shakespeare_collection.add(
            ids=[f"shakespeare_{id_num}"],
            embeddings=[embedding],
            metadatas=[{"sentence": sentence}]
        )
    
    conn.close()
    print("✅ Shakespeare sentences loaded into ChromaDB!")



In [None]:
import tqdm
shakespeare_collection = client.get_or_create_collection(name="shakespeare_sentences")

def load_shakespeare_into_chroma():
    conn = sqlite3.connect("shakespeare.db")
    cursor = conn.cursor()
    cursor.execute("SELECT id, sentence FROM shakespeare_sentences")
    sentences = cursor.fetchall()
    print(f"found: {sentences}")
    
    for id_num, sentence in sentences:
        response = ollama.embed(embedding_model, sentence)
        print(response)
        embedding = response["embeddings"]
        shakespeare_collection.add(
            ids=[f"shakespeare_{id_num}"],
            embeddings=[embedding],
            metadatas=[{"sentence": sentence}]
        )

    conn.close()
    print("✅ Shakespeare sentences successfully loaded into ChromaDB!")

load_shakespeare_into_chroma()


model='nomic-embed-text:latest' created_at=None done=None done_reason=None total_duration=36778100 load_duration=4077100 prompt_eval_count=42 prompt_eval_duration=None eval_count=None eval_duration=None embeddings=[[0.01491411, 0.07087237, -0.17409, -0.03419665, 0.004641032, -0.0012618429, -0.034135595, 0.085371174, 0.017503193, 0.030014476, 0.01623082, -0.011939034, 0.008999096, -0.017708346, -0.014030876, 0.009012386, -0.0116520105, 0.0032125453, -0.021101283, 0.038860578, -0.010933216, -0.04053854, -0.043071013, 0.019766444, 0.078553095, 0.02562076, -0.007108358, -0.031539697, -0.053737674, 0.031463377, -0.024647724, 0.010442899, 0.014177314, 0.03413647, -0.007256158, -0.024609854, -0.033740412, 0.0013566721, -0.045818053, 0.07303926, 0.038916804, 0.017972553, -0.026348766, -0.0059034643, 0.09039154, -0.016903024, 0.03297986, -0.004636079, 0.020369189, -0.004099851, 0.060046516, -0.056467418, -0.0018463683, -0.03801185, 0.07044417, 0.035466682, 0.01587765, -0.039365906, -0.021792438

: 

In [5]:
load_bible_into_chroma()
load_shakespeare_into_chroma()


TypeError: 'module' object is not callable