In [None]:
# Load bible data and create a dataframe from the pkl
import pandas as pd
import numpy as np

In [None]:

# Deserialize the pickled verse table into a DataFrame.
pickle_path = 'bible_df.pkl'
df = pd.read_pickle(pickle_path)

# Show which columns the table exposes before they are indexed further down.
print(df.columns)

In [None]:
# Load the bible into Chroma - supply the embeddings already stored in the dataframe
import chromadb
from chromadb.config import Settings

# Create a client backed by an on-disk store, and get (or create) the collection
collection_name = 'the_bible'
persist_directory = 'chroma'

client = chromadb.Client(Settings(persist_directory=persist_directory, chroma_db_impl="duckdb+parquet"))
# get_or_create_collection keeps this cell re-runnable: create_collection raises
# once the collection already exists in the persisted directory.
collection = client.get_or_create_collection(collection_name)

# Add the bible to the collection. The vectors are passed in explicitly from the
# dataframe's 'Embedding' column, so Chroma does not compute embeddings here.
# NOTE(review): confirm these vectors come from the same embedding model that is
# used to embed queries against this collection later on.

chapters = df['Chapter'].tolist()
books = df['Book Name'].tolist()
verses = df['Verse'].tolist()

# Only populate an empty collection, so re-running the cell does not try to
# insert the same verse ids a second time.
# NOTE(review): a partially populated collection is left as-is by this guard.
if collection.count() == 0:
    collection.add(
        ids=[str(verse_id) for verse_id in df['Verse ID'].tolist()],
        embeddings=df['Embedding'].tolist(),
        documents=df['Text'].tolist(),
        metadatas=[
            {"chapter": chapter, "book": book, "verse": verse}
            for chapter, book, verse in zip(chapters, books, verses)
        ],
    )

# Write the store out to persist_directory, then drop the client reference.
client.persist()
del client

In [None]:
# Fetch the stored entries back out of the collection.
# NOTE(review): this relies on get() returning embeddings by default - confirm
# for the installed chromadb version.
items = collection.get()

# Keep the vectors, plus the book name recorded in each entry's metadata.
embeddings = items['embeddings']
books = list(map(lambda entry: entry['book'], items['metadatas']))

# Report how many embeddings came back, then drop the collection reference.
print(len(embeddings))
del collection

In [None]:
# Project the bible embeddings to 2D using UMAP
import umap
import umap.plot

# UMAP is stochastic: without a fixed random_state every run produces a
# different 2D layout, so the figure cannot be reproduced. Pin the seed.
# NOTE(review): a fixed seed may make the fit slower - confirm acceptable.
mapper = umap.UMAP(random_state=42).fit(embeddings)

# One point per embedding, labelled by the book it came from. The trailing
# semicolon keeps the returned object's repr out of the cell output.
umap.plot.points(mapper, labels=np.array(books), background='black', show_legend=False);


In [1]:
# Let's ask God some questions
from langchain.vectorstores import Chroma
from langchain.chains import VectorDBQAWithSourcesChain
from langchain import OpenAI

collection_name = 'the_bible'
persist_directory = 'chroma'

# Read in the OpenAI API key from openai.key. The context manager closes the
# file handle, and strip() drops surrounding whitespace such as a trailing
# newline, which would otherwise be passed along as part of the key.
with open('openai.key', 'r') as key_file:
    openai_api_key = key_file.read().strip()

# Point the LangChain Chroma wrapper at the collection name and directory set
# above, then build the question-answering chain on top of it.
docsearch = Chroma(collection_name=collection_name, persist_directory=persist_directory)
chain = VectorDBQAWithSourcesChain.from_chain_type(OpenAI(temperature=0, openai_api_key=openai_api_key), chain_type="stuff", vectorstore=docsearch)

# Last expression of the cell: the chain's outputs are shown as the cell result.
chain({"question": "What is the greatest good?"}, return_only_outputs=True)

ModuleNotFoundError: No module named 'langchain.vectorstores'; 'langchain' is not a package