## This notebook-guide is illustrating the basic use-cases for the ChromaDB 

poetry install

In [2]:
# let's import all the things we need first
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders.pdf import PyPDFLoader
# from PyPDF2 import PdfReader

In [3]:
# load the document and split it into chunks

loader = PyPDFLoader("pdfs/Sports-And-Exercise-Nutrition.pdf")
documents = loader.load()

In [4]:
# split it into chunks
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)

texts = text_splitter.split_documents(documents)

Created a chunk of size 1307, which is longer than the specified 1000
Created a chunk of size 1346, which is longer than the specified 1000
Created a chunk of size 1419, which is longer than the specified 1000
Created a chunk of size 1102, which is longer than the specified 1000
Created a chunk of size 1230, which is longer than the specified 1000
Created a chunk of size 1248, which is longer than the specified 1000
Created a chunk of size 1496, which is longer than the specified 1000
Created a chunk of size 1050, which is longer than the specified 1000
Created a chunk of size 2026, which is longer than the specified 1000
Created a chunk of size 1118, which is longer than the specified 1000
Created a chunk of size 1029, which is longer than the specified 1000
Created a chunk of size 1571, which is longer than the specified 1000
Created a chunk of size 1631, which is longer than the specified 1000
Created a chunk of size 2110, which is longer than the specified 1000
Created a chunk of s

In [5]:
import os
from langchain.embeddings.cohere import CohereEmbeddings
COHERE_API_KEY = ""
cohere_embeddings = CohereEmbeddings(cohere_api_key=COHERE_API_KEY) # type: ignore

In [6]:
db_using_cohere_embeddings = Chroma.from_documents(documents=texts, embedding=cohere_embeddings)

In [7]:
# query it
query = "What is the best time to eat before a workout?"

In [8]:
docs =  db_using_cohere_embeddings.similarity_search(query)

In [9]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import Cohere

def load_chain(chain_type: str = "map_reduce"):
    """
    this function is loading the chain and sets it up for the agent to use
    :param chain_type: the type of chain to use
    :return: the LLM chain object
    """
    chain = load_qa_chain(
        Cohere(cohere_api_key=COHERE_API_KEY, verbose=True),  # type: ignore
        chain_type=chain_type,
        # Setting verbose to True will print out some internal states of the Chain object while it is being ran.
        verbose=False,
    )
    return chain

In [10]:
chain = load_chain()
response = chain.run(input_documents=docs, question=query)

  from .autonotebook import tqdm as notebook_tqdm
Token indices sequence length is longer than the specified maximum sequence length for this model (1774 > 1024). Running this sequence through the model will result in indexing errors


In [11]:
print("RESPONSE FROM COHERE EMBEDDINGS: ", response)

RESPONSE FROM COHERE EMBEDDINGS:   A pre-exercise meal should contain sufficient fluid.


In [12]:
import chromadb
from chromadb.config import Settings

client = chromadb.Client(Settings(
    # chroma_db_impl=duckdb+parquet, 
    persist_directory=".chroma/"
))

In [13]:
my_collection = client.create_collection("book_embeds_2")

In [14]:
print(docs[0].page_content)

In summary, carbohydrate consumed in the 3–4 
hours prior to an event may help to achieve the  following sports nutrition goals:
● to continue to fill muscle glycogen stores if they have not fully restored or loaded since the last exercise session;
● to restore liver glycogen levels, especially for events undertaken in the morning where liver stores are depleted from an overnight fast;
● to prevent hunger (yet avoid gastrointestinal  discomfort and upset during exercise);
● to improve endurance capacity during prolonged exercise.
Carbohydrate within the hour before exercise and performanceEarly studies on the influence of ingesting carbo-hydrate within the hour before exercise suggested that this practice would reduce performance. These studies showed that there was a greater rate of glycogen degradation during exercise after ingesting a concentrated carbohydrate solution and in one study fatigue occurred sooner (19%) during cycling to exhaustion at 80% V o


In [15]:
ls = []
for doc in docs:
    ls.append(doc.page_content)


In [16]:
my_embeddings = cohere_embeddings.embed_documents(ls)

In [17]:
my_collection.add(
    documents=docs,
    embeddings=my_embeddings,
    ids=str([i for i in range(len(my_embeddings))])
)

ValueError: Number of embeddings 4 must match number of ids 1