Skip to content

Commit

Permalink
fix chroma notebook in docs (#6872)
Browse files (browse the repository at this point in the history)
  • Loading branch information
logan-markewich committed Jul 12, 2023
1 parent 0fa8662 commit 558fce8
Showing 1 changed file with 27 additions and 108 deletions.
135 changes: 27 additions & 108 deletions docs/examples/vector_stores/ChromaIndexDemo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,20 @@
"outputs": [],
"source": [
"!pip install llama-index\n",
"!pip install langchain\n",
"!pip install chromadb"
"!pip install chromadb\n",
"!pip install sentence-transformers\n",
"!pip install pydantic==1.10.11"
]
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 6,
"id": "d48af8e1",
"metadata": {},
"outputs": [],
"source": [
"# import\n",
"from llama_index import VectorStoreIndex, SimpleDirectoryReader\n",
"from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
"from llama_index.vector_stores import ChromaVectorStore\n",
"from llama_index.storage.storage_context import StorageContext\n",
"from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
Expand All @@ -99,7 +100,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 8,
"id": "374a148b",
"metadata": {},
"outputs": [],
Expand All @@ -108,53 +109,23 @@
"import os\n",
"import getpass\n",
"\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"import openai\n",
"\n",
"openai.api_key = os.environ[\"OPENAI_API_KEY\"]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 9,
"id": "667f3cb3-ce18-48d5-b9aa-bfc1a1f0f0f6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n",
"Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n",
"Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n",
"INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cpu\n",
"Use pytorch device: cpu\n",
"Use pytorch device: cpu\n",
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
"> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
"> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
"> [retrieve] Total LLM token usage: 0 tokens\n",
"> [retrieve] Total LLM token usage: 0 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens\n",
"> [retrieve] Total embedding token usage: 8 tokens\n",
"> [retrieve] Total embedding token usage: 8 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1874 tokens\n",
"> [get_response] Total LLM token usage: 1874 tokens\n",
"> [get_response] Total LLM token usage: 1874 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
"> [get_response] Total embedding token usage: 0 tokens\n",
"> [get_response] Total embedding token usage: 0 tokens\n"
]
},
{
"data": {
"text/markdown": [
"<b>\n",
"The author grew up writing essays, learning Italian, exploring Florence, painting people, working with computers, studying at RISD, living in a rent-controlled apartment, building an online store builder, editing code, publishing essays online, writing essays, working on spam filters, cooking for groups, buying a building, and attending parties.</b>"
"Growing up, the author wrote short stories, programmed on an IBM 1401, and wrote programs on a TRS-80 microcomputer. He also took painting classes at Harvard and worked as a de facto studio assistant for a painter. He also tried to start a company to put art galleries online, and wrote software to build online stores.</b>"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
Expand Down Expand Up @@ -182,12 +153,13 @@
"# set up ChromaVectorStore and load in data\n",
"vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
"service_context = ServiceContext.from_defaults(embed_model=embed_model)\n",
"index = VectorStoreIndex.from_documents(\n",
" documents, storage_context=storage_context, embed_model=embed_model\n",
" documents, storage_context=storage_context, service_context=service_context\n",
")\n",
"\n",
"# Query Data\n",
"query_engine = index.as_query_engine(chroma_collection=chroma_collection)\n",
"query_engine = index.as_query_engine()\n",
"response = query_engine.query(\"What did the author do growing up?\")\n",
"display(Markdown(f\"<b>{response}</b>\"))"
]
Expand All @@ -209,72 +181,15 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 10,
"id": "9c3a56a5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"INFO:chromadb.db.duckdb:loaded in 20 embeddings\n",
"loaded in 20 embeddings\n",
"loaded in 20 embeddings\n",
"INFO:chromadb.db.duckdb:loaded in 1 collections\n",
"loaded in 1 collections\n",
"loaded in 1 collections\n",
"INFO:chromadb.db.duckdb:collection with name quickstart already exists, returning existing collection\n",
"collection with name quickstart already exists, returning existing collection\n",
"collection with name quickstart already exists, returning existing collection\n",
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
"> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
"> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
"INFO:chromadb.db.duckdb:Persisting DB to disk, putting it in the save folder: ./chroma_db\n",
"Persisting DB to disk, putting it in the save folder: ./chroma_db\n",
"Persisting DB to disk, putting it in the save folder: ./chroma_db\n",
"INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
"INFO:chromadb.db.duckdb:loaded in 40 embeddings\n",
"loaded in 40 embeddings\n",
"loaded in 40 embeddings\n",
"INFO:chromadb.db.duckdb:loaded in 1 collections\n",
"loaded in 1 collections\n",
"loaded in 1 collections\n",
"INFO:chromadb.db.duckdb:collection with name quickstart already exists, returning existing collection\n",
"collection with name quickstart already exists, returning existing collection\n",
"collection with name quickstart already exists, returning existing collection\n",
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens\n",
"> [build_index_from_nodes] Total embedding token usage: 0 tokens\n",
"> [build_index_from_nodes] Total embedding token usage: 0 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
"> [retrieve] Total LLM token usage: 0 tokens\n",
"> [retrieve] Total LLM token usage: 0 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens\n",
"> [retrieve] Total embedding token usage: 8 tokens\n",
"> [retrieve] Total embedding token usage: 8 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1877 tokens\n",
"> [get_response] Total LLM token usage: 1877 tokens\n",
"> [get_response] Total LLM token usage: 1877 tokens\n",
"INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
"> [get_response] Total embedding token usage: 0 tokens\n",
"> [get_response] Total embedding token usage: 0 tokens\n"
]
},
{
"data": {
"text/markdown": [
"<b>\n",
"The author grew up skipping a step in the evolution of computers, learning Italian, exploring Florence, painting people, working with technology companies, seeking signature styles at RISD, living in a rent-stabilized apartment, launching an online store builder, editing Lisp expressions, and publishing essays online.</b>"
"Growing up, the author wrote short stories, programmed on an IBM 1401, and wrote programs on a TRS-80 microcomputer. He also took painting classes at Harvard and worked as a de facto studio assistant for a painter. He also tried to start a company to put art galleries online, and wrote software to build online stores.</b>"
],
"text/plain": [
"<IPython.core.display.Markdown object>"
Expand All @@ -294,8 +209,9 @@
"chroma_collection = db.get_or_create_collection(\"quickstart\")\n",
"vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
"service_context = ServiceContext.from_defaults(embed_model=embed_model)\n",
"index = VectorStoreIndex.from_documents(\n",
" documents, storage_context=storage_context, embed_model=embed_model\n",
" documents, storage_context=storage_context, service_context=service_context\n",
")\n",
"db.persist()\n",
"\n",
Expand All @@ -307,11 +223,13 @@
"vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
"index = VectorStoreIndex.from_vector_store(\n",
" vector_store=vector_store, storage_context=storage_context, embed_model=embed_model\n",
" vector_store=vector_store,\n",
" storage_context=storage_context,\n",
" service_context=service_context,\n",
")\n",
"\n",
"# Query Data from the persisted index\n",
"query_engine = index.as_query_engine(chroma_collection=chroma_collection)\n",
"query_engine = index.as_query_engine()\n",
"response = query_engine.query(\"What did the author do growing up?\")\n",
"display(Markdown(f\"<b>{response}</b>\"))"
]
Expand Down Expand Up @@ -371,8 +289,9 @@
"chroma_collection = remote_db.get_or_create_collection(\"quickstart\")\n",
"vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
"storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
"service_context = ServiceContext.from_defaults(embed_model=embed_model)\n",
"index = VectorStoreIndex.from_documents(\n",
" documents, storage_context=storage_context, embed_model=embed_model\n",
" documents, storage_context=storage_context, service_context=service_context\n",
")"
]
},
Expand Down Expand Up @@ -416,7 +335,7 @@
],
"source": [
"# Query Data from the Chroma Docker index\n",
"query_engine = index.as_query_engine(chroma_collection=chroma_collection)\n",
"query_engine = index.as_query_engine()\n",
"response = query_engine.query(\"What did the author do growing up?\")\n",
"display(Markdown(f\"<b>{response}</b>\"))"
]
Expand Down Expand Up @@ -487,7 +406,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.9.6"
},
"vscode": {
"interpreter": {
Expand Down

0 comments on commit 558fce8

Please sign in to comment.