fix chroma notebook in docs (#6872)

run-llama · Jul 12, 2023 · 558fce8 · 558fce8
1 parent 0fa8662
commit 558fce8
Showing 1 changed file with 27 additions and 108 deletions.
diff --git a/docs/examples/vector_stores/ChromaIndexDemo.ipynb b/docs/examples/vector_stores/ChromaIndexDemo.ipynb
@@ -76,19 +76,20 @@
    "outputs": [],
    "source": [
     "!pip install llama-index\n",
-    "!pip install langchain\n",
-    "!pip install chromadb"
+    "!pip install chromadb\n",
+    "!pip install sentence-transformers\n",
+    "!pip install pydantic==1.10.11"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 6,
    "id": "d48af8e1",
    "metadata": {},
    "outputs": [],
    "source": [
     "# import\n",
-    "from llama_index import VectorStoreIndex, SimpleDirectoryReader\n",
+    "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
     "from llama_index.vector_stores import ChromaVectorStore\n",
     "from llama_index.storage.storage_context import StorageContext\n",
     "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n",
@@ -99,7 +100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 8,
    "id": "374a148b",
    "metadata": {},
    "outputs": [],
@@ -108,53 +109,23 @@
     "import os\n",
     "import getpass\n",
     "\n",
-    "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
+    "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
+    "import openai\n",
+    "\n",
+    "openai.api_key = os.environ[\"OPENAI_API_KEY\"]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 9,
    "id": "667f3cb3-ce18-48d5-b9aa-bfc1a1f0f0f6",
    "metadata": {},
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
-      "Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
-      "Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
-      "INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n",
-      "Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n",
-      "Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2\n",
-      "INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cpu\n",
-      "Use pytorch device: cpu\n",
-      "Use pytorch device: cpu\n",
-      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
-      "> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
-      "> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
-      "> [retrieve] Total LLM token usage: 0 tokens\n",
-      "> [retrieve] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens\n",
-      "> [retrieve] Total embedding token usage: 8 tokens\n",
-      "> [retrieve] Total embedding token usage: 8 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1874 tokens\n",
-      "> [get_response] Total LLM token usage: 1874 tokens\n",
-      "> [get_response] Total LLM token usage: 1874 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
-      "> [get_response] Total embedding token usage: 0 tokens\n",
-      "> [get_response] Total embedding token usage: 0 tokens\n"
-     ]
-    },
     {
      "data": {
       "text/markdown": [
        "<b>\n",
-       "The author grew up writing essays, learning Italian, exploring Florence, painting people, working with computers, studying at RISD, living in a rent-controlled apartment, building an online store builder, editing code, publishing essays online, writing essays, working on spam filters, cooking for groups, buying a building, and attending parties.</b>"
+       "Growing up, the author wrote short stories, programmed on an IBM 1401, and wrote programs on a TRS-80 microcomputer. He also took painting classes at Harvard and worked as a de facto studio assistant for a painter. He also tried to start a company to put art galleries online, and wrote software to build online stores.</b>"
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -182,12 +153,13 @@
     "# set up ChromaVectorStore and load in data\n",
     "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
     "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
+    "service_context = ServiceContext.from_defaults(embed_model=embed_model)\n",
     "index = VectorStoreIndex.from_documents(\n",
-    "    documents, storage_context=storage_context, embed_model=embed_model\n",
+    "    documents, storage_context=storage_context, service_context=service_context\n",
     ")\n",
     "\n",
     "# Query Data\n",
-    "query_engine = index.as_query_engine(chroma_collection=chroma_collection)\n",
+    "query_engine = index.as_query_engine()\n",
     "response = query_engine.query(\"What did the author do growing up?\")\n",
     "display(Markdown(f\"<b>{response}</b>\"))"
    ]
@@ -209,72 +181,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 10,
    "id": "9c3a56a5",
    "metadata": {},
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
-      "Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
-      "Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
-      "INFO:chromadb.db.duckdb:loaded in 20 embeddings\n",
-      "loaded in 20 embeddings\n",
-      "loaded in 20 embeddings\n",
-      "INFO:chromadb.db.duckdb:loaded in 1 collections\n",
-      "loaded in 1 collections\n",
-      "loaded in 1 collections\n",
-      "INFO:chromadb.db.duckdb:collection with name quickstart already exists, returning existing collection\n",
-      "collection with name quickstart already exists, returning existing collection\n",
-      "collection with name quickstart already exists, returning existing collection\n",
-      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
-      "> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
-      "> [build_index_from_nodes] Total embedding token usage: 17038 tokens\n",
-      "INFO:chromadb.db.duckdb:Persisting DB to disk, putting it in the save folder: ./chroma_db\n",
-      "Persisting DB to disk, putting it in the save folder: ./chroma_db\n",
-      "Persisting DB to disk, putting it in the save folder: ./chroma_db\n",
-      "INFO:chromadb.telemetry.posthog:Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
-      "Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
-      "Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.\n",
-      "INFO:chromadb.db.duckdb:loaded in 40 embeddings\n",
-      "loaded in 40 embeddings\n",
-      "loaded in 40 embeddings\n",
-      "INFO:chromadb.db.duckdb:loaded in 1 collections\n",
-      "loaded in 1 collections\n",
-      "loaded in 1 collections\n",
-      "INFO:chromadb.db.duckdb:collection with name quickstart already exists, returning existing collection\n",
-      "collection with name quickstart already exists, returning existing collection\n",
-      "collection with name quickstart already exists, returning existing collection\n",
-      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 0 tokens\n",
-      "> [build_index_from_nodes] Total embedding token usage: 0 tokens\n",
-      "> [build_index_from_nodes] Total embedding token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
-      "> [retrieve] Total LLM token usage: 0 tokens\n",
-      "> [retrieve] Total LLM token usage: 0 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens\n",
-      "> [retrieve] Total embedding token usage: 8 tokens\n",
-      "> [retrieve] Total embedding token usage: 8 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1877 tokens\n",
-      "> [get_response] Total LLM token usage: 1877 tokens\n",
-      "> [get_response] Total LLM token usage: 1877 tokens\n",
-      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
-      "> [get_response] Total embedding token usage: 0 tokens\n",
-      "> [get_response] Total embedding token usage: 0 tokens\n"
-     ]
-    },
     {
      "data": {
       "text/markdown": [
        "<b>\n",
-       "The author grew up skipping a step in the evolution of computers, learning Italian, exploring Florence, painting people, working with technology companies, seeking signature styles at RISD, living in a rent-stabilized apartment, launching an online store builder, editing Lisp expressions, and publishing essays online.</b>"
+       "Growing up, the author wrote short stories, programmed on an IBM 1401, and wrote programs on a TRS-80 microcomputer. He also took painting classes at Harvard and worked as a de facto studio assistant for a painter. He also tried to start a company to put art galleries online, and wrote software to build online stores.</b>"
       ],
       "text/plain": [
        "<IPython.core.display.Markdown object>"
@@ -294,8 +209,9 @@
     "chroma_collection = db.get_or_create_collection(\"quickstart\")\n",
     "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
     "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
+    "service_context = ServiceContext.from_defaults(embed_model=embed_model)\n",
     "index = VectorStoreIndex.from_documents(\n",
-    "    documents, storage_context=storage_context, embed_model=embed_model\n",
+    "    documents, storage_context=storage_context, service_context=service_context\n",
     ")\n",
     "db.persist()\n",
     "\n",
@@ -307,11 +223,13 @@
     "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
     "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
     "index = VectorStoreIndex.from_vector_store(\n",
-    "    vector_store=vector_store, storage_context=storage_context, embed_model=embed_model\n",
+    "    vector_store=vector_store,\n",
+    "    storage_context=storage_context,\n",
+    "    service_context=service_context,\n",
     ")\n",
     "\n",
     "# Query Data from the persisted index\n",
-    "query_engine = index.as_query_engine(chroma_collection=chroma_collection)\n",
+    "query_engine = index.as_query_engine()\n",
     "response = query_engine.query(\"What did the author do growing up?\")\n",
     "display(Markdown(f\"<b>{response}</b>\"))"
    ]
@@ -371,8 +289,9 @@
     "chroma_collection = remote_db.get_or_create_collection(\"quickstart\")\n",
     "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
     "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
+    "service_context = ServiceContext.from_defaults(embed_model=embed_model)\n",
     "index = VectorStoreIndex.from_documents(\n",
-    "    documents, storage_context=storage_context, embed_model=embed_model\n",
+    "    documents, storage_context=storage_context, service_context=service_context\n",
     ")"
    ]
   },
@@ -416,7 +335,7 @@
    ],
    "source": [
     "# Query Data from the Chroma Docker index\n",
-    "query_engine = index.as_query_engine(chroma_collection=chroma_collection)\n",
+    "query_engine = index.as_query_engine()\n",
     "response = query_engine.query(\"What did the author do growing up?\")\n",
     "display(Markdown(f\"<b>{response}</b>\"))"
    ]
@@ -487,7 +406,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.10"
+   "version": "3.9.6"
   },
   "vscode": {
    "interpreter": {