[NeuralChat] Add langchain extension example and update notebook (#1237)
* Add langchain extension example and update notebook

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
lvliang-intel committed Feb 2, 2024
1 parent 7733d44 commit d40e2f1
Showing 3 changed files with 190 additions and 7 deletions.
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Intel Extension for Transformers provides a comprehensive suite of Langchain-based extension APIs, including advanced retrievers, embedding models, and vector stores. These enhancements are carefully crafted to expand the capabilities of the original langchain API, ultimately boosting overall performance. This extension is specifically tailored to enhance the functionality and performance of RAG."
"Intel Extension for Transformers provides a comprehensive suite of Langchain-based extension APIs, including advanced retrievers, embedding models, and vector stores. These enhancements are carefully crafted to expand the capabilities of the original Langchain API, ultimately boosting overall performance. This extension is specifically tailored to enhance the functionality and performance of RAG."
]
},
{
@@ -69,7 +69,14 @@
"metadata": {},
"outputs": [],
"source": [
"!curl -OL https://d1io3yog0oux5.cloudfront.net/_897efe2d574a132883f198f2b119aa39/intel/db/888/8941/file/412439%281%29_12_Intel_AR_WR.pdf"
"!curl -o Intel_AR_WR.pdf https://d1io3yog0oux5.cloudfront.net/_897efe2d574a132883f198f2b119aa39/intel/db/888/8941/file/412439%281%29_12_Intel_AR_WR.pdf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Chatbot code with Langchain APIs:"
]
},
{
@@ -79,32 +86,79 @@
"outputs": [],
"source": [
"from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n",
"from langchain_community.document_loaders import PyPDFLoader\n",
"from langchain.chains import RetrievalQA\n",
"from langchain_core.vectorstores import VectorStoreRetriever\n",
"from langchain_core.documents import Document\n",
"from langchain_community.embeddings import HuggingFaceBgeEmbeddings\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
"from langchain.vectorstores import Chroma\n",
"\n",
"loader = PyPDFLoader(\"./Intel_AR_WR.pdf\")\n",
"langchain_documents = loader.load_and_split()\n",
"embeddings = HuggingFaceBgeEmbeddings(model_name=\"BAAI/bge-base-en-v1.5\")\n",
"knowledge_base = Chroma.from_documents(documents=langchain_documents, embedding=embeddings, persist_directory='./out')\n",
"tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\")\n",
"model = AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\")\n",
"pipe = HuggingFacePipeline(pipeline=pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=128))\n",
"retriever = VectorStoreRetriever(vectorstore=knowledge_base, search_type='mmr', search_kwargs={'k':1, 'fetch_k':5})\n",
"retrievalQA = RetrievalQA.from_llm(llm=pipe, retriever=retriever)\n",
"result = retrievalQA({\"query\": \"What is IDM 2.0?\"})\n",
"print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Chatbot code with ITREX Langchain extension APIs:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline\n",
"from langchain.chains import RetrievalQA\n",
"from langchain_core.vectorstores import VectorStoreRetriever\n",
"from langchain_core.documents import Document\n",
"from intel_extension_for_transformers.langchain.embeddings import HuggingFaceBgeEmbeddings\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline\n",
"from intel_extension_for_transformers.langchain.vectorstores import Chroma\n",
"from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser\n",
"\n",
"document_parser = DocumentParser()\n",
"input_path=\"./412439%281%29_12_Intel_AR_WR.pdf\"\n",
"input_path=\"./Intel_AR_WR.pdf\"\n",
"data_collection=document_parser.load(input=input_path)\n",
"documents = []\n",
"for data, meta in data_collection:\n",
" doc = Document(page_content=data, metadata={\"source\":meta})\n",
" documents.append(doc)\n",
"embeddings = HuggingFaceBgeEmbeddings(model_name=\"BAAI/bge-base-en-v1.5\")\n",
"# load Intel/bge-base-en-v1.5-sts-int8-static from local\n",
"embeddings = HuggingFaceBgeEmbeddings(model_name=\"./bge-base-en-v1.5-sts-int8-static\")\n",
"knowledge_base = Chroma.from_documents(documents=documents, embedding=embeddings, persist_directory='./output')\n",
"tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\")\n",
"model = AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-2-7b-chat-hf\")\n",
"pipe = HuggingFacePipeline(pipeline=pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_new_tokens=128))\n",
"retriever = VectorStoreRetriever(vectorstore=knowledge_base)\n",
"retriever = VectorStoreRetriever(vectorstore=knowledge_base, search_type='mmr', search_kwargs={'k':1, 'fetch_k':5})\n",
"retrievalQA = RetrievalQA.from_llm(llm=pipe, retriever=retriever)\n",
"result = retrievalQA({\"query\": \"What is IDM 2.0?\"})\n",
"print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Comparing the execution time, using ITREX Langchain extension APIs can get better performance.\n",
"\n",
"| APIs | Execution Time |\n",
"|-------|-------|\n",
"| Langchain | 106.094 sec |\n",
"| ITREX Langchain Extension | 81.429 sec |\n"
]
},
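{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal way to time either variant yourself (an illustrative sketch using only the standard library; it assumes the `retrievalQA` object from the cells above and is not necessarily the script behind the numbers in the table):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"\n",
"# Illustrative timing sketch: measures one end-to-end RetrievalQA call.\n",
"# Assumes `retrievalQA` was constructed in one of the cells above.\n",
"start = time.perf_counter()\n",
"result = retrievalQA({\"query\": \"What is IDM 2.0?\"})\n",
"print(f\"Elapsed: {time.perf_counter() - start:.3f} sec\")"
]
},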
{
"cell_type": "markdown",
"metadata": {},
@@ -127,7 +181,7 @@
"\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=512)\n",
"document_parser = DocumentParser()\n",
"input_path=\"./412439%281%29_12_Intel_AR_WR.pdf\"\n",
"input_path=\"./Intel_AR_WR.pdf\"\n",
"data_collection=document_parser.load(input=input_path)\n",
"langchain_documents = document_transfer(data_collection)\n",
"child_documents = text_splitter.split_documents(langchain_documents)\n",
@@ -139,6 +193,23 @@
"docs=retriever.get_relevant_documents(\"What is IDM 2.0?\")\n",
"print(docs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Comparing with result using default Langchain retriever, ITREX Langchain extension APIs can get better result.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"| Retrieval Type | Retrieval Result |\n",
"|-------|-------|\n",
"| default | The Smart Capital strategy helps the company leverage various sources of capital to support investments in manufacturing capacity and fund their IDM 2.0 strategy. |\n",
"| ITREX Langchain Extension | Smart Capital for IDM 2.0 includes aggressive building out of manufacturing shell space, which gives flexibility in how and when to bring additional capacity online based on milestone triggers such as product readiness, market conditions, and customer commitments. It also involves government incentives, Strategic Capacity Investments (SCIP), customer commitments, and external foundries. |"
]
}
],
"metadata": {
@@ -0,0 +1,62 @@
# Introduction

Intel Extension for Transformers provides a comprehensive suite of Langchain-based extension APIs, including advanced retrievers, embedding models, and vector stores. These enhancements are carefully crafted to expand the capabilities of the original Langchain API, ultimately boosting overall performance. This extension is specifically tailored to enhance the functionality and performance of RAG.


We have introduced enhanced vector store operations that allow users to adjust and fine-tune settings even after the chatbot has been initialized, providing a more adaptable and user-friendly experience. For Langchain users, integrating the optimized vector stores is straightforward: simply replace the original Chroma API from Langchain.

We offer optimized retrievers such as `VectorStoreRetriever` and `ChildParentRetriever` to handle vector store operations efficiently, ensuring optimal retrieval performance. Additionally, we provide quantized embedding models to accelerate document embedding. These Langchain extension APIs are easy to use, optimized for both performance and accuracy, and specifically tailored for Intel hardware. A minimal usage sketch follows.
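
The sketch below mirrors the notebook example above; the ITREX imports are drop-in replacements for their stock Langchain counterparts. It assumes `documents` is a list of Langchain `Document` objects produced by your loader.

```python
# Drop-in replacement sketch: only the import paths differ from stock Langchain.
from langchain_core.vectorstores import VectorStoreRetriever
from intel_extension_for_transformers.langchain.embeddings import HuggingFaceBgeEmbeddings
from intel_extension_for_transformers.langchain.vectorstores import Chroma

# `documents` is assumed to be a list of langchain_core.documents.Document objects.
embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5")
knowledge_base = Chroma.from_documents(documents=documents, embedding=embeddings,
                                       persist_directory='./output')
retriever = VectorStoreRetriever(vectorstore=knowledge_base, search_type='mmr',
                                 search_kwargs={'k': 1, 'fetch_k': 5})
```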

# Setup Environment

## Setup Conda

First, you need to install and configure the Conda environment:

```shell
# Download and install Miniconda
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda*.sh
source ~/.bashrc
```

## Install numactl

Next, install the numactl library:

```shell
sudo apt install numactl
```

## Install Intel Extension for Transformers

```shell
pip install intel-extension-for-transformers
```

## Install Python dependencies

Install the following Python dependencies using Conda:

```shell
conda install astunparse ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses -y
conda install jemalloc gperftools -c conda-forge -y
conda install git-lfs -y
```

Install other dependencies using pip:

```bash
pip install -r ../../requirements.txt
```

Install retrieval plugin dependencies using pip:
```bash
pip install -r ../../pipeline/plugins/retrieval/requirements.txt
```

# Test

```shell
python main.py
```
@@ -0,0 +1,50 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain_core.vectorstores import VectorStoreRetriever
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from intel_extension_for_transformers.langchain.vectorstores import Chroma
from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser
import requests

url = "https://d1io3yog0oux5.cloudfront.net/_897efe2d574a132883f198f2b119aa39/intel/db/888/8941/file/412439%281%29_12_Intel_AR_WR.pdf"
filename = "Intel_AR_WR.pdf"
response = requests.get(url)
with open(filename, 'wb') as file:
file.write(response.content)
print(f"File '{filename}' downloaded successfully.")

document_parser = DocumentParser()
input_path="./Intel_AR_WR.pdf"
data_collection=document_parser.load(input=input_path)
documents = []
for data, meta in data_collection:
doc = Document(page_content=data, metadata={"source":meta})
documents.append(doc)
embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5")
knowledge_base = Chroma.from_documents(documents=documents, embedding=embeddings, persist_directory='./output')
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
pipe = HuggingFacePipeline(pipeline=pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=128))
retriever = VectorStoreRetriever(vectorstore=knowledge_base)
retrievalQA = RetrievalQA.from_llm(llm=pipe, retriever=retriever)
result = retrievalQA({"query": "What is IDM 2.0?"})
print(result)
