Commit
Update NeuralChat README/Tutorial/Jupyter Notebook documents (#243)
ftian1 committed Sep 6, 2023
1 parent 9b81f05 commit b36711a
Showing 10 changed files with 1,466 additions and 221 deletions.
1 change: 1 addition & 0 deletions .github/workflows/publish.yml
@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - main
+  workflow_dispatch:

 jobs:
   build:
3 changes: 2 additions & 1 deletion .github/workflows/script/formatScan/pylint.sh
@@ -11,7 +11,8 @@ $BOLD_YELLOW && echo "---------------- run python setup.py sdist bdist_wheel ---
 #python3 -m build -s -w
 export PYTHONPATH=`pwd`
 #$BOLD_YELLOW && echo "---------------- pip install binary -------------" && $RESET
-#pip install -e .
+#pip install dist/intel_extension_for_transformers*.whl

 pip list

+cd /intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/
4 changes: 2 additions & 2 deletions README.md
@@ -10,9 +10,9 @@ Intel® Extension for Transformers
 ## 🚀 Latest News
 <b> <span style="color:orange" > NeuralChat, a customizable chatbot framework under Intel® Extension for Transformers, is available for you to create your own chatbot within minutes on multiple architectures.</span></b>

-NeuralChat offers a rich set of plugins to allow your personalized chatbot more smart with knowledge retrieval, more interactive through speech, faster through response caching, and more secure with guardrails.
+NeuralChat offers a rich set of plugins to make your personalized chatbot smarter with knowledge retrieval, more interactive through speech, faster through query caching, and more secure with guardrails.

-* [Plugins] [Knowledge Retrieval](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/examples/retrieval), Speech Interaction, Response Caching, Security Guardrail
+* [Plugins] [Knowledge Retrieval](./intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/README.md), [Speech Interaction](./intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/README.md), [Query Caching](./intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/README.md), [Security Guardrail](./intel_extension_for_transformers/neural_chat/pipeline/plugins/security/README.md)
 * [Architectures] Intel® Xeon® Scalable Processors, Habana Gaudi® Accelerator, and others

 Check out the below sample code and have a try now!
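(The sample code the README refers to falls outside this hunk. As a stand-in, here is a minimal, hedged sketch of the NeuralChat quick-start pattern; the `build_chatbot` entry point, its defaults, and the prompt are assumptions based on the neural_chat package updated in this commit, not a verbatim copy of the README's sample.)

```python
# Hedged sketch of a NeuralChat quick start; build_chatbot and its default
# configuration are assumed from the neural_chat package layout, not quoted
# from the README sample that this diff view truncates.
from intel_extension_for_transformers.neural_chat import build_chatbot

chatbot = build_chatbot()  # default chatbot, no plugins enabled
response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
print(response)
```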
286 changes: 82 additions & 204 deletions intel_extension_for_transformers/neural_chat/README.md

Large diffs are not rendered by default.

80 changes: 80 additions & 0 deletions 0001-fix-lm-head-overriden-issue-move-it-from-checkpoint-.patch
@@ -0,0 +1,80 @@
From d89a5eeb2884d3f392f8b49b5b959f2f3a5a9a1e Mon Sep 17 00:00:00 2001
From: "Wang, Yi A" <yi.a.wang@intel.com>
Date: Thu, 31 Aug 2023 07:09:03 +0530
Subject: [PATCH] fix lm head overriden issue, move it from checkpoint in-loop
loading to out loop

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
---
deepspeed/module_inject/load_checkpoint.py | 7 -------
deepspeed/module_inject/replace_module.py | 22 ++++++++++++++--------
2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/deepspeed/module_inject/load_checkpoint.py b/deepspeed/module_inject/load_checkpoint.py
index 5c2835f..3b225c6 100644
--- a/deepspeed/module_inject/load_checkpoint.py
+++ b/deepspeed/module_inject/load_checkpoint.py
@@ -255,13 +255,6 @@ def load_model_with_checkpoint(r_module,

     load_module_recursive(r_module)

-    embedding_weight = None
-
-    for n, p in r_module.named_parameters():
-        if "word_embeddings." in n or "embed_tokens." in n or "wte." in n:
-            embedding_weight = p
-    if embedding_weight is not None and r_module.lm_head.weight.is_meta:
-        r_module.lm_head.weight = embedding_weight
     for sd_ in sd:
         del sd_
     sd = None
diff --git a/deepspeed/module_inject/replace_module.py b/deepspeed/module_inject/replace_module.py
index 7844d2f..316c2c6 100644
--- a/deepspeed/module_inject/replace_module.py
+++ b/deepspeed/module_inject/replace_module.py
@@ -548,6 +548,13 @@ def replace_transformer_layer(orig_layer_impl, model, checkpoint_dict, config, m
                                               checkpoint=checkpoint[i])
                 pbar.update(1)
                 gc.collect()
+            embedding_weight = None
+            for n, p in replaced_module.named_parameters():
+                if "word_embeddings." in n or "embed_tokens." in n or "wte." in n:
+                    embedding_weight = p
+            if embedding_weight is not None and hasattr(replaced_module, "lm_head") and hasattr(
+                    replaced_module.lm_head, "weight") and replaced_module.lm_head.weight.is_meta:
+                replaced_module.lm_head.weight = embedding_weight
         else:
             replaced_module = replace_module(model=model,
                                              orig_class=orig_layer_impl,
@@ -626,6 +633,13 @@ def replace_transformer_layer(orig_layer_impl, model, checkpoint_dict, config, m
                                        container=container_g)
             sds = [None for _ in sds]
             gc.collect()
+        embedding_weight = None
+        for n, p in replaced_module.named_parameters():
+            if "word_embeddings." in n or "embed_tokens." in n or "wte." in n:
+                embedding_weight = p
+        if embedding_weight is not None and hasattr(replaced_module, "lm_head") and hasattr(
+                replaced_module.lm_head, "weight") and replaced_module.lm_head.weight.is_meta:
+            replaced_module.lm_head.weight = embedding_weight
         print(f"checkpoint loading time at rank {rank}: {time.time()-start_time} sec")

     if config.save_mp_checkpoint_path is not None:
@@ -794,14 +808,6 @@ def replace_module(model, orig_class, replace_fn, _replace_policy, checkpoint=No
         "You can find some samples here: https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/module_inject/replace_policy.py"

     replaced_module, _ = _replace_module(model, policy, state_dict=sd)
-    if checkpoint != None:
-        embedding_weight = None
-        for n, p in replaced_module.named_parameters():
-            if "word_embeddings." in n or "embed_tokens." in n or "wte." in n:
-                embedding_weight = p
-        if embedding_weight is not None and hasattr(replaced_module, "lm_head") and hasattr(
-                replaced_module.lm_head, "weight") and replaced_module.lm_head.weight.is_meta:
-            replaced_module.lm_head.weight = embedding_weight
     return replaced_module


--
2.39.3
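(The patch above is easier to follow once the underlying idea is isolated: it is plain embedding/LM-head weight tying, moved so it runs once after all checkpoint shards are loaded rather than inside the per-checkpoint loop, where a later shard could override `lm_head`. Below is a standalone sketch of that logic distilled from the hunks above; `model` stands for any causal-LM module — this is an illustration, not DeepSpeed API.)

```python
import torch


def tie_lm_head_to_embeddings(model: torch.nn.Module) -> None:
    """Reuse the input-embedding weight for lm_head when lm_head was never
    materialized from a checkpoint (its weight is still a meta tensor).
    Mirrors the block the patch moves out of the per-checkpoint loop."""
    embedding_weight = None
    for name, param in model.named_parameters():
        # Embedding parameter names used by common HF architectures
        # (word_embeddings: BLOOM, embed_tokens: LLaMA/OPT, wte: GPT-2),
        # matched exactly as in the patch above.
        if "word_embeddings." in name or "embed_tokens." in name or "wte." in name:
            embedding_weight = param
    if (embedding_weight is not None and hasattr(model, "lm_head")
            and hasattr(model.lm_head, "weight") and model.lm_head.weight.is_meta):
        model.lm_head.weight = embedding_weight
```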

Dockerfile
@@ -65,40 +65,55 @@ RUN source activate && conda activate chatbot-demo && \
     conda install astunparse ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses -y && \
     conda install jemalloc gperftools -c conda-forge -y && \
     conda install pytorch torchvision torchaudio cpuonly -c pytorch && \
-    pip install farm-haystack==1.14.0 && \
     pip install intel_extension_for_pytorch && \
     pip install optimum-intel && \
-    pip install transformers diffusers accelerate SentencePiece peft evaluate nltk datasets && \
-    pip install fastapi uvicorn sse_starlette bottle gevent pymysql && \
+    pip install uvicorn sse_starlette bottle gevent pymysql && \
     pip install schema && \
+    pip install datasets torch transformers sentencepiece peft evaluate nltk rouge_score && \
     cd /root/chatbot && git clone https://github.com/intel/intel-extension-for-transformers.git \
-    && cd ./intel-extension-for-transformers/workflows/chatbot/inference/ && pip install -r requirements.txt
+    && cd ./intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/ && pip install -r requirements.txt

-WORKDIR /root/chatbot/intel-extension-for-transformers/workflows/chatbot/inference/
 ENV KMP_BLOCKTIME=1
 ENV KMP_SETTINGS=1
 ENV KMP_AFFINITY=granularity=fine,compact,1,0
 ENV LD_PRELOAD=${CONDA_PREFIX}/lib/libiomp5.so
 ENV LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so

+WORKDIR /root/chatbot/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/


 # HABANA environment
-FROM vault.habana.ai/gaudi-docker/1.10.0/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest as hpu
+FROM vault.habana.ai/gaudi-docker/1.11.0/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest as hpu

 ENV LANG=en_US.UTF-8
 ENV PYTHONPATH=/root:/usr/lib/habanalabs/

-RUN git clone https://github.com/huggingface/optimum-habana.git && \
-    cd ./optimum-habana/examples/text-generation/ && \
-    pip install -r requirements.txt && \
-    apt-get update && \
+RUN apt-get update && \
     apt-get install git-lfs && \
     git-lfs install

-RUN pip install git+https://github.com/huggingface/optimum-habana.git && \
+COPY 0001-fix-lm-head-overriden-issue-move-it-from-checkpoint-.patch /
+
+RUN git clone https://github.com/huggingface/optimum-habana.git && \
+    cd optimum-habana/ && git reset --hard b6edce65b70e0fadd5d5f51234700bd1144cd0b0 && pip install -e . && cd ../ && \
+    cd ./optimum-habana/examples/text-generation/ && \
+    pip install -r requirements.txt && \
+    cd / && \
     pip install peft && \
     pip install einops && \
     pip install datasets && \
-    pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.10.0
+    git clone https://github.com/HabanaAI/DeepSpeed.git && \
+    cd DeepSpeed && \
+    git checkout -b v1.11 origin/1.11.0 && \
+    git apply /0001-fix-lm-head-overriden-issue-move-it-from-checkpoint-.patch && \
+    pip install -e .

 RUN git clone https://github.com/intel/intel-extension-for-transformers.git \
-    && cd ./intel-extension-for-transformers/workflows/chatbot/inference/ \
-    && pip install -r requirements.txt
+    && cd ./intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/ \
+    && pip install -r requirements.txt \
+    && pip install transformers==4.32.0 \
+    && pip install accelerate==0.22.0 \
+    && pip uninstall -y intel_extension_for_pytorch

-WORKDIR /intel-extension-for-transformers/workflows/chatbot/inference/
+WORKDIR /intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/
