Commit
Update NeuralChat README/Tutorial/Jupyter Notebook documents (#243)
ftian1 committed Sep 6, 2023
1 parent 9b81f05 commit b36711a
Showing 10 changed files with 1,466 additions and 221 deletions.
1 change: 1 addition & 0 deletions .github/workflows/publish.yml
@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - main
+  workflow_dispatch:

 jobs:
   build:
3 changes: 2 additions & 1 deletion .github/workflows/script/formatScan/pylint.sh
@@ -11,7 +11,8 @@ $BOLD_YELLOW && echo "---------------- run python setup.py sdist bdist_wheel ---
 #python3 -m build -s -w
 export PYTHONPATH=`pwd`
 #$BOLD_YELLOW && echo "---------------- pip install binary -------------" && $RESET
-#pip install -e .
+#pip install dist/intel_extension_for_transformers*.whl

 pip list

+cd /intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/
4 changes: 2 additions & 2 deletions README.md
@@ -10,9 +10,9 @@ Intel® Extension for Transformers
 ## 🚀 Latest News
 <b> <span style="color:orange" > NeuralChat, a customizable chatbot framework under Intel® Extension for Transformers, is available for you to create your own chatbot within minutes on multiple architectures.</span></b>

-NeuralChat offers a rich set of plugins to allow your personalized chatbot more smart with knowledge retrieval, more interactive through speech, faster through response caching, and more secure with guardrails.
+NeuralChat offers a rich set of plugins to make your personalized chatbot smarter with knowledge retrieval, more interactive through speech, faster through query caching, and more secure with guardrails.

-* [Plugins] [Knowledge Retrieval](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/examples/retrieval), Speech Interaction, Response Caching, Security Guardrail
+* [Plugins] [Knowledge Retrieval](./intel_extension_for_transformers/neural_chat/pipeline/plugins/retrieval/README.md), [Speech Interaction](./intel_extension_for_transformers/neural_chat/pipeline/plugins/audio/README.md), [Query Caching](./intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/README.md), [Security Guardrail](./intel_extension_for_transformers/neural_chat/pipeline/plugins/security/README.md)
 * [Architectures] Intel® Xeon® Scalable Processors, Habana Gaudi® Accelerator, and others

 Check out the below sample code and have a try now!
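(The sample code the README refers to falls outside this hunk. As a stand-in, here is a minimal, hedged sketch of the NeuralChat quick-start pattern; the `build_chatbot` entry point, its defaults, and the prompt are assumptions based on the neural_chat package updated in this commit, not a verbatim copy of the README's sample.)

```python
# Hedged sketch of a NeuralChat quick start; build_chatbot and its default
# configuration are assumed from the neural_chat package layout, not quoted
# from the README sample that this diff view truncates.
from intel_extension_for_transformers.neural_chat import build_chatbot

chatbot = build_chatbot()  # default chatbot, no plugins enabled
response = chatbot.predict("Tell me about Intel Xeon Scalable Processors.")
print(response)
```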
286 changes: 82 additions & 204 deletions intel_extension_for_transformers/neural_chat/README.md

Large diffs are not rendered by default.

80 changes: 80 additions & 0 deletions 0001-fix-lm-head-overriden-issue-move-it-from-checkpoint-.patch
@@ -0,0 +1,80 @@
From d89a5eeb2884d3f392f8b49b5b959f2f3a5a9a1e Mon Sep 17 00:00:00 2001
From: "Wang, Yi A" <yi.a.wang@intel.com>
Date: Thu, 31 Aug 2023 07:09:03 +0530
Subject: [PATCH] fix lm head overriden issue, move it from checkpoint in-loop
loading to out loop

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
---
deepspeed/module_inject/load_checkpoint.py | 7 -------
deepspeed/module_inject/replace_module.py | 22 ++++++++++++++--------
2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/deepspeed/module_inject/load_checkpoint.py b/deepspeed/module_inject/load_checkpoint.py
index 5c2835f..3b225c6 100644
--- a/deepspeed/module_inject/load_checkpoint.py
+++ b/deepspeed/module_inject/load_checkpoint.py
@@ -255,13 +255,6 @@ def load_model_with_checkpoint(r_module,

     load_module_recursive(r_module)

-    embedding_weight = None
-
-    for n, p in r_module.named_parameters():
-        if "word_embeddings." in n or "embed_tokens." in n or "wte." in n:
-            embedding_weight = p
-    if embedding_weight is not None and r_module.lm_head.weight.is_meta:
-        r_module.lm_head.weight = embedding_weight
     for sd_ in sd:
         del sd_
     sd = None
diff --git a/deepspeed/module_inject/replace_module.py b/deepspeed/module_inject/replace_module.py
index 7844d2f..316c2c6 100644
--- a/deepspeed/module_inject/replace_module.py
+++ b/deepspeed/module_inject/replace_module.py
@@ -548,6 +548,13 @@ def replace_transformer_layer(orig_layer_impl, model, checkpoint_dict, config, m
                                               checkpoint=checkpoint[i])
                 pbar.update(1)
                 gc.collect()
+            embedding_weight = None
+            for n, p in replaced_module.named_parameters():
+                if "word_embeddings." in n or "embed_tokens." in n or "wte." in n:
+                    embedding_weight = p
+            if embedding_weight is not None and hasattr(replaced_module, "lm_head") and hasattr(
+                    replaced_module.lm_head, "weight") and replaced_module.lm_head.weight.is_meta:
+                replaced_module.lm_head.weight = embedding_weight
         else:
             replaced_module = replace_module(model=model,
                                              orig_class=orig_layer_impl,
@@ -626,6 +633,13 @@ def replace_transformer_layer(orig_layer_impl, model, checkpoint_dict, config, m
                                        container=container_g)
             sds = [None for _ in sds]
             gc.collect()
+        embedding_weight = None
+        for n, p in replaced_module.named_parameters():
+            if "word_embeddings." in n or "embed_tokens." in n or "wte." in n:
+                embedding_weight = p
+        if embedding_weight is not None and hasattr(replaced_module, "lm_head") and hasattr(
+                replaced_module.lm_head, "weight") and replaced_module.lm_head.weight.is_meta:
+            replaced_module.lm_head.weight = embedding_weight
         print(f"checkpoint loading time at rank {rank}: {time.time()-start_time} sec")

     if config.save_mp_checkpoint_path is not None:
@@ -794,14 +808,6 @@ def replace_module(model, orig_class, replace_fn, _replace_policy, checkpoint=No
         "You can find some samples here: https://github.com/microsoft/DeepSpeed/blob/master/deepspeed/module_inject/replace_policy.py"

     replaced_module, _ = _replace_module(model, policy, state_dict=sd)
-    if checkpoint != None:
-        embedding_weight = None
-        for n, p in replaced_module.named_parameters():
-            if "word_embeddings." in n or "embed_tokens." in n or "wte." in n:
-                embedding_weight = p
-        if embedding_weight is not None and hasattr(replaced_module, "lm_head") and hasattr(
-                replaced_module.lm_head, "weight") and replaced_module.lm_head.weight.is_meta:
-            replaced_module.lm_head.weight = embedding_weight
     return replaced_module


--
2.39.3
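(The patch above is easier to follow once the underlying idea is isolated: it is plain embedding/LM-head weight tying, moved so it runs once after all checkpoint shards are loaded rather than inside the per-checkpoint loop, where a later shard could override `lm_head`. Below is a standalone sketch of that logic distilled from the hunks above; `model` stands for any causal-LM module — this is an illustration, not DeepSpeed API.)

```python
import torch


def tie_lm_head_to_embeddings(model: torch.nn.Module) -> None:
    """Reuse the input-embedding weight for lm_head when lm_head was never
    materialized from a checkpoint (its weight is still a meta tensor).
    Mirrors the block the patch moves out of the per-checkpoint loop."""
    embedding_weight = None
    for name, param in model.named_parameters():
        # Embedding parameter names used by common HF architectures
        # (word_embeddings: BLOOM, embed_tokens: LLaMA/OPT, wte: GPT-2),
        # matched exactly as in the patch above.
        if "word_embeddings." in name or "embed_tokens." in name or "wte." in name:
            embedding_weight = param
    if (embedding_weight is not None and hasattr(model, "lm_head")
            and hasattr(model.lm_head, "weight") and model.lm_head.weight.is_meta):
        model.lm_head.weight = embedding_weight
```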

Dockerfile
@@ -65,40 +65,55 @@ RUN source activate && conda activate chatbot-demo && \
     conda install astunparse ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses -y && \
     conda install jemalloc gperftools -c conda-forge -y && \
     conda install pytorch torchvision torchaudio cpuonly -c pytorch && \
-    pip install farm-haystack==1.14.0 && \
     pip install intel_extension_for_pytorch && \
     pip install optimum-intel && \
-    pip install transformers diffusers accelerate SentencePiece peft evaluate nltk datasets && \
-    pip install fastapi uvicorn sse_starlette bottle gevent pymysql && \
+    pip install uvicorn sse_starlette bottle gevent pymysql && \
     pip install schema && \
+    pip install datasets torch transformers sentencepiece peft evaluate nltk rouge_score && \
     cd /root/chatbot && git clone https://github.com/intel/intel-extension-for-transformers.git \
-    && cd ./intel-extension-for-transformers/workflows/chatbot/inference/ && pip install -r requirements.txt
+    && cd ./intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/ && pip install -r requirements.txt

-WORKDIR /root/chatbot/intel-extension-for-transformers/workflows/chatbot/inference/
 ENV KMP_BLOCKTIME=1
 ENV KMP_SETTINGS=1
 ENV KMP_AFFINITY=granularity=fine,compact,1,0
 ENV LD_PRELOAD=${CONDA_PREFIX}/lib/libiomp5.so
 ENV LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so

+WORKDIR /root/chatbot/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/


 # HABANA environment
-FROM vault.habana.ai/gaudi-docker/1.10.0/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest as hpu
+FROM vault.habana.ai/gaudi-docker/1.11.0/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest as hpu

 ENV LANG=en_US.UTF-8
 ENV PYTHONPATH=/root:/usr/lib/habanalabs/

-RUN git clone https://github.com/huggingface/optimum-habana.git && \
-    cd ./optimum-habana/examples/text-generation/ && \
-    pip install -r requirements.txt && \
-    apt-get update && \
+RUN apt-get update && \
     apt-get install git-lfs && \
     git-lfs install

-RUN pip install git+https://github.com/huggingface/optimum-habana.git && \
+COPY 0001-fix-lm-head-overriden-issue-move-it-from-checkpoint-.patch /
+
+RUN git clone https://github.com/huggingface/optimum-habana.git && \
+    cd optimum-habana/ && git reset --hard b6edce65b70e0fadd5d5f51234700bd1144cd0b0 && pip install -e . && cd ../ && \
+    cd ./optimum-habana/examples/text-generation/ && \
+    pip install -r requirements.txt && \
+    cd / && \
     pip install peft && \
     pip install einops && \
     pip install datasets && \
-    pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.10.0
+    git clone https://github.com/HabanaAI/DeepSpeed.git && \
+    cd DeepSpeed && \
+    git checkout -b v1.11 origin/1.11.0 && \
+    git apply /0001-fix-lm-head-overriden-issue-move-it-from-checkpoint-.patch && \
+    pip install -e .

 RUN git clone https://github.com/intel/intel-extension-for-transformers.git \
-    && cd ./intel-extension-for-transformers/workflows/chatbot/inference/ \
-    && pip install -r requirements.txt
+    && cd ./intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/ \
+    && pip install -r requirements.txt \
+    && pip install transformers==4.32.0 \
+    && pip install accelerate==0.22.0 \
+    && pip uninstall -y intel_extension_for_pytorch

-WORKDIR /intel-extension-for-transformers/workflows/chatbot/inference/
+WORKDIR /intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/
