Dockerfile support for both Xeon SPR and Habana Gaudi (#1135)
letonghan committed Jul 6, 2023
1 parent 644de8f commit 37b4552
Showing 2 changed files with 83 additions and 16 deletions.
59 changes: 54 additions & 5 deletions workflows/chatbot/inference/docker/Dockerfile
@@ -1,5 +1,7 @@

## SPR environment
ARG UBUNTU_VER=22.04
FROM ubuntu:${UBUNTU_VER} as cpu

# See http://bugs.python.org/issue19846
ENV LANG C.UTF-8
@@ -11,6 +13,7 @@ RUN apt-get update \
&& apt-get install -y build-essential \
&& apt-get install -y wget numactl git \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

# Install miniconda
@@ -19,12 +22,58 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
/bin/bash ~/miniconda.sh -b -p /opt/conda

# Put conda in path so we can use conda activate
SHELL ["/bin/bash", "--login", "-c"]
ENV PATH=$CONDA_DIR/bin:$PATH
RUN conda init bash && \
unset -f conda && \
export PATH=$CONDA_DIR/bin/:${PATH} && \
conda config --add channels intel && \
conda create -yn chatbot-demo python=3.9 && \
echo "conda activate chatbot-demo" >> ~/.bashrc

SHELL ["conda", "run", "-n", "chatbot-demo", "/bin/bash", "--login", "-c"]

RUN conda install astunparse ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses -y && \
conda install jemalloc gperftools -c conda-forge -y && \
conda install pytorch torchvision torchaudio cpuonly -c pytorch && \
pip install farm-haystack==1.14.0 && \
pip install intel_extension_for_pytorch && \
pip install transformers diffusers accelerate SentencePiece peft evaluate nltk datasets && \
pip install fastapi uvicorn sse_starlette bottle gevent pymysql && \
pip install schema && \
pip install -i https://test.pypi.org/simple/ intel-extension-for-transformers==1.0.0.dev20230602 && \
pip install datasets torch transformers sentencepiece peft evaluate nltk rouge_score


RUN mkdir -p /root/chatbot && cd /root/chatbot && git clone https://github.com/intel/intel-extension-for-transformers.git \
&& cd ./intel-extension-for-transformers/workflows/chatbot/inference/ && pip install -r requirements.txt

WORKDIR /root/chatbot/intel-extension-for-transformers/workflows/chatbot/inference/


# HABANA environment
FROM vault.habana.ai/gaudi-docker/1.10.0/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest as hpu

ENV LANG=en_US.UTF-8
ENV PYTHONPATH=/root:/usr/lib/habanalabs/

RUN git clone https://github.com/huggingface/optimum-habana.git && \
cd ./optimum-habana/examples/text-generation/ && \
pip install -r requirements.txt && \
apt-get update && \
apt-get install -y git-lfs && \
git-lfs install

RUN pip install optimum[habana] && \
pip install peft && \
pip install einops && \
pip install datasets && \
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.10.0

RUN git clone https://github.com/intel/intel-extension-for-transformers.git \
&& cd ./intel-extension-for-transformers/workflows/chatbot/inference/ \
&& pip install -r requirements.txt && \
git clone https://huggingface.co/mosaicml/mpt-7b-chat

RUN pip install datasets torch transformers sentencepiece peft evaluate nltk rouge_score
WORKDIR /intel-extension-for-transformers/workflows/chatbot/inference/
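The file above packs two independent images into one multi-stage Dockerfile: a `cpu` stage for Xeon SPR and an `hpu` stage for Habana Gaudi, selected at build time with `--target`. A minimal sketch of the two patterns at work (the image tags shown here and the `demo` environment name are illustrative, not taken from the commit):

```dockerfile
## CPU stage: built with `docker build --target cpu ...`
FROM ubuntu:22.04 AS cpu
# ... install miniconda and create a "demo" environment here ...
# After this SHELL directive, every subsequent RUN executes inside the
# "demo" conda environment, so pip/conda installs land in that
# environment rather than in the system Python.
SHELL ["conda", "run", "-n", "demo", "/bin/bash", "--login", "-c"]
RUN pip install transformers

## HPU stage: built with `docker build --target hpu ...`
# This stage starts from a different base image entirely; nothing from
# the cpu stage is inherited.
FROM vault.habana.ai/gaudi-docker/1.10.0/ubuntu22.04/habanalabs/pytorch-installer-2.0.1:latest AS hpu
```

Because the two stages share no layers, `--target` effectively switches between two Dockerfiles kept in one file.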
40 changes: 29 additions & 11 deletions workflows/chatbot/inference/docker/README.md
@@ -2,27 +2,45 @@ Intel Chatbot Inference Dockerfile installer for Ubuntu 22.04

# Do chatbot inference with Docker

## Build
## Environment Setup

### Setup Xeon SPR Environment
```
docker build --network=host --tag chatbotinfer:latest ./ -f Dockerfile --target cpu
```

Run the container, map the cached model files into it, and enter the container:

```
docker run -it -v /dev/shm/models--google--flan-t5-xl:/root/.cache/models--google--flan-t5-xl chatbotinfer:latest
```
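The `-v` flag above maps a pre-downloaded Hugging Face model cache into the container. A small sketch of the directory-name convention the mount relies on (assuming the `models--<org>--<name>` layout used in the command above):

```shell
#!/usr/bin/env bash
# Derive the cache directory name for a model id by replacing "/" with "--".
model_id="google/flan-t5-xl"
cache_dir="models--${model_id//\//--}"
echo "$cache_dir"   # models--google--flan-t5-xl
```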

If you have already cached the original model and the LoRA model, replace the `-v` arguments to map the cached models on your host machine to the corresponding locations inside the container.


### Setup Habana Gaudi Environment
```
DOCKER_BUILDKIT=1 docker build --network=host --tag chatbothabana:latest ./ -f Dockerfile --target hpu --build-arg BASE_NAME="base-installer-ubuntu22.04" --build-arg ARTIFACTORY_URL="vault.habana.ai" --build-arg VERSION="1.10.0" --build-arg REVISION="494" --build-arg PT_VERSION="2.0.1" --build-arg OS_NUMBER="2204"
```
```
docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host chatbothabana:latest
```
## Run the Inference
You can use the `generation.py` script to run inference on either platform. BF16 is enabled to speed up inference; use the following commands.
### Run the Inference on Xeon SPR
```
python generation.py \
--base_model_path "./mpt-7b-chat" \
--use_kv_cache \
--bf16 \
--use_slow_tokenizer \
--instructions "Transform the following sentence into one that shows contrast. The tree is rotten."
```
### Run the Inference on Habana Gaudi
```
python generation.py \
--base_model_path "./mpt-7b-chat" \
--use_kv_cache \
--bf16 \
--use_slow_tokenizer \
--habana \
--instructions "Transform the following sentence into one that shows contrast. The tree is rotten."
```
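The per-target build and run commands above can be collected into a small helper (a sketch: the image tags and flags come from the commands above, while the `launch` function name and the reduced set of Habana build args are our own simplification):

```shell
#!/usr/bin/env bash
set -euo pipefail

# Build the requested stage of the multi-stage Dockerfile and start the
# matching container. Usage: launch cpu | launch hpu
launch() {
  case "${1:-}" in
    cpu)
      docker build --network=host --tag chatbotinfer:latest ./ -f Dockerfile --target cpu
      docker run -it chatbotinfer:latest
      ;;
    hpu)
      DOCKER_BUILDKIT=1 docker build --network=host --tag chatbothabana:latest ./ -f Dockerfile --target hpu
      docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all \
        -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
        --cap-add=sys_nice --net=host --ipc=host chatbothabana:latest
      ;;
    *)
      echo "usage: launch cpu|hpu" >&2
      return 1
      ;;
  esac
}
```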
