return correct input token len without pad to user (#177)
Signed-off-by: Feng, Jiqing <jiqing.feng@intel.com>
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
sywangyi committed Aug 30, 2023
1 parent d114b27 commit 18be4c7
Showing 3 changed files with 30 additions and 18 deletions.
workflows/chatbot/fine_tuning/docker/Dockerfile (11 changes: 6 additions & 5 deletions)
@@ -77,15 +77,16 @@ FROM vault.habana.ai/gaudi-docker/1.10.0/ubuntu22.04/habanalabs/pytorch-installe
 ENV LANG=en_US.UTF-8
 ENV PYTHONPATH=/root:/usr/lib/habanalabs/
 
+RUN apt-get update && \
+    apt-get install git-lfs && \
+    git-lfs install
+
 # Install optimum-habana
 RUN git clone https://github.com/huggingface/optimum-habana.git && \
+    cd optimum-habana/ && git reset --hard 9570fb8f359ef458fddfb4040e2280d5fec0fd11 && pip install -e . && cd ../ && \
     cd ./optimum-habana/examples/text-generation/ && \
     pip install -r requirements.txt && \
-    apt-get update && \
-    apt-get install git-lfs && \
-    git-lfs install
-
-RUN pip install git+https://github.com/huggingface/optimum-habana.git && \
     cd / && \
     pip install einops && \
     pip install datasets && \
     pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.10.0 && \
workflows/chatbot/inference/docker/Dockerfile (14 changes: 8 additions & 6 deletions)
@@ -83,21 +83,23 @@ FROM vault.habana.ai/gaudi-docker/1.10.0/ubuntu22.04/habanalabs/pytorch-installe
 ENV LANG=en_US.UTF-8
 ENV PYTHONPATH=/root:/usr/lib/habanalabs/
 
-RUN git clone https://github.com/huggingface/optimum-habana.git && \
-    cd ./optimum-habana/examples/text-generation/ && \
-    pip install -r requirements.txt && \
-    apt-get update && \
+RUN apt-get update && \
     apt-get install git-lfs && \
     git-lfs install
 
-RUN pip install git+https://github.com/huggingface/optimum-habana.git && \
+RUN git clone https://github.com/huggingface/optimum-habana.git && \
+    cd optimum-habana/ && git reset --hard 9570fb8f359ef458fddfb4040e2280d5fec0fd11 && pip install -e . && cd ../ && \
+    cd ./optimum-habana/examples/text-generation/ && \
+    pip install -r requirements.txt && \
     cd / && \
     pip install peft && \
     pip install einops && \
     pip install datasets && \
     pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.10.0
 
 RUN git clone https://github.com/intel/intel-extension-for-transformers.git \
     && cd ./intel-extension-for-transformers/workflows/chatbot/inference/ \
-    && pip install -r requirements.txt
+    && pip install -r requirements.txt \
+    && pip install transformers==4.28.1
 
 WORKDIR /intel-extension-for-transformers/workflows/chatbot/inference/
workflows/chatbot/inference/generate.py (23 changes: 16 additions & 7 deletions)
@@ -210,9 +210,16 @@ def __call__(
         return False
 
 
-def max_input_len(model, outlen=0):
-    # need to adjust due to perf and real usage
-    return 128
+def max_input_len(input_text_length):
+    if input_text_length <= 128:
+        return 128
+    elif input_text_length <= 512:
+        return 512
+    elif input_text_length <= 2048:
+        return 2048
+    else:
+        logger.warning("Max support length is 4096")
+        return 4096
 
 
 def add_template(example, template_name):
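For context, the new helper pads each prompt up to the nearest of a few fixed bucket sizes. Below is a standalone sketch of its behavior; the logging setup is local to this snippet (generate.py uses its own logger), and the comment about static shapes is the usual motivation for bucketed padding on HPU, an assumption rather than something the commit states.

import logging

logger = logging.getLogger(__name__)

def max_input_len(input_text_length):
    # Choose the smallest fixed padding bucket that fits the prompt.
    # Fixed buckets keep input shapes static, which HPU graph execution
    # generally prefers over arbitrary per-prompt lengths.
    if input_text_length <= 128:
        return 128
    elif input_text_length <= 512:
        return 512
    elif input_text_length <= 2048:
        return 2048
    else:
        logger.warning("Max support length is 4096")
        return 4096

# Prompts are padded up to the nearest bucket, not to a global maximum:
assert max_input_len(100) == 128
assert max_input_len(300) == 512
assert max_input_len(3000) == 4096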
@@ -624,13 +631,14 @@ def generate_output():
         generation_thread = Thread(target=generate_output)
         generation_thread.start()
     elif device == "hpu":
+        input_tokens_no_pad = tokenizer([prompt], return_tensors="pt")
+        input_token_len = input_tokens_no_pad.input_ids.shape[-1]
         input_tokens = tokenizer.batch_encode_plus(
             [prompt],
             return_tensors="pt",
             padding="max_length",
-            max_length=max_input_len(model, max_new_tokens),
+            max_length=max_input_len(input_token_len),
         )
-        input_token_len = input_tokens.input_ids.shape[-1]
         if isinstance(model.generation_config.eos_token_id, list):
             stop_token_ids = copy.deepcopy(model.generation_config.eos_token_id)
         else:
@@ -849,13 +857,14 @@ def predict(**params):
         )
         generation_output = model.generate(**input_tokens, **generation_kwargs)
     elif device == "hpu":
+        input_tokens_no_pad = tokenizer([prompt], return_tensors="pt")
+        input_token_len = input_tokens_no_pad.input_ids.shape[-1]
         input_tokens = tokenizer.batch_encode_plus(
             [prompt],
             return_tensors="pt",
             padding="max_length",
-            max_length=max_input_len(model, max_new_tokens),
+            max_length=max_input_len(input_token_len),
         )
-        input_token_len = input_tokens.input_ids.shape[-1]
         if isinstance(model.generation_config.eos_token_id, list):
             stop_token_ids = copy.deepcopy(model.generation_config.eos_token_id)
         else:
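The functional change in generate.py is small: the prompt is tokenized once without padding to capture its true token count, and that count both picks the padding bucket and is what gets reported to the caller, instead of the padded length. A runnable sketch of the idea, assuming a Hugging Face tokenizer; "gpt2" is only a stand-in checkpoint, and the fixed max_length=128 stands in for max_input_len(input_token_len).

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # gpt2 ships without a pad token
prompt = "Tell me about Intel Gaudi."

# New behavior: measure the real prompt length before any padding.
no_pad = tokenizer([prompt], return_tensors="pt")
input_token_len = no_pad.input_ids.shape[-1]

# Only the tensors handed to the model are padded, to the chosen bucket.
padded = tokenizer.batch_encode_plus(
    [prompt],
    return_tensors="pt",
    padding="max_length",
    max_length=128,
)

# The old code read the length from the padded tensor, so every prompt
# looked exactly as long as the bucket; the unpadded count is correct.
assert padded.input_ids.shape[-1] == 128
assert input_token_len < 128
print(f"reported input length: {input_token_len} tokens")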
