Skip to content

Commit

Permalink
Merge remote-tracking branch 'ilavrenov_upstream/ct-beam-search' into n_support
Browse files Browse the repository at this point in the history
  • Loading branch information
iefode committed Jun 4, 2024
2 parents 1128792 + 76148c5 commit 55448a1
Show file tree
Hide file tree
Showing 19 changed files with 709 additions and 177 deletions.
1 change: 1 addition & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[submodule "thirdparty/openvino_tokenizers"]
path = thirdparty/openvino_tokenizers
url = https://github.com/openvinotoolkit/openvino_tokenizers.git
branch = master
34 changes: 34 additions & 0 deletions text_generation/causal_lm/cpp/continuous_batching/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
FROM ubuntu:22.04

# Number of parallel build jobs; pass at build time, e.g.:
#   docker build --build-arg JOBS=$(nproc) .
ARG JOBS
WORKDIR /workspace
# Base tooling. NOTE(review): `wget` added here — it is used below to download
# the ShareGPT dataset but is not present in the ubuntu:22.04 base image.
RUN apt-get update -y && apt-get install -y python3-pip python3-venv git wget

# Install OpenVINO (built from source; only the submodules required for a
# CPU-only build are initialized to keep the clone small)
RUN git clone https://github.com/openvinotoolkit/openvino.git && \
    cd /workspace/openvino && \
    git submodule update --init -- /workspace/openvino/thirdparty/xbyak /workspace/openvino/thirdparty/pugixml /workspace/openvino/thirdparty/open_model_zoo \
    /workspace/openvino/thirdparty/protobuf /workspace/openvino/thirdparty/snappy /workspace/openvino/thirdparty/telemetry /workspace/openvino/src/plugins/intel_cpu/thirdparty/mlas \
    /workspace/openvino/src/plugins/intel_cpu/thirdparty/onednn /workspace/openvino/src/bindings/python/thirdparty/pybind11 && cd -

RUN /workspace/openvino/install_build_dependencies.sh
RUN python3 -m pip install -r /workspace/openvino/src/bindings/python/wheel/requirements-dev.txt
# CPU-only configuration: GPU/NPU plugins, AUTO/HETERO/BATCH devices and all
# frontends except TensorFlow are disabled to shorten the build.
RUN cmake -DENABLE_PYTHON=ON -DENABLE_PYTHON_PACKAGING=ON -DENABLE_WHEEL=ON -DENABLE_CPPLINT=OFF -DENABLE_SAMPLES=OFF -DENABLE_INTEL_GPU=OFF \
    -DENABLE_INTEL_NPU=OFF -DENABLE_TEMPLATE=OFF -DENABLE_AUTO=OFF -DENABLE_HETERO=OFF -DENABLE_AUTO_BATCH=OFF -DENABLE_OV_TF_FRONTEND=ON -DENABLE_OV_ONNX_FRONTEND=OFF \
    -DENABLE_OV_TF_LITE_FRONTEND=OFF -DENABLE_OV_PADDLE_FRONTEND=OFF -S /workspace/openvino -B /workspace/openvino_build
RUN cmake --build /workspace/openvino_build --parallel $JOBS
RUN cmake -P /workspace/openvino_build/cmake_install.cmake
RUN python3 -m pip install /workspace/openvino_build/wheels/openvino-2024*
# Let the continuous-batching CMake build find the freshly built OpenVINO.
ENV OpenVINO_DIR=/workspace/openvino_build

# Download dataset (used by the benchmarking/test scripts)
RUN wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

# Build continuous batching library
RUN git clone --branch request_rate https://github.com/mzegla/openvino.genai.git && cd /workspace/openvino.genai/text_generation/causal_lm/cpp/continuous_batching && \
    git submodule update --remote --init && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ && cmake --build ./build/ -j $JOBS

# Install test dependencies (tokenizers nightly wheel + pytest requirements;
# torch is pulled from the CPU-only index to avoid CUDA downloads)
RUN python3 -m pip install --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly/ /workspace/openvino.genai/thirdparty/openvino_tokenizers
RUN PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" python3 -m pip install -r /workspace/openvino.genai/text_generation/causal_lm/cpp/continuous_batching/python/tests/requirements.txt
# Make the built Python bindings importable by the tests.
ENV PYTHONPATH=/workspace/openvino.genai/text_generation/causal_lm/cpp/continuous_batching/build/python
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <cxxopts.hpp>

#include "continuous_batching_pipeline.hpp"
#include "tokenizer.hpp"

void print_generation_result(const GenerationResult& generation_result) {
for (size_t output_id = 0; output_id < generation_result.m_generation_ids.size(); ++output_id) {
Expand Down Expand Up @@ -46,13 +47,14 @@ int main(int argc, char* argv[]) try {
std::vector<std::string> prompt_examples = {
"What is OpenVINO?",
"How are you?",
"OpenVINO is",
"What is the current time",
"What is your name?",
"Tell me something about Canada",
"What is OpenVINO?",
};

std::vector<GenerationConfig> sampling_params_examples {
// GenerationConfig::beam_search(),
// GenerationConfig::greedy(),
GenerationConfig::beam_search(),
GenerationConfig::greedy(),
GenerationConfig::multinomial(),
};

Expand All @@ -65,7 +67,7 @@ int main(int argc, char* argv[]) try {
}

// Perform the inference

SchedulerConfig scheduler_config {
// batch size
.max_num_batched_tokens = 32,
Expand All @@ -83,21 +85,20 @@ int main(int argc, char* argv[]) try {

for (size_t request_id = 0; request_id < generation_results.size(); ++request_id) {
const GenerationResult & generation_result = generation_results[request_id];

std::cout << "Question: " << prompts[request_id] << std::endl;
switch (generation_result.m_status)
{
case GenerationResultStatus::FINISHED:
case GenerationStatus::FINISHED:
print_generation_result(generation_result);
break;
case GenerationResultStatus::IGNORED:
case GenerationStatus::IGNORED:
std::cout << "Request was ignored due to lack of memory." <<std::endl;
if (generation_result.m_generation_ids.size() > 0) {
std::cout << "Partial result:" << std::endl;
print_generation_result(generation_result);
}
break;
case GenerationResultStatus::ABORTED:
case GenerationStatus::DROPPED_BY_PIPELINE:
std::cout << "Request was aborted." <<std::endl;
if (generation_result.m_generation_ids.size() > 0) {
std::cout << "Partial result:" << std::endl;
Expand All @@ -109,7 +110,6 @@ int main(int argc, char* argv[]) try {
}
std::cout << std::endl;
}

} catch (const std::exception& error) {
std::cerr << error.what() << '\n';
return EXIT_FAILURE;
Expand Down
Loading

0 comments on commit 55448a1

Please sign in to comment.