New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add python backend support #86
Changes from all commits
01f1cbb
4f936c3
2a91018
c6be129
b3bf26b
fa423d1
f0a12b5
7ea388f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
FROM moyix/triton_with_ft:22.09

# Install dependencies for the Python backend: torch (CUDA 11.6 wheels), then
# the Hugging Face libraries. --no-cache-dir keeps pip's download cache out of
# the image layers.
RUN python3 -m pip install --no-cache-dir --disable-pip-version-check -U torch --extra-index-url https://download.pytorch.org/whl/cu116
RUN python3 -m pip install --no-cache-dir --disable-pip-version-check -U transformers bitsandbytes accelerate
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,14 @@ | ||
version: '3.3' | ||
services: | ||
triton: | ||
image: moyix/triton_with_ft:22.09 | ||
build: | ||
context: . | ||
dockerfile: Dockerfile | ||
command: bash -c "CUDA_VISIBLE_DEVICES=${GPUS} mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=/model" | ||
shm_size: '2gb' | ||
volumes: | ||
- ${MODEL_DIR}:/model | ||
- ${HF_CACHE_DIR}:/root/.cache/huggingface | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If no […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The current version sets HF_CACHE_DIR to […] I could default it to true, and warn the user about the permission issues. Not sure which is a good option. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @thakkarparth007 if a user does not use the cache, the volume is set to empty, which cannot be mounted and causes the […] I think adding something like […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Whoops, I just saw your comment @fdegier. So currently, if you look at setup.sh (https://github.com/moyix/fauxpilot/pull/86/files#diff-4209d788ad32c40cbda3c66b3de47eefb929308ca703bb77a6382625986add17R148), you'll see that HF_CACHE_DIR is being set to […] But yes, perhaps it'll be better to store the cache in the fauxpilot directory itself. Updated it! |
||
ports: | ||
- "8000:8000" | ||
- "8001:8001" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
# Triton model configuration template for the Python backend.
# The placeholder values in the "parameters" sections at the bottom are filled
# in when the model directory is generated. Inputs marked UNUSED are declared
# but not consumed by this backend.
name: "py-model"
backend: "python"
max_batch_size: 4
input [
  {
    name: "input_ids"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    # UNUSED
    name: "start_id"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "end_id"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "input_lengths"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
  },
  {
    name: "request_output_len"
    data_type: TYPE_INT32
    dims: [ -1 ]
  },
  {
    name: "runtime_top_k"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "runtime_top_p"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "beam_search_diversity_rate"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    name: "temperature"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "len_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "repetition_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "random_seed"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "is_return_log_probs"
    data_type: TYPE_BOOL
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "beam_width"
    data_type: TYPE_INT32
    dims: [ 1 ]
    reshape: { shape: [ ] }
    optional: true
  },
  {
    # UNUSED
    name: "bad_words_list"
    data_type: TYPE_INT32
    dims: [ 2, -1 ]
    optional: true
  },
  {
    # UNUSED
    name: "stop_words_list"
    data_type: TYPE_INT32
    dims: [ 2, -1 ]
    optional: true
  }
]
output [
  {
    name: "output_ids"
    data_type: TYPE_INT32
    dims: [ -1, -1, -1 ]
  },
  {
    name: "sequence_length"
    data_type: TYPE_INT32
    dims: [ -1, -1 ]
  }
  # Following is currently unsupported, but should be supported in the future
  # ,
  # {
  #   name: "cum_log_probs"
  #   data_type: TYPE_FP32
  #   dims: [ -1 ]
  # },
  # {
  #   name: "output_log_probs"
  #   data_type: TYPE_FP32
  #   dims: [ -1, -1 ]
  # }
]
# Run a single instance of this model.
# NOTE(review): KIND_CPU presumably leaves the actual device placement to
# model.py (see the use_auto_device_map parameter below) -- confirm.
instance_group [
  {
    count: 1
    kind: KIND_CPU
  }
]
parameters {
  key: "use_half"
  value: {
    string_value: "${use_half}" # e.g. "0" or "1"
  }
}
parameters {
  key: "model_name"
  value: {
    string_value: "${model_name}" # e.g. "codegen-350M-multi"
  }
}
parameters {
  key: "org_name"
  value: {
    string_value: "${org_name}" # e.g. "Salesforce"
  }
}
parameters {
  key: "use_int8"
  value: {
    string_value: "${use_int8}" # e.g. "0" or "1"
  }
}
parameters {
  key: "use_auto_device_map"
  value: {
    string_value: "${use_auto_device_map}" # e.g. "0" or "1"
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
""" | ||
A simple script that sets up the model directory of a given model for Triton. | ||
""" | ||
|
||
import argparse
import os
import shutil
from pathlib import Path
from string import Template

SCRIPT_DIR = Path(__file__).parent
CONFIG_TEMPLATE_PATH = os.path.join(SCRIPT_DIR, 'config_template.pbtxt')


def setup_model_dir(model_dir, model_name, org_name, use_half="1", use_int8="0",
                    use_auto_device_map="1", model_py_path=None,
                    template_path=None):
    """Create the Triton model directory for one model and return its config path.

    The layout produced under ``model_dir`` is::

        py-<org_name>-<model_name>/py-model/config.pbtxt
        py-<org_name>-<model_name>/py-model/1/model.py

    Args:
        model_dir: Root directory that holds the generated model directories.
        model_name: Model name, substituted into the config template.
        org_name: Organisation name, substituted into the config template.
        use_half: String value substituted for ``use_half`` in the template.
        use_int8: String value substituted for ``use_int8`` in the template.
        use_auto_device_map: String value substituted for
            ``use_auto_device_map`` in the template.
        model_py_path: Source ``model.py`` to copy; defaults to the one next
            to this script.
        template_path: Config template to render; defaults to
            ``CONFIG_TEMPLATE_PATH``.

    Returns:
        ``pathlib.Path`` of the written ``config.pbtxt``.

    Raises:
        KeyError: If the template contains a placeholder that is not one of
            the values listed above (raised by ``Template.substitute``).
        OSError: If ``model.py`` or the template cannot be read, or the
            destination cannot be written.
    """
    if model_py_path is None:
        model_py_path = SCRIPT_DIR / 'model.py'
    if template_path is None:
        template_path = CONFIG_TEMPLATE_PATH

    # Step 1: make the version directory ("1") inside the model directory.
    version_dir = Path(model_dir) / f"py-{org_name}-{model_name}" / "py-model" / "1"
    version_dir.mkdir(parents=True, exist_ok=True)

    # Step 2: copy model.py next to it.
    shutil.copy(model_py_path, version_dir / 'model.py')

    # Step 3: render config.pbtxt one level above the version directory.
    # An explicit encoding keeps the result independent of the host locale.
    template = Template(Path(template_path).read_text(encoding="utf-8"))
    config = template.substitute(
        org_name=org_name,
        model_name=model_name,
        use_half=use_half,
        use_int8=use_int8,
        use_auto_device_map=use_auto_device_map,
    )
    config_path = version_dir.parent / 'config.pbtxt'
    config_path.write_text(config, encoding="utf-8")
    return config_path


def main(argv=None):
    """Parse command-line arguments and generate the model directory.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", type=str, required=True)
    parser.add_argument("--model_name", type=str, required=True)
    parser.add_argument("--org_name", type=str, required=True)
    parser.add_argument("--use_half", type=str, default="1")
    parser.add_argument("--use_int8", type=str, default="0")
    parser.add_argument("--use_auto_device_map", type=str, default="1")
    args = parser.parse_args(argv)

    config_path = setup_model_dir(
        model_dir=args.model_dir,
        model_name=args.model_name,
        org_name=args.org_name,
        use_half=args.use_half,
        use_int8=args.use_int8,
        use_auto_device_map=args.use_auto_device_map,
    )
    print(f"Config written to {os.path.abspath(config_path)}")


# Only do the work when run as a script, so the module can be imported safely.
if __name__ == "__main__":
    main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As mentioned in my other comment, I think the dependencies should be added in
moyix/triton_with_ft
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good to me. I can make a PR to that repo instead