Merged
Makefile (7 additions, 0 deletions)

@@ -59,3 +59,10 @@ hf-1b-GPT2-mqa1-int8:
 
 ds-inference-1b-GPT2-mqa1-fp16:
 	deepspeed --num_gpus 1 src/main.py --hidden_size 2048 --n_head 16 --n_layer 24 --pipeline_class DS_Inference_Pipeline --model_class GPT2 --n_positions 2048 --attention_type 3 --batch_size ${batch_size}
+
+# Input length experiments
+hf-1b-GPT2-mqa1-int8-input-length:
+	python src/main.py --hidden_size 2048 --n_head 16 --n_layer 24 --pipeline_class HF_GPU_Pipeline --model_class GPT2 --n_positions 2048 --attention_type 3 --dtype int8 --batch_size ${batch_size} --max_input_length ${max_input_length}
+
+hf-1b-GPT2-mha-int8-input-length:
+	python src/main.py --hidden_size 2048 --n_head 16 --n_layer 24 --pipeline_class HF_GPU_Pipeline --model_class GPT2 --n_positions 2048 --attention_type 1 --dtype int8 --batch_size ${batch_size} --max_input_length ${max_input_length}
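The new targets are invoked like the existing ones, with the extra variable passed on the command line, e.g. make hf-1b-GPT2-mqa1-int8-input-length batch_size=32 max_input_length=512; the run_input_length.sh script below sweeps a range of lengths automatically.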
run.sh (mode changed 100644 → 100755; contents unchanged)
run_input_length.sh (new file, 8 additions, 0 deletions)

@@ -0,0 +1,8 @@
+export CUDA_VISIBLE_DEVICES=0
+
+rm -rf ./tmp
+
+for max_input_length in {4,8,16,32,64,128,256,512,1024,1536,1900}
+do
+make $1 batch_size=32 max_input_length=$max_input_length
+done
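For reference, a minimal Python equivalent of this sweep (a hypothetical helper, not part of the PR) that shells out to the same make targets:

# sweep_input_lengths.py -- hypothetical helper, not part of the PR:
# a Python equivalent of run_input_length.sh.
import subprocess
import sys

INPUT_LENGTHS = [4, 8, 16, 32, 64, 128, 256, 512, 1024, 1536, 1900]

def sweep(target: str, batch_size: int = 32) -> None:
    # Run the given make target once per input length, failing fast on errors.
    for max_input_length in INPUT_LENGTHS:
        subprocess.run(
            ["make", target,
             f"batch_size={batch_size}",
             f"max_input_length={max_input_length}"],
            check=True,
        )

if __name__ == "__main__":
    # e.g. python sweep_input_lengths.py hf-1b-GPT2-mqa1-int8-input-length
    sweep(sys.argv[1])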
src/main.py (9 additions, 3 deletions)

@@ -1,13 +1,19 @@
 import pipelines
-from utils import benchmark_end_to_end, get_arg_parser, get_args, get_dummy_batch
+from utils import benchmark_end_to_end, get_arg_parser, get_args, get_dummy_batch, get_dummy_batch_tokenizer
+from transformers import AutoTokenizer
 
 def main() -> None:
     # deepspeed.init_distributed("nccl")
 
     args = get_args(get_arg_parser())
 
-    inputs = get_dummy_batch(args.batch_size)
+    if args.max_input_length == -1:
+        inputs = get_dummy_batch(args.batch_size)
+    else:
+        tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        tokenizer.add_special_tokens({"pad_token": "[PAD]"})
+        inputs = get_dummy_batch_tokenizer(args.batch_size, tokenizer, args.max_input_length)
 
     generate_kwargs = dict(max_new_tokens=args.max_new_tokens, do_sample=False)
 
     pipeline_class = getattr(pipelines, args.pipeline_class)
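With the default max_input_length of -1 the previous behaviour is kept unchanged (the hard-coded dummy batch, no truncation). On the new path a [PAD] special token is added explicitly, since GPT2's tokenizer does not define a pad token by default.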
src/pipelines/pipeline.py (6 additions, 1 deletion)

@@ -11,9 +11,13 @@ def __init__(self, args: Namespace) -> None:
         self.config, self.tokenizer, self.model_class = get_config_tokenizer_model_class(args)
         self.model = None
         self.input_device = None
+        self.max_input_length = args.max_input_length
 
     def __call__(self, text: List[str], **generate_kwargs) -> Tuple[List[str], List[int]]:
-        input_tokens = self.tokenizer(text, return_tensors="pt", padding=True)
+        if self.max_input_length == -1:
+            input_tokens = self.tokenizer(text, return_tensors="pt", padding=True)
+        else:
+            input_tokens = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=self.max_input_length)
 
         for t in input_tokens:
             if torch.is_tensor(input_tokens[t]):
@@ -25,6 +29,7 @@ def __call__(self, text: List[str], **generate_kwargs) -> Tuple[List[str], List[int]]:
         output_tokens = output.sequences
 
         input_token_lengths = [x.shape[0] for x in input_tokens.input_ids]
+        print("Input token lengths: ", input_token_lengths)
         output_token_lengths = [x.shape[0] for x in output_tokens]
         num_generated_tokens = [o - i for i, o in zip(input_token_lengths, output_token_lengths)]
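For illustration, a standalone sketch of the truncation branch (assuming the stock "gpt2" tokenizer rather than the one returned by get_config_tokenizer_model_class): max_length caps every sequence at max_input_length tokens, and padding brings shorter ones up to that length.

# Standalone sketch of the truncation path, assuming the "gpt2" tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.add_special_tokens({"pad_token": "[PAD]"})

text = ["DeepSpeed is a machine learning framework", "Hello"]
input_tokens = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=4)
print(input_tokens.input_ids.shape)  # torch.Size([2, 4]): first sentence truncated, "Hello" padded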
src/utils/__init__.py (1 addition, 1 deletion)

@@ -1,3 +1,3 @@
 from .arguments import get_arg_parser, get_args
 from .benchmark import benchmark_end_to_end
-from .dummy import get_dummy_batch
+from .dummy import get_dummy_batch, get_dummy_batch_tokenizer
src/utils/arguments.py (1 addition, 0 deletions)

@@ -9,6 +9,7 @@ def get_arg_parser() -> ArgumentParser:
     parser.add_argument("--model_class", default="GPT2", type=str)
     parser.add_argument("--batch_size", default=1, type=int)
     parser.add_argument("--dtype", default="bfloat16", type=str)
+    parser.add_argument("--max_input_length", default=-1, type=int)
     parser.add_argument("--max_new_tokens", default=100, type=int)
    parser.add_argument("--local_rank", type=int)
     parser.add_argument("--hidden_size", type=int)
src/utils/dummy.py (8 additions, 1 deletion)

@@ -1,7 +1,7 @@
 import copy
 import math
 from typing import List
 
-
+import random
 
 dummy_input_sentences = [
     "DeepSpeed is a machine learning framework",
@@ -24,3 +24,10 @@ def get_dummy_batch(batch_size: int, input_sentences: List[str] = None) -> List[str]:
     input_sentences = input_sentences[:batch_size]
 
     return input_sentences
+
+def get_dummy_batch_tokenizer(batch_size: int, tokenizer, max_input_length: int) -> List[str]:
+    input_sentences = []
+    for i in range(batch_size):
+        sentence = [random.randint(0, tokenizer.vocab_size - 1) for _ in range(max_input_length)]
+        input_sentences.append(tokenizer.decode(sentence))
+    return input_sentences
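One caveat worth noting: a BPE decode/re-encode round trip is not exactly length-preserving, so the text produced here does not necessarily re-tokenize to max_input_length tokens; the truncation added in pipeline.py is what enforces the cap. A quick standalone check (not part of the PR):

# Decoding random token ids and re-encoding the text is only approximately
# length-preserving for GPT-2's BPE, hence the truncation in the pipeline.
import random
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
max_input_length = 16

ids = [random.randint(0, tokenizer.vocab_size - 1) for _ in range(max_input_length)]
text = tokenizer.decode(ids)
print(len(tokenizer(text).input_ids))  # often close to, but not exactly, 16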