From d2b15044a08fae5975d9db8ed9835b28d687e4f4 Mon Sep 17 00:00:00 2001
From: Xin Yang
Date: Tue, 6 Jun 2023 21:44:17 -0700
Subject: [PATCH] Fix input_data and device order for streaming

---
NOTE(review): line breaks were restored from a copy of this patch that had
been collapsed onto a single line. The leading whitespace of the hunk lines
below is reconstructed, not original -- verify it against the target file,
or apply with `git apply --ignore-whitespace`.

 engines/python/setup/djl_python/deepspeed.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/engines/python/setup/djl_python/deepspeed.py b/engines/python/setup/djl_python/deepspeed.py
index 97501b704..29fdba6b4 100644
--- a/engines/python/setup/djl_python/deepspeed.py
+++ b/engines/python/setup/djl_python/deepspeed.py
@@ -324,8 +324,8 @@ def inference(self, inputs: Input):
                     "DeepSpeed")
                 device = torch.cuda.current_device()
                 outputs.add_stream_content(
-                    stream_generator(self.model, self.tokenizer, device,
-                                     input_data, **model_kwargs))
+                    stream_generator(self.model, self.tokenizer, input_data,
+                                     device, **model_kwargs))
                 return outputs
             if self.task == "text-generation":
                 tokenized_inputs = self.tokenizer(