From 1ec968bb0c3da514d0c3d72f878609b40251ea03 Mon Sep 17 00:00:00 2001 From: Samyam Rajbhandari Date: Tue, 22 Mar 2022 00:57:08 +0000 Subject: [PATCH] qkv_out can be a single tensor or a list. Handling these cases separately. --- deepspeed/ops/transformer/inference/transformer_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepspeed/ops/transformer/inference/transformer_inference.py b/deepspeed/ops/transformer/inference/transformer_inference.py index 5527f350401b..9acca965c16e 100755 --- a/deepspeed/ops/transformer/inference/transformer_inference.py +++ b/deepspeed/ops/transformer/inference/transformer_inference.py @@ -274,7 +274,7 @@ def selfAttention_fp(): norm_b, config.epsilon, (attn_qkvb is not None)) - context_layer, key_layer, value_layer = compute_attention(qkv_out[0], input_mask) + context_layer, key_layer, value_layer = compute_attention(qkv_out[0] if isinstance(qkv_out, list) else qkv_out, input_mask) output = vector_matmul_func(context_layer, attn_ow, False) return output, key_layer, value_layer, context_layer, qkv_out[-1] # attn_out, present_key, present_value, context_output, inp_norm