huggingface · muellerzr · Jan 25, 2024 · Jan 18, 2024 · Jan 18, 2024 · Jan 18, 2024
diff --git a/src/accelerate/inference.py b/src/accelerate/inference.py
@@ -8,9 +8,12 @@
 from .state import PartialState
 from .utils import (
     calculate_maximum_sizes,
+    concatenate,
     convert_bytes,
+    find_batch_size,
     infer_auto_device_map,
     send_to_device,
+    slice_tensors,
 )
 
 
@@ -59,9 +62,42 @@ def build_pipeline(model, split_points, args, kwargs) -> PipelineStage:
 def pippy_forward(forward, *args, **kwargs):
+    """Run `forward` for this process's role; the local main process pads batches not divisible by `num_processes`."""
     state = PartialState()
     output = None
+
+    def _find_batch_size(args):
+        """Return `find_batch_size(args)`, or `None` when it raises `ValueError` or `TypeError`."""
+        try:
+            return find_batch_size(args)
+        except (ValueError, TypeError):
+            return None
+
+    def _pad_inputs(args):
+        """Pad `args` to a batch size divisible by `state.num_processes` by repeating the last sample."""
+        # `found_batch_size` comes from the enclosing scope; the remainder is non-zero whenever this is called
+        to_pad = state.num_processes - (found_batch_size % state.num_processes)
+        last = slice_tensors(
+            args,
+            slice(found_batch_size - 1, found_batch_size),
+            process_index=0,
+            num_processes=state.num_processes,
+        )
+        return concatenate([args] + [last] * to_pad)
+
     if state.num_processes == 1:
         output = forward(*args, **kwargs)
     elif state.is_local_main_process:
+        # Use the first input that reports a batch size: positional inputs first, then keyword ones
+        found_batch_size = None
+        for arg in (*args, *kwargs.values()):
+            found_batch_size = _find_batch_size(arg)
+            if found_batch_size is not None:
+                break
+        if found_batch_size is None:
+            raise ValueError("Could not find batch size from args or kwargs")
+        # Pad when the batch size is not a multiple of the process count
+        if (found_batch_size % state.num_processes) != 0:
+            args = _pad_inputs(args)
+            kwargs = _pad_inputs(kwargs)
         forward(*args, **kwargs)
     elif state.is_last_process:
         output = forward()

diff --git a/src/accelerate/test_utils/scripts/external_deps/test_pippy.py b/src/accelerate/test_utils/scripts/external_deps/test_pippy.py
@@ -36,22 +36,26 @@
 
 def get_model_and_data(model_name, device, num_processes: int = 2):
     initializer, config, seq_len = model_to_config[model_name]
-    config = config()
-    model = initializer(config)
+    config_args = {}  # keyword overrides passed to the config class; currently always empty
+    # TODO: batched GPT-2 inference (batch size != 1) will eventually need a pad token:
+    # if model_name == "gpt2":
+    #     config_args["pad_token_id"] = 0
+    model_config = config(**config_args)
+    model = initializer(model_config)
     return model, torch.randint(
         low=0,
-        high=config.vocab_size,
+        high=model_config.vocab_size,
         size=(num_processes, seq_len),
         device=device,
         dtype=torch.int64,
         requires_grad=False,
     )
 
 
-def test_gpt2():
+def test_gpt2(batch_size: int = 2):
     set_seed(42)
     state = PartialState()
-    model, inputs = get_model_and_data("gpt2", "cpu", state.num_processes)
+    model, inputs = get_model_and_data("gpt2", "cpu", batch_size)
     model = prepare_pippy(model, example_args=(inputs,), no_split_module_classes=model._no_split_modules)
     # For inference args need to be a tuple
     inputs = inputs.to("cuda")
@@ -64,10 +68,10 @@ def test_gpt2():
         assert output is not None, "Output was not generated in the last process!"
 
 
-def test_t5():
+def test_t5(batch_size: int = 2):
     set_seed(42)
     state = PartialState()
-    model, inputs = get_model_and_data("t5", "cpu", state.num_processes)
+    model, inputs = get_model_and_data("t5", "cpu", batch_size)
     example_inputs = {"input_ids": inputs, "decoder_input_ids": inputs}
     model = prepare_pippy(
         model,
@@ -91,7 +95,12 @@ def test_t5():
     if state.distributed_type == DistributedType.MULTI_GPU:
         state.print("Testing GPT2...")
         test_gpt2()
+        # Known issue: modifying the tokenizer for batched GPT-2 inference currently fails
+        # with a reference-related error, so the batched GPT-2 test below stays disabled:
+        # NameError: cannot access free variable 'chunk_args_list' where it is not associated with a value in enclosing scope
+        # test_gpt2(3)
         state.print("Testing T5...")
         test_t5()
+        test_t5(3)
     else:
         print("Less than two GPUs found, not running tests!")