huggingface · muellerzr · Jan 25, 2024 · Jan 18, 2024 · Jan 18, 2024 · Jan 18, 2024
diff --git a/src/accelerate/inference.py b/src/accelerate/inference.py
@@ -8,9 +8,12 @@
 from .state import PartialState
 from .utils import (
     calculate_maximum_sizes,
+    concatenate,
     convert_bytes,
+    find_batch_size,
     infer_auto_device_map,
     send_to_device,
+    slice_tensors,
 )
 
 
@@ -59,9 +62,42 @@ def build_pipeline(model, split_points, args, kwargs) -> PipelineStage:
 def pippy_forward(forward, *args, **kwargs):
     state = PartialState()
     output = None
+
+    def _find_batch_size(arg):
+        try:
+            return find_batch_size(arg)
+        except (ValueError, TypeError):
+            pass
+        return None
+
+    def _pad_inputs(args):
+        # Slice and copy the last input
+        extra = slice_tensors(
+            args,
+            slice(state.num_processes, state.num_processes + 1),
+        )
+        # Concat it to `args`
+        args = concatenate([args, extra])
+        return args
+
     if state.num_processes == 1:
         output = forward(*args, **kwargs)
     elif state.is_local_main_process:
+        found_batch_size = None
+        for arg in args:
+            found_batch_size = _find_batch_size(arg)
+            if found_batch_size is not None:
+                break
+        for kwarg in kwargs.values():
+            found_batch_size = _find_batch_size(kwarg)
+            if found_batch_size is not None:
+                break
+        if found_batch_size is None:
+            raise ValueError("Could not find batch size from args or kwargs")
+        else:
+            if (found_batch_size % state.num_processes) != 0:
+                args = _pad_inputs(args)
+                kwargs = _pad_inputs(kwargs)
         forward(*args, **kwargs)
     elif state.is_last_process:
         output = forward()

diff --git a/src/accelerate/test_utils/scripts/external_deps/test_pippy.py b/src/accelerate/test_utils/scripts/external_deps/test_pippy.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import torch
+from torchvision.models import resnet34
 from transformers import (
     BertConfig,
     BertForMaskedLM,
@@ -34,24 +35,28 @@
 }
 
 
-def get_model_and_data(model_name, device, num_processes: int = 2):
+def get_model_and_data_for_text(model_name, device, num_processes: int = 2):
     initializer, config, seq_len = model_to_config[model_name]
-    config = config()
-    model = initializer(config)
+    config_args = {}
+    # Eventually needed for batch inference tests on gpt-2 when bs != 1
+    # if model_name == "gpt2":
+    #     config_args["pad_token_id"] = 0
+    model_config = config(**config_args)
+    model = initializer(model_config)
     return model, torch.randint(
         low=0,
-        high=config.vocab_size,
+        high=model_config.vocab_size,
         size=(num_processes, seq_len),
         device=device,
         dtype=torch.int64,
         requires_grad=False,
     )
 
 
-def test_gpt2():
+def test_gpt2(batch_size: int = 2):
     set_seed(42)
     state = PartialState()
-    model, inputs = get_model_and_data("gpt2", "cpu", state.num_processes)
+    model, inputs = get_model_and_data_for_text("gpt2", "cpu", batch_size)
     model = prepare_pippy(model, example_args=(inputs,), no_split_module_classes=model._no_split_modules)
     # For inference args need to be a tuple
     inputs = inputs.to("cuda")
@@ -64,10 +69,10 @@ def test_gpt2():
         assert output is not None, "Output was not generated in the last process!"
 
 
-def test_t5():
+def test_t5(batch_size: int = 2):
     set_seed(42)
     state = PartialState()
-    model, inputs = get_model_and_data("t5", "cpu", state.num_processes)
+    model, inputs = get_model_and_data_for_text("t5", "cpu", batch_size)
     example_inputs = {"input_ids": inputs, "decoder_input_ids": inputs}
     model = prepare_pippy(
         model,
@@ -85,13 +90,40 @@ def test_t5():
         assert output is not None, "Output was not generated in the last process!"
 
 
+def test_resnet(batch_size: int = 2):
+    set_seed(42)
+    state = PartialState()
+    model = resnet34()
+    input_tensor = torch.rand(batch_size, 3, 224, 224)
+    model = prepare_pippy(
+        model,
+        example_args=(input_tensor,),
+    )
+    inputs = send_to_device(input_tensor, "cuda:0")
+    with torch.no_grad():
+        output = model(inputs)
+    # Zach: Check that we just grab the real outputs we need at the end
+    if not state.is_last_process:
+        assert output is None, "Output was not generated on just the last process!"
+    else:
+        assert output is not None, "Output was not generated in the last process!"
+
+
 if __name__ == "__main__":
     state = PartialState()
     state.print("Testing pippy integration...")
     if state.distributed_type == DistributedType.MULTI_GPU:
         state.print("Testing GPT2...")
         test_gpt2()
+        # Issue: When modifying the tokenizer for batch GPT2 inference, there's an issue
+        # due to references
+        # NameError: cannot access free variable 'chunk_args_list' where it is not associated with a value in enclosing scope
+        # test_gpt2(3)
         state.print("Testing T5...")
         test_t5()
+        test_t5(3)
+        state.print("Testing CV model...")
+        test_resnet()
+        test_resnet(3)
     else:
         print("Less than two GPUs found, not running tests!")