diff --git a/src/huggingface_inference_toolkit/const.py b/src/huggingface_inference_toolkit/const.py
index 958afcd9..d7dc3688 100644
--- a/src/huggingface_inference_toolkit/const.py
+++ b/src/huggingface_inference_toolkit/const.py
@@ -9,6 +9,6 @@
 HF_REVISION = os.environ.get("HF_REVISION", None)
 HF_HUB_TOKEN = os.environ.get("HF_HUB_TOKEN", None)
 # custom handler consts
-HF_DEFAULT_PIPELINE_NAME = os.environ.get("HF_DEFAULT_PIPELINE_NAME", "pipeline.py")
+HF_DEFAULT_PIPELINE_NAME = os.environ.get("HF_DEFAULT_PIPELINE_NAME", "handler.py")
 # default is pipeline.PreTrainedPipeline
-HF_MODULE_NAME = os.environ.get("HF_MODULE_NAME", f"{Path(HF_DEFAULT_PIPELINE_NAME).stem}.PreTrainedPipeline")
+HF_MODULE_NAME = os.environ.get("HF_MODULE_NAME", f"{Path(HF_DEFAULT_PIPELINE_NAME).stem}.EndpointHandler")
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index b8e6cf5e..18dc4aa3 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -189,15 +189,30 @@ def check_and_register_custom_pipeline_from_directory(model_dir):
     """
     # path to custom handler
     custom_module = Path(model_dir).joinpath(HF_DEFAULT_PIPELINE_NAME)
+    legacy_module = Path(model_dir).joinpath("pipeline.py")
     if custom_module.is_file():
         logger.info(f"Found custom pipeline at {custom_module}")
         spec = importlib.util.spec_from_file_location(HF_MODULE_NAME, custom_module)
+        if spec:
+            # add the whole directory to path for submodules
+            sys.path.insert(0, model_dir)
+            # import custom handler
+            handler = importlib.util.module_from_spec(spec)
+            sys.modules[HF_MODULE_NAME] = handler
+            spec.loader.exec_module(handler)
+            # init custom handler with model_dir
+            custom_pipeline = handler.EndpointHandler(model_dir)
+    elif legacy_module.is_file():
+        logger.warning(
+            "You are using a legacy custom pipeline. Please update to the new format. See the documentation for more information."
+        )
+        spec = importlib.util.spec_from_file_location("pipeline.PreTrainedPipeline", legacy_module)
         if spec:
             # add the whole directory to path for submodlues
             sys.path.insert(0, model_dir)
             # import custom handler
             pipeline = importlib.util.module_from_spec(spec)
-            sys.modules[HF_MODULE_NAME] = pipeline
+            sys.modules["pipeline.PreTrainedPipeline"] = pipeline
             spec.loader.exec_module(pipeline)
             # init custom handler with model_dir
             custom_pipeline = pipeline.PreTrainedPipeline(model_dir)
diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py
index a0415923..fe30952f 100644
--- a/tests/integ/test_container.py
+++ b/tests/integ/test_container.py
@@ -16,7 +16,6 @@
 
 client = docker.from_env()
 
-
 def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str):
     try:
         previous = client.containers.get(container_name)
@@ -173,24 +172,62 @@ def test_pt_container_local_model(task) -> None:
     container.remove()
 
 
+@require_torch
+@pytest.mark.parametrize(
+    "repository_id",
+    ["philschmid/custom-handler-test", "philschmid/custom-handler-distilbert"],
+)
+def test_pt_container_custom_handler(repository_id) -> None:
+    container_name = "integration-test-custom"
+    container_image = f"starlette-transformers:{DEVICE}"
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
+    port = random.randint(5000, 6000)
+
+    make_sure_other_containers_are_stopped(client, container_name)
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py
+        storage_dir = _load_repository_from_hf(repository_id, tmpdirname)
+        container = client.containers.run(
+            container_image,
+            name=container_name,
+            ports={"5000": port},
+            environment={
+                "HF_MODEL_DIR": tmpdirname,
+            },
+            volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}},
+            detach=True,
+            # GPU
+            device_requests=device_request,
+        )
+        BASE_URL = f"http://localhost:{port}"
+        wait_for_container_to_be_ready(BASE_URL)
+        payload = {"inputs": "this is a test"}
+        prediction = requests.post(f"{BASE_URL}", json=payload).json()
+        assert prediction == payload
+        # time.sleep(5)
+        container.stop()
+        container.remove()
+
+
 @require_torch
 @pytest.mark.parametrize(
     "repository_id",
     ["philschmid/custom-pipeline-text-classification"],
 )
-def test_pt_container_custom_pipeline(repository_id) -> None:
+def test_pt_container_legacy_custom_pipeline(repository_id) -> None:
     container_name = "integration-test-custom"
     container_image = f"starlette-transformers:{DEVICE}"
     device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
+    port = random.randint(5000, 6000)
 
     make_sure_other_containers_are_stopped(client, container_name)
     with tempfile.TemporaryDirectory() as tmpdirname:
         # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py
-        storage_dir = _load_repository_from_hf("philschmid/custom-pipeline-text-classification", tmpdirname)
+        storage_dir = _load_repository_from_hf(repository_id, tmpdirname)
         container = client.containers.run(
             container_image,
             name=container_name,
-            ports={"5000": "5000"},
+            ports={"5000": port},
             environment={
                 "HF_MODEL_DIR": tmpdirname,
             },
@@ -199,7 +236,7 @@ def test_pt_container_custom_pipeline(repository_id) -> None:
             # GPU
             device_requests=device_request,
         )
-        BASE_URL = "http://localhost:5000"
+        BASE_URL = f"http://localhost:{port}"
         wait_for_container_to_be_ready(BASE_URL)
         payload = {"inputs": "this is a test"}
         prediction = requests.post(f"{BASE_URL}", json=payload).json()
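
Note: for reference, a minimal handler.py following the new convention introduced above might look like the sketch below. This is an illustrative assumption based only on this diff: the module name (handler.py) and class name (EndpointHandler constructed with the model directory) come from const.py and utils.py, while the serving method name (__call__) and payload shape are assumed rather than confirmed, beyond the new integration test expecting the test handler to echo {"inputs": "this is a test"} back as the prediction.

# handler.py -- hypothetical minimal custom handler (echoes the request,
# which is what test_pt_container_custom_handler above expects: prediction == payload)
from typing import Any, Dict


class EndpointHandler:
    def __init__(self, path: str = ""):
        # `path` is the model directory the toolkit passes in
        # (see `handler.EndpointHandler(model_dir)` in utils.py above);
        # a real handler would load its model/tokenizer here.
        self.path = path

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        # assumed entry point: receives the deserialized request body,
        # e.g. {"inputs": "this is a test"}, and returns the prediction
        return data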