diff --git a/src/huggingface_inference_toolkit/const.py b/src/huggingface_inference_toolkit/const.py
index 958afcd9..d7dc3688 100644
--- a/src/huggingface_inference_toolkit/const.py
+++ b/src/huggingface_inference_toolkit/const.py
@@ -9,6 +9,6 @@
 HF_REVISION = os.environ.get("HF_REVISION", None)
 HF_HUB_TOKEN = os.environ.get("HF_HUB_TOKEN", None)
 # custom handler consts
-HF_DEFAULT_PIPELINE_NAME = os.environ.get("HF_DEFAULT_PIPELINE_NAME", "pipeline.py")
+HF_DEFAULT_PIPELINE_NAME = os.environ.get("HF_DEFAULT_PIPELINE_NAME", "handler.py")
 # default is pipeline.PreTrainedPipeline
-HF_MODULE_NAME = os.environ.get("HF_MODULE_NAME", f"{Path(HF_DEFAULT_PIPELINE_NAME).stem}.PreTrainedPipeline")
+HF_MODULE_NAME = os.environ.get("HF_MODULE_NAME", f"{Path(HF_DEFAULT_PIPELINE_NAME).stem}.EndpointHandler")
diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py
index b8e6cf5e..18dc4aa3 100644
--- a/src/huggingface_inference_toolkit/utils.py
+++ b/src/huggingface_inference_toolkit/utils.py
@@ -189,15 +189,30 @@ def check_and_register_custom_pipeline_from_directory(model_dir):
     """
     # path to custom handler
     custom_module = Path(model_dir).joinpath(HF_DEFAULT_PIPELINE_NAME)
+    legacy_module = Path(model_dir).joinpath("pipeline.py")
     if custom_module.is_file():
         logger.info(f"Found custom pipeline at {custom_module}")
         spec = importlib.util.spec_from_file_location(HF_MODULE_NAME, custom_module)
+        if spec:
+            # add the whole directory to path for submodules
+            sys.path.insert(0, model_dir)
+            # import custom handler
+            handler = importlib.util.module_from_spec(spec)
+            sys.modules[HF_MODULE_NAME] = handler
+            spec.loader.exec_module(handler)
+            # init custom handler with model_dir
+            custom_pipeline = handler.EndpointHandler(model_dir)
+    elif legacy_module.is_file():
+        logger.warning(
+            "You are using a legacy custom pipeline. Please update to the new format. See the documentation for more information."
+        )
+        spec = importlib.util.spec_from_file_location("pipeline.PreTrainedPipeline", legacy_module)
         if spec:
             # add the whole directory to path for submodlues
             sys.path.insert(0, model_dir)
             # import custom handler
             pipeline = importlib.util.module_from_spec(spec)
-            sys.modules[HF_MODULE_NAME] = pipeline
+            sys.modules["pipeline.PreTrainedPipeline"] = pipeline
             spec.loader.exec_module(pipeline)
             # init custom handler with model_dir
             custom_pipeline = pipeline.PreTrainedPipeline(model_dir)
diff --git a/tests/integ/test_container.py b/tests/integ/test_container.py
index a0415923..fe30952f 100644
--- a/tests/integ/test_container.py
+++ b/tests/integ/test_container.py
@@ -16,7 +16,6 @@
 
 client = docker.from_env()
 
-
 def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str):
     try:
         previous = client.containers.get(container_name)
@@ -173,24 +172,62 @@ def test_pt_container_local_model(task) -> None:
     container.remove()
 
 
+@require_torch
+@pytest.mark.parametrize(
+    "repository_id",
+    ["philschmid/custom-handler-test", "philschmid/custom-handler-distilbert"],
+)
+def test_pt_container_custom_handler(repository_id) -> None:
+    container_name = "integration-test-custom"
+    container_image = f"starlette-transformers:{DEVICE}"
+    device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
+    port = random.randint(5000, 6000)
+
+    make_sure_other_containers_are_stopped(client, container_name)
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py
+        storage_dir = _load_repository_from_hf(repository_id, tmpdirname)
+        container = client.containers.run(
+            container_image,
+            name=container_name,
+            ports={"5000": port},
+            environment={
+                "HF_MODEL_DIR": tmpdirname,
+            },
+            volumes={tmpdirname: {"bind": tmpdirname, "mode": "ro"}},
+            detach=True,
+            # GPU
+            device_requests=device_request,
+        )
+        BASE_URL = f"http://localhost:{port}"
+        wait_for_container_to_be_ready(BASE_URL)
+        payload = {"inputs": "this is a test"}
+        prediction = requests.post(f"{BASE_URL}", json=payload).json()
+        assert prediction == payload
+        # time.sleep(5)
+        container.stop()
+        container.remove()
+
+
 @require_torch
 @pytest.mark.parametrize(
     "repository_id",
     ["philschmid/custom-pipeline-text-classification"],
 )
-def test_pt_container_custom_pipeline(repository_id) -> None:
+def test_pt_container_legacy_custom_pipeline(repository_id) -> None:
     container_name = "integration-test-custom"
     container_image = f"starlette-transformers:{DEVICE}"
     device_request = [docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])] if IS_GPU else []
+    port = random.randint(5000, 6000)
 
     make_sure_other_containers_are_stopped(client, container_name)
     with tempfile.TemporaryDirectory() as tmpdirname:
         # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py
-        storage_dir = _load_repository_from_hf("philschmid/custom-pipeline-text-classification", tmpdirname)
+        storage_dir = _load_repository_from_hf(repository_id, tmpdirname)
         container = client.containers.run(
             container_image,
             name=container_name,
-            ports={"5000": "5000"},
+            ports={"5000": port},
             environment={
                 "HF_MODEL_DIR": tmpdirname,
             },
@@ -199,7 +236,7 @@ def test_pt_container_custom_pipeline(repository_id) -> None:
             # GPU
             device_requests=device_request,
         )
-        BASE_URL = "http://localhost:5000"
+        BASE_URL = f"http://localhost:{port}"
         wait_for_container_to_be_ready(BASE_URL)
         payload = {"inputs": "this is a test"}
         prediction = requests.post(f"{BASE_URL}", json=payload).json()
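
Note: for reference, a minimal handler.py following the new convention introduced above might look like the sketch below. This is an illustrative assumption based only on this diff: the module name (handler.py) and class name (EndpointHandler constructed with the model directory) come from const.py and utils.py, while the serving method name (__call__) and payload shape are assumed rather than confirmed, beyond the new integration test expecting the test handler to echo {"inputs": "this is a test"} back as the prediction.

# handler.py -- hypothetical minimal custom handler (echoes the request,
# which is what test_pt_container_custom_handler above expects: prediction == payload)
from typing import Any, Dict


class EndpointHandler:
    def __init__(self, path: str = ""):
        # `path` is the model directory the toolkit passes in
        # (see `handler.EndpointHandler(model_dir)` in utils.py above);
        # a real handler would load its model/tokenizer here.
        self.path = path

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        # assumed entry point: receives the deserialized request body,
        # e.g. {"inputs": "this is a test"}, and returns the prediction
        return data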