diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..2cb0b490 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +.github +.pytest_cache +.ruff_cache +.tox +.venv +.gitignore +makefile +__pycache__ +tests +.vscode diff --git a/.github/workflows/build-container.yaml b/.github/workflows/build-container.yaml index 031207c0..fe12fbf6 100644 --- a/.github/workflows/build-container.yaml +++ b/.github/workflows/build-container.yaml @@ -19,7 +19,8 @@ jobs: uses: ./.github/workflows/docker-build-action.yaml with: image: inference-pytorch-cpu - dockerfile: dockerfiles/pytorch/cpu/Dockerfile + dockerfile: dockerfiles/pytorch/Dockerfile + build_args: "BASE_IMAGE=ubuntu:22.04" secrets: TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }} REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }} @@ -28,7 +29,7 @@ jobs: uses: ./.github/workflows/docker-build-action.yaml with: image: inference-pytorch-gpu - dockerfile: dockerfiles/pytorch/gpu/Dockerfile + dockerfile: dockerfiles/pytorch/Dockerfile secrets: TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }} REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }} diff --git a/.github/workflows/gpu-integ-test.yaml b/.github/workflows/gpu-integ-test.yaml deleted file mode 100644 index ede153ea..00000000 --- a/.github/workflows/gpu-integ-test.yaml +++ /dev/null @@ -1,116 +0,0 @@ -name: GPU - Run Integration Tests - -on: - push: - branches: - - main - pull_request: - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - - -jobs: - start-runner: - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-0dc1c26161f869ed1 - EC2_INSTANCE_TYPE: g4dn.xlarge - EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180 - EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13 - EC2_IAM_ROLE: optimum-ec2-github-actions-role - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - iam-role-name: ${{ env.EC2_IAM_ROLE }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-optimum-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] - pytorch-integration-test: - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - env: - AWS_REGION: us-east-1 - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch] - - name: Build Docker - run: docker build -t starlette-transformers:gpu -f dockerfiles/pytorch/gpu/Dockerfile . 
- - name: Run Integration Tests - run: RUN_SLOW=True make integ-test - tensorflow-integration-test: - needs: - - start-runner - - pytorch-integration-test - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - env: - AWS_REGION: us-east-1 - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Uninstall pytorch - run: pip uninstall torch torchvision -y - - name: Install Python dependencies - run: pip install -e .[test,dev,tensorflow] - - name: Build Docker - run: docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile . - - name: Run Integration Tests - run: RUN_SLOW=True make integ-test - - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner - - pytorch-integration-test - - tensorflow-integration-test - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} \ No newline at end of file diff --git a/.github/workflows/integ-test.yaml b/.github/workflows/integ-test.yaml deleted file mode 100644 index f6f6bba0..00000000 --- a/.github/workflows/integ-test.yaml +++ /dev/null @@ -1,51 +0,0 @@ -name: CPU - Run Integration Tests - -on: - push: - branches: - - main - pull_request: - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - - -jobs: - pytorch-integration-test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch] - - name: Build Docker - run: docker build -t starlette-transformers:cpu -f dockerfiles/pytorch/cpu/Dockerfile . - - name: Run Integration Tests - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_DEFAULT_REGION: us-east-1 - run: make integ-test - tensorflow-integration-test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - name: Install Python dependencies - run: pip install -e .[test,dev,tensorflow] - - name: Build Docker - run: docker build -t starlette-transformers:cpu -f dockerfiles/tensorflow/cpu/Dockerfile . 
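The per-workflow `docker build` / `make integ-test` steps being removed here are superseded by pytest fixtures that drive the freshly built images through the Docker SDK (see `tests/integ/conftest.py` later in this diff). Below is a minimal sketch of that container round-trip; the image tag, model id, task and port are illustrative assumptions rather than values taken from the CI configuration, while the `/health` endpoint and `integration-test-pytorch:*` tag follow the helpers and makefile in this PR.

```python
# Sketch of the container round-trip the new pytest fixtures perform.
import time

import docker
import requests

client = docker.from_env()
container = client.containers.run(
    "integration-test-pytorch:cpu",  # built via `make inference-pytorch-cpu`
    ports={"5000": 5000},            # expose the Starlette webservice
    environment={
        "HF_MODEL_ID": "hf-internal-testing/tiny-random-distilbert",  # illustrative
        "HF_TASK": "text-classification",                             # illustrative
    },
    detach=True,
)
try:
    # poll the health endpoint until the model is loaded
    for _ in range(30):
        try:
            if requests.get("http://localhost:5000/health").status_code == 200:
                break
        except requests.exceptions.ConnectionError:
            pass
        time.sleep(2)
    # send one inference request against the running container
    prediction = requests.post("http://localhost:5000", json={"inputs": "I like you."}).json()
    print(prediction)
finally:
    container.stop()
    container.remove()
```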
- - name: Run Integration Tests - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - AWS_DEFAULT_REGION: us-east-1 - run: make integ-test \ No newline at end of file diff --git a/.github/workflows/integration-test-action.yaml b/.github/workflows/integration-test-action.yaml new file mode 100644 index 00000000..2e3479fc --- /dev/null +++ b/.github/workflows/integration-test-action.yaml @@ -0,0 +1,69 @@ +on: + workflow_call: + inputs: + region: + type: string + required: false + default: "us-east-1" + hf_home: + required: false + type: string + default: "/mnt/hf_cache/" + hf_hub_cache: + required: false + type: string + default: "/mnt/hf_cache/hub" + run_slow: + required: false + type: string + default: "True" + test_path: + type: string + required: true + test_parallelism: + type: string + required: false + default: "4" + build_img_cmd: + type: string + required: false + default: "make inference-pytorch-gpu" + log_level: + type: string + required: false + default: "ERROR" + log_format: + type: string + required: false + default: "%(asctime)s %(levelname)s %(module)s:%(lineno)d %(message)s" + runs_on: + type: string + required: false + default: '["single-gpu", "nvidia-gpu", "t4", "ci"]' + +jobs: + pytorch-integration-tests: + runs-on: ${{ fromJson(inputs.runs_on) }} + env: + AWS_REGION: ${{ inputs.region }} + HF_HOME: ${{ inputs.hf_home }} + HF_HUB_CACHE: ${{ inputs.hf_hub_cache }} + RUN_SLOW: ${{ inputs.run_slow }} + steps: + - uses: actions/checkout@v4.1.1 + - name: Docker Setup Buildx + uses: docker/setup-buildx-action@v3.0.0 + - name: Docker Build + run: ${{ inputs.build_img_cmd }} + - name: Set up Python 3.11 + uses: actions/setup-python@v2 + with: + python-version: 3.11 + - name: Install dependencies + run: pip install ".[torch, test]" + - name: Run local integration tests + run: | + python -m pytest \ + ${{ inputs.test_path }} -n ${{ inputs.test_parallelism }} \ + --log-cli-level='${{ inputs.log_level }}' \ + --log-format='${{ inputs.log_format }}' \ No newline at end of file diff --git a/.github/workflows/integration-test.yaml b/.github/workflows/integration-test.yaml new file mode 100644 index 00000000..7aa1aa2f --- /dev/null +++ b/.github/workflows/integration-test.yaml @@ -0,0 +1,44 @@ +name: Run Integration Tests + +on: + push: + paths-ignore: + - 'README.md' + - '.github/workflows/unit-test.yaml' + - '.github/workflows/quality.yaml' + branches: + - main + pull_request: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + pytorch-integration-local-gpu: + name: Local Integration Tests - GPU + uses: ./.github/workflows/integration-test-action.yaml + with: + test_path: "tests/integ/test_pytorch_local_gpu.py" + build_img_cmd: "make inference-pytorch-gpu" + pytorch-integration-remote-gpu: + name: Remote Integration Tests - GPU + uses: ./.github/workflows/integration-test-action.yaml + with: + test_path: "tests/integ/test_pytorch_remote_gpu.py" + build_img_cmd: "make inference-pytorch-gpu" + pytorch-integration-remote-cpu: + name: Remote Integration Tests - CPU + uses: ./.github/workflows/integration-test-action.yaml + with: + test_path: "tests/integ/test_pytorch_remote_cpu.py" + build_img_cmd: "make inference-pytorch-cpu" + runs_on: "['ci']" + pytorch-integration-local-cpu: + name: Local Integration Tests - CPU + uses: ./.github/workflows/integration-test-action.yaml + with: + test_path: 
"tests/integ/test_pytorch_local_cpu.py" + build_img_cmd: "make inference-pytorch-cpu" + runs_on: "['ci']" \ No newline at end of file diff --git a/.github/workflows/quality.yaml b/.github/workflows/quality.yaml index 6c7e6c57..09929fde 100644 --- a/.github/workflows/quality.yaml +++ b/.github/workflows/quality.yaml @@ -2,6 +2,8 @@ name: Quality Check on: push: + paths-ignore: + - 'README.md' branches: - main pull_request: @@ -16,10 +18,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Set up Python 3.9 + - name: Set up Python 3.11 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: 3.11 - name: Install Python dependencies run: pip install -e .[quality] - name: Run Quality check diff --git a/.github/workflows/unit-test.yaml b/.github/workflows/unit-test.yaml index 599b8f7f..a15cca96 100644 --- a/.github/workflows/unit-test.yaml +++ b/.github/workflows/unit-test.yaml @@ -2,57 +2,47 @@ name: Run Unit-Tests on: push: + paths-ignore: + - 'README.md' branches: - - main + - main pull_request: workflow_dispatch: +env: + ACTIONS_RUNNER_DEBUG: true + ACTIONS_STEP_DEBUG: true + concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: pytorch-unit-test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9.12 - - name: Install Python dependencies - run: pip install -e .[test,dev,torch,st] - - uses: FedericoCarboni/setup-ffmpeg@v2 - id: setup-ffmpeg - - name: Run Unit test_const - run: python -m pytest -s -v ./tests/unit/test_const.py - - name: Run Unit test_handler - run: python -m pytest -s -v ./tests/unit/test_handler.py - - name: Run Unit test_sentence_transformers - run: python -m pytest -s -v ./tests/unit/test_sentence_transformers.py - - name: Run Unit test_serializer - run: python -m pytest -s -v ./tests/unit/test_serializer.py - - name: Run Unit test_utils - run: python -m pytest -s -v ./tests/unit/test_utils.py - tensorflow-unit-test: - runs-on: ubuntu-latest + runs-on: [single-gpu, nvidia-gpu, t4, ci] + env: + AWS_REGION: us-east-1 + CACHE_TEST_DIR: /mnt/hf_cache/hf-inference-toolkit-tests + RUN_SLOW: True steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9.12 - - name: Install Python dependencies - run: pip install -e .[test,dev,tensorflow] - - name: Run Unit test_const - run: python -m pytest -s -v ./tests/unit/test_const.py - - name: Run Unit test_handler - run: python -m pytest -s -v ./tests/unit/test_handler.py - - name: Run Unit test_sentence_transformers - run: python -m pytest -s -v ./tests/unit/test_sentence_transformers.py - - name: Run Unit test_serializer - run: python -m pytest -s -v ./tests/unit/test_serializer.py - - name: Run Unit test_utils - run: python -m pytest -s -v ./tests/unit/test_utils.py - - + - uses: actions/checkout@v4.1.1 + - name: Copy unit tests to cache mount + run: | + rm -rf ${{ env.CACHE_TEST_DIR }} && \ + mkdir ${{ env.CACHE_TEST_DIR }} && \ + cp -r tests ${{ env.CACHE_TEST_DIR }} + - name: Docker Setup Buildx + uses: docker/setup-buildx-action@v3.0.0 + - name: Docker Build + run: make inference-pytorch-gpu + - name: Run unit tests + run: | + docker run \ + -e RUN_SLOW='${{ env.RUN_SLOW }}' \ + --gpus all \ + -e CACHE_TEST_DIR='${{ env.CACHE_TEST_DIR }}' \ + -v ./tests:${{ env.CACHE_TEST_DIR }} \ + --entrypoint /bin/bash \ + integration-test-pytorch:gpu \ + -c "pip install 
'.[test, st, diffusers]' && pytest ${{ env.CACHE_TEST_DIR }}/unit" + \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4042db87..bb0c387b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,15 @@ # Docker project generated files to ignore # if you want to ignore files created by your editor/tools, # please consider a global .gitignore https://help.github.com/articles/ignoring-files +.gitignore +.egg-info +.ruff_cache .vagrant* +.hcl +.terraform.lock.hcl +.terraform +pip-unpack-* +__pycache__ bin docker/docker .*.swp @@ -27,6 +35,9 @@ Vagrantfile __pycache__/ *.py[cod] *$py.class +.vscode +.make +tox.ini # C extensions *.so @@ -166,4 +177,6 @@ cython_debug/ .sagemaker model tests/tmp -tmp/ \ No newline at end of file +tmp/ +act.sh +.act \ No newline at end of file diff --git a/README.md b/README.md index fb469b1a..f2f66b40 100644 --- a/README.md +++ b/README.md @@ -24,25 +24,23 @@ HF_MODEL_ID=hf-internal-testing/tiny-random-distilbert HF_MODEL_DIR=tmp2 HF_TASK _cpu images_ ```bash -docker build -t starlette-transformers:cpu -f dockerfiles/pytorch/cpu/Dockerfile . -docker build -t starlette-transformers:cpu -f dockerfiles/tensorflow/cpu/Dockerfile . +make inference-pytorch-cpu ``` _gpu images_ ```bash -docker build -t starlette-transformers:gpu -f dockerfiles/pytorch/gpu/Dockerfile . -docker build -t starlette-transformers:gpu -f dockerfiles/tensorflow/gpu/Dockerfile . +make inference-pytorch-gpu ``` 2. Run the container and provide either environment variables to the HUB model you want to use or mount a volume to the container, where your model is stored. ```bash -docker run -ti -p 5000:5000 -e HF_MODEL_ID=distilbert-base-uncased-distilled-squad -e HF_TASK=question-answering starlette-transformers:cpu -docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=nlpconnect/vit-gpt2-image-captioning -e HF_TASK=image-to-text starlette-transformers:gpu -docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=echarlaix/tiny-random-stable-diffusion-xl -e HF_TASK=text-to-image starlette-transformers:gpu -docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=stabilityai/stable-diffusion-xl-base-1.0 -e HF_TASK=text-to-image starlette-transformers:gpu -docker run -ti -p 5000:5000 -e HF_MODEL_DIR=/repository -v $(pwd)/distilbert-base-uncased-emotion:/repository starlette-transformers:cpu +docker run -ti -p 5000:5000 -e HF_MODEL_ID=distilbert-base-uncased-distilled-squad -e HF_TASK=question-answering integration-test-pytorch:cpu +docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=nlpconnect/vit-gpt2-image-captioning -e HF_TASK=image-to-text integration-test-pytorch:gpu +docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=echarlaix/tiny-random-stable-diffusion-xl -e HF_TASK=text-to-image integration-test-pytorch:gpu +docker run -ti -p 5000:5000 --gpus all -e HF_MODEL_ID=stabilityai/stable-diffusion-xl-base-1.0 -e HF_TASK=text-to-image integration-test-pytorch:gpu +docker run -ti -p 5000:5000 -e HF_MODEL_DIR=/repository -v $(pwd)/distilbert-base-uncased-emotion:/repository integration-test-pytorch:cpu ``` @@ -184,7 +182,17 @@ Below you ll find a list of supported and tested transformers and sentence trans --- ## 🤝 Contributing -TBD. 
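To complement the `docker run` commands above, here is a hedged sketch of how a client might query the running webservice. The question/context strings and the `sample.jpg` file are illustrative assumptions; the `{"inputs": ...}` payload shape and the `image/x-image` content type mirror the integration-test helpers added in this PR.

```python
import requests

base_url = "http://localhost:5000"  # port mapped by the `docker run` commands above

# JSON tasks post an {"inputs": ...} payload (question-answering shown here)
payload = {
    "inputs": {
        "question": "Where does the toolkit run?",
        "context": "The Hugging Face Inference Toolkit serves models inside a Docker container.",
    }
}
print(requests.post(base_url, json=payload).json())

# binary tasks (image/audio) send raw bytes with an explicit content type
with open("sample.jpg", "rb") as f:  # any local test image
    print(
        requests.post(
            base_url, data=f.read(), headers={"content-type": "image/x-image"}
        ).json()
    )
```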
+### Development + +* Recommended Python version: 3.11 +* We recommend `pyenv` for easily switching between different Python versions +* There are two options for unit and integration tests: + * `Make` - see `makefile` + +#### Testing with Make + +* Unit Testing: `make unit-test` +* Integration testing: `make integ-test` --- ## 📜 License diff --git a/dockerfiles/pytorch/Dockerfile b/dockerfiles/pytorch/Dockerfile new file mode 100644 index 00000000..8e4c4d35 --- /dev/null +++ b/dockerfiles/pytorch/Dockerfile @@ -0,0 +1,48 @@ +ARG BASE_IMAGE=nvidia/cuda:12.1.0-devel-ubuntu22.04 + +FROM $BASE_IMAGE +SHELL ["/bin/bash", "-c"] + +LABEL maintainer="Hugging Face" + +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /app + +RUN apt-get update && \ + apt-get install software-properties-common -y && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \ + apt-get install -y \ + build-essential \ + bzip2 \ + curl \ + git \ + git-lfs \ + tar \ + gcc \ + g++ \ + cmake \ + libprotobuf-dev \ + protobuf-compiler \ + python3-dev \ + python3-pip \ + python3.11 \ + libsndfile1-dev \ + ffmpeg \ + && apt-get clean autoremove --yes \ + && rm -rf /var/lib/{apt,dpkg,cache,log} +# Copying only necessary files as filtered by .dockerignore +COPY . . + +# install wheel and setuptools +RUN pip install --no-cache-dir -U pip ".[torch, st, diffusers]" + +# copy application +COPY src/huggingface_inference_toolkit huggingface_inference_toolkit +COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py + +# copy entrypoint and change permissions +COPY --chmod=0755 scripts/entrypoint.sh entrypoint.sh + +ENTRYPOINT ["bash", "-c", "./entrypoint.sh"] \ No newline at end of file diff --git a/dockerfiles/pytorch/cpu/Dockerfile b/dockerfiles/pytorch/cpu/Dockerfile deleted file mode 100644 index 61e573b4..00000000 --- a/dockerfiles/pytorch/cpu/Dockerfile +++ /dev/null @@ -1,52 +0,0 @@ -FROM ubuntu:22.04 - -LABEL maintainer="Hugging Face" - -ENV DEBIAN_FRONTEND=noninteractive - -RUN apt-get update \ - && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ - && apt-get install -y \ - bzip2 \ - curl \ - git \ - git-lfs \ - tar \ - gcc \ - g++ \ - # audio - libsndfile1-dev \ - ffmpeg \ - && apt-get clean autoremove --yes \ - && rm -rf /var/lib/{apt,dpkg,cache,log} - -# install micromamba -ENV MAMBA_ROOT_PREFIX=/opt/conda -ENV PATH=/opt/conda/bin:$PATH -RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \ - && touch /root/.bashrc \ - && ./bin/micromamba shell init -s bash -p /opt/conda \ - && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc - -WORKDIR /app - -# install base python dependencies -COPY dockerfiles/pytorch/cpu/environment.yaml /app/environment.yaml -RUN micromamba install -y -n base -f environment.yaml \ - && rm environment.yaml \ - && micromamba clean --all --yes - -# install huggingface inference toolkit -COPY requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt - -# copy application -COPY src/huggingface_inference_toolkit huggingface_inference_toolkit -COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py - -# copy entrypoint and change permissions -COPY scripts/entrypoint.sh entrypoint.sh -RUN chmod +x entrypoint.sh - -# run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] diff --git a/dockerfiles/pytorch/cpu/environment.yaml 
b/dockerfiles/pytorch/cpu/environment.yaml deleted file mode 100644 index 4bd1b693..00000000 --- a/dockerfiles/pytorch/cpu/environment.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: base -channels: -- conda-forge -dependencies: -- python=3.9.13 -- pytorch::pytorch=1.13.1=py3.9_cpu_0 -- pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.31.0 - - sentence_transformers==2.2.2 - - torchvision==0.14.1 - - diffusers==0.20.0 - - accelerate==0.21.0 - - safetensors \ No newline at end of file diff --git a/dockerfiles/pytorch/gpu/Dockerfile b/dockerfiles/pytorch/gpu/Dockerfile deleted file mode 100644 index 1a3941a7..00000000 --- a/dockerfiles/pytorch/gpu/Dockerfile +++ /dev/null @@ -1,54 +0,0 @@ -FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 - -LABEL maintainer="Hugging Face" - -ENV DEBIAN_FRONTEND=noninteractive - -RUN apt-get update \ - && apt-get -y upgrade --only-upgrade systemd openssl cryptsetup \ - && apt-get install -y \ - bzip2 \ - curl \ - git \ - git-lfs \ - tar \ - gcc \ - g++ \ - # audio - libsndfile1-dev \ - ffmpeg \ - && apt-get clean autoremove --yes \ - && rm -rf /var/lib/{apt,dpkg,cache,log} - -# install micromamba -ENV MAMBA_ROOT_PREFIX=/opt/conda -ENV PATH=/opt/conda/bin:$PATH -ENV LD_LIBRARY_PATH="/opt/conda/lib:${LD_LIBRARY_PATH}" - -RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin/micromamba" \ - && touch /root/.bashrc \ - && ./bin/micromamba shell init -s bash -p /opt/conda \ - && grep -v '[ -z "\$PS1" ] && return' /root/.bashrc > /opt/conda/bashrc - -WORKDIR /app - -# install base python dependencies -COPY dockerfiles/pytorch/gpu/environment.yaml /app/environment.yaml -RUN micromamba install -y -n base -f environment.yaml \ - && rm environment.yaml \ - && micromamba clean --all --yes - -# install huggingface inference toolkit -COPY requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt - -# copy application -COPY src/huggingface_inference_toolkit huggingface_inference_toolkit -COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py - -# copy entrypoint and change permissions -COPY scripts/entrypoint.sh entrypoint.sh -RUN chmod +x entrypoint.sh - -# run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] diff --git a/dockerfiles/pytorch/gpu/environment.yaml b/dockerfiles/pytorch/gpu/environment.yaml deleted file mode 100644 index 8c1012f7..00000000 --- a/dockerfiles/pytorch/gpu/environment.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: base -channels: -- conda-forge -dependencies: -- python=3.9.13 -- nvidia::cudatoolkit=11.7 -- pytorch::pytorch=1.13.1=py3.9_cuda11.7* -- pip: - - transformers[sklearn,sentencepiece,audio,vision]==4.31.0 - - sentence_transformers==2.2.2 - - torchvision==0.14.1 - - diffusers==0.20.0 - - accelerate==0.21.0 - - safetensors \ No newline at end of file diff --git a/dockerfiles/tensorflow/cpu/Dockerfile b/dockerfiles/tensorflow/cpu/Dockerfile index c52abf13..d16010bb 100644 --- a/dockerfiles/tensorflow/cpu/Dockerfile +++ b/dockerfiles/tensorflow/cpu/Dockerfile @@ -14,6 +14,7 @@ RUN apt-get update \ tar \ gcc \ g++ \ + cmake \ # audio libsndfile1-dev \ ffmpeg \ @@ -49,4 +50,4 @@ COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh # run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] +ENTRYPOINT ["/bin/bash", "entrypoint.sh"] \ No newline at end of file diff --git a/dockerfiles/tensorflow/gpu/Dockerfile b/dockerfiles/tensorflow/gpu/Dockerfile index d989111c..02018371 100644 --- 
a/dockerfiles/tensorflow/gpu/Dockerfile +++ b/dockerfiles/tensorflow/gpu/Dockerfile @@ -15,6 +15,7 @@ RUN apt-get update \ tar \ gcc \ g++ \ + cmake \ # audio libsndfile1-dev \ ffmpeg \ @@ -33,6 +34,11 @@ RUN curl -L https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xj "bin WORKDIR /app +RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ + source $HOME/.cargo/env && \ + source .venv/bin/activate && \ + ls -all + # install base python dependencies COPY dockerfiles/tensorflow/gpu/environment.yaml /app/environment.yaml RUN micromamba install -y -n base -f environment.yaml \ @@ -43,6 +49,9 @@ RUN micromamba install -y -n base -f environment.yaml \ COPY requirements.txt /tmp/requirements.txt RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt +# copy tests +COPY . /tmp/hf-inference-test + # copy application COPY src/huggingface_inference_toolkit huggingface_inference_toolkit COPY src/huggingface_inference_toolkit/webservice_starlette.py webservice_starlette.py @@ -52,4 +61,4 @@ COPY scripts/entrypoint.sh entrypoint.sh RUN chmod +x entrypoint.sh # run app -ENTRYPOINT ["/bin/bash", "entrypoint.sh"] +ENTRYPOINT ["/bin/bash", "entrypoint.sh"] \ No newline at end of file diff --git a/makefile b/makefile index 49855723..a9490428 100644 --- a/makefile +++ b/makefile @@ -5,10 +5,10 @@ check_dirs := src # run tests unit-test: - python3 -m pytest -s -v ./tests/unit + RUN_SLOW=True python3 -m pytest -s -v tests/unit -n 10 --log-cli-level='ERROR' integ-test: - python3 -m pytest -s -v ./tests/integ/ + python3 -m pytest -s -v tests/integ/ # Check that source code meets quality standards @@ -18,4 +18,13 @@ quality: # Format source code automatically style: - ruff $(check_dirs) --fix \ No newline at end of file + ruff $(check_dirs) --fix + +inference-pytorch-gpu: + docker build -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:gpu . + +inference-pytorch-cpu: + docker build --build-arg="BASE_IMAGE=ubuntu:22.04" -f dockerfiles/pytorch/Dockerfile -t integration-test-pytorch:cpu . + +stop-all: + docker stop $$(docker ps -a -q) && docker container prune --force \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 96ef9084..a692967f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ no_implicit_optional = true scripts_are_modules = true [tool.ruff] -select = [ +lint.select = [ "E", # pycodestyle errors "W", # pycodestyle warnings "F", # pyflakes @@ -12,8 +12,8 @@ select = [ "C", # flake8-comprehensions "B", # flake8-bugbear ] -ignore = [ - "E501", # line too long, handled by black +lint.ignore = [ + "E501", # Line length (handled by ruff-format) "B008", # do not perform function calls in argument defaults "C901", # too complex ] @@ -21,14 +21,13 @@ ignore = [ line-length = 119 # Allow unused variables when underscore-prefixed. -dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" +lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" -# Assume Python 3.8. -target-version = "py39" +# Assume Python 3.11. 
+target-version = "py311" -[tool.ruff.per-file-ignores] -"__init__.py" = ["F401"] +lint.per-file-ignores = {"__init__.py" = ["F401"]} [tool.isort] profile = "black" -known_third_party = ["transforemrs", "starlette", "huggingface_hub"] +known_third_party = ["transformers", "starlette", "huggingface_hub"] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 8a178f8d..e69de29b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +0,0 @@ -orjson -starlette -uvicorn -pandas -huggingface_hub>=0.13.2 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 924033ba..21085086 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,6 @@ known_third_party = torch robyn - line_length = 119 lines_after_imports = 2 multi_line_output = 3 diff --git a/setup.py b/setup.py index 92132915..bdd64fba 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,11 @@ from __future__ import absolute_import -import os from datetime import date from setuptools import find_packages, setup # We don't declare our dependency on transformers here because we build with # different packages for different variants -VERSION = "0.1.0" +VERSION = "0.3.0" # Ubuntu packages @@ -15,33 +14,33 @@ # libavcodec-extra : libavcodec-extra inculdes additional codecs for ffmpeg install_requires = [ - # transformers - "transformers[sklearn,sentencepiece]>=4.25.1", - "huggingface_hub>=0.13.3", - # api stuff + "wheel==0.42.0", + "setuptools==69.1.0", + "cmake==3.28.3", + "transformers[sklearn,sentencepiece, audio, vision]==4.38.1", + "huggingface_hub==0.20.3", "orjson", - # "robyn", # vision "Pillow", - # speech + torchaudio "librosa", + # speech + torchaudio "pyctcdecode>=0.3.0", "phonemizer", + "ffmpeg", + # web api + "starlette", + "uvicorn", + "pandas" ] extras = {} -extras["st"] = ["sentence_transformers"] -extras["diffusers"] = ["diffusers==0.8.1", "accelerate==0.14.0"] - - -# Hugging Face specific dependencies -# framework specific dependencies -extras["torch"] = ["torch>=1.8.0", "torchaudio"] -extras["tensorflow"] = ["tensorflow==2.9.0"] -# test and quality +extras["st"] = ["sentence_transformers==2.4.0"] +extras["diffusers"] = ["diffusers==0.26.3", "accelerate==0.27.2"] +extras["torch"] = ["torch==2.2.0", "torchvision", "torchaudio"] +extras["tensorflow"] = ["tensorflow"] extras["test"] = [ - "pytest", + "pytest==7.2.1", "pytest-xdist", "parameterized", "psutil", @@ -50,12 +49,11 @@ "mock==2.0.0", "docker", "requests", + "tenacity" ] extras["quality"] = [ - "black", "isort", - "flake8", - "ruff", + "ruff" ] setup( @@ -63,9 +61,6 @@ version=VERSION, author="HuggingFace", description=".", - # long_description=open("README.md", "r", encoding="utf-8").read(), - # long_description_content_type="text/markdown", - # keywords="NLP deep-learning transformer pytorch tensorflow BERT GPT GPT-2 AWS Amazon SageMaker Cloud", url="", package_dir={"": "src"}, packages=find_packages(where="src"), @@ -82,7 +77,7 @@ "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering :: Artificial Intelligence", ], ) diff --git a/src/huggingface_inference_toolkit/diffusers_utils.py b/src/huggingface_inference_toolkit/diffusers_utils.py index 7068df9d..521a85df 100644 --- a/src/huggingface_inference_toolkit/diffusers_utils.py +++ b/src/huggingface_inference_toolkit/diffusers_utils.py @@ -32,6 +32,7 @@ def __init__(self, model_dir: str, 
device: str = None): # needs "cuda" for GPU self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(self.pipeline.scheduler.config) except Exception: pass + self.pipeline.to(device) def __call__( diff --git a/src/huggingface_inference_toolkit/handler.py b/src/huggingface_inference_toolkit/handler.py index 097a12c9..08368326 100644 --- a/src/huggingface_inference_toolkit/handler.py +++ b/src/huggingface_inference_toolkit/handler.py @@ -10,11 +10,16 @@ class HuggingFaceHandler: """ - A Default Hugging Face Inference Handler which works with all transformers pipelines, Sentence Transformers and Optimum. + A Default Hugging Face Inference Handler which works with all + transformers pipelines, Sentence Transformers and Optimum. """ - def __init__(self, model_dir: Union[str, Path], task=None): - self.pipeline = get_pipeline(model_dir=model_dir, task=task) + def __init__(self, model_dir: Union[str, Path], task=None, framework="pt"): + self.pipeline = get_pipeline( + model_dir=model_dir, + task=task, + framework=framework + ) def __call__(self, data): """ @@ -25,6 +30,7 @@ def __call__(self, data): """ inputs = data.pop("inputs", data) parameters = data.pop("parameters", None) + # pass inputs with all kwargs in data if parameters is not None: prediction = self.pipeline(inputs, **parameters) @@ -34,7 +40,10 @@ def __call__(self, data): return prediction -def get_inference_handler_either_custom_or_default_handler(model_dir: Path, task: Optional[str] = None): +def get_inference_handler_either_custom_or_default_handler( + model_dir: Path, + task: Optional[str] = None +): """ get inference handler either custom or default Handler """ diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py index 2a3c0055..72bb2ee2 100644 --- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py +++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py @@ -47,7 +47,12 @@ def __call__(self, inputs): } -def get_sentence_transformers_pipeline(task=None, model_dir=None, device=-1, **kwargs): +def get_sentence_transformers_pipeline( + task=None, + model_dir=None, + device=-1, + **kwargs +): device = "cuda" if device == 0 else "cpu" pipeline = SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir, device=device) return pipeline diff --git a/src/huggingface_inference_toolkit/serialization/base.py b/src/huggingface_inference_toolkit/serialization/base.py index eb965b64..dc7d6839 100644 --- a/src/huggingface_inference_toolkit/serialization/base.py +++ b/src/huggingface_inference_toolkit/serialization/base.py @@ -42,15 +42,21 @@ def get_deserializer(content_type): if content_type in content_type_mapping: return content_type_mapping[content_type] else: - raise Exception( - f'Content type "{content_type}" not supported. Supported content types are: {", ".join(list(content_type_mapping.keys()))}' - ) + message = f""" + Content type "{content_type}" not supported. + Supported content types are: + {", ".join(list(content_type_mapping.keys()))} + """ + raise Exception(message) @staticmethod def get_serializer(accept): if accept in content_type_mapping: return content_type_mapping[accept] else: - raise Exception( - f'Accept type "{accept}" not supported. Supported accept types are: {", ".join(list(content_type_mapping.keys()))}' - ) + message = f""" + Accept type "{accept}" not supported. 
+ Supported accept types are: + {", ".join(list(content_type_mapping.keys()))} + """ + raise Exception(message) diff --git a/src/huggingface_inference_toolkit/utils.py b/src/huggingface_inference_toolkit/utils.py index ffe8d2c3..1570317b 100644 --- a/src/huggingface_inference_toolkit/utils.py +++ b/src/huggingface_inference_toolkit/utils.py @@ -7,7 +7,7 @@ from huggingface_hub import HfApi, login, snapshot_download from transformers import WhisperForConditionalGeneration, pipeline from transformers.file_utils import is_tf_available, is_torch_available -from transformers.pipelines import Conversation, Pipeline +from transformers.pipelines import Pipeline from huggingface_inference_toolkit.const import HF_DEFAULT_PIPELINE_NAME, HF_MODULE_NAME from huggingface_inference_toolkit.diffusers_utils import ( @@ -75,19 +75,12 @@ def wrap_conversation_pipeline(pipeline): """ def wrapped_pipeline(inputs, *args, **kwargs): - converted_input = Conversation( - inputs["text"], - past_user_inputs=inputs.get("past_user_inputs", []), - generated_responses=inputs.get("generated_responses", []), - ) - prediction = pipeline(converted_input, *args, **kwargs) - return { - "generated_text": prediction.generated_responses[-1], - "conversation": { - "past_user_inputs": prediction.past_user_inputs, - "generated_responses": prediction.generated_responses, - }, - } + logger.info(f"Inputs: {inputs}") + logger.info(f"Args: {args}") + logger.info(f"KWArgs: {kwargs}") + prediction = pipeline(inputs, *args, **kwargs) + logger.info(f"Prediction: {prediction}") + return list(prediction) return wrapped_pipeline @@ -112,6 +105,7 @@ def _get_framework(): """ extracts which DL framework is used for inference, if both are installed use pytorch """ + if is_torch_available(): return "pytorch" elif is_tf_available(): @@ -134,6 +128,7 @@ def _load_repository_from_hf( """ Load a model from huggingface hub. """ + if hf_hub_token is not None: login(token=hf_hub_token) @@ -157,13 +152,14 @@ def _load_repository_from_hf( ignore_regex = create_artifact_filter(framework) logger.info(f"Ignore regex pattern for files, which are not downloaded: { ', '.join(ignore_regex) }") - # Download the repository to the workdir and filter out non-framework specific weights + # Download the repository to the workdir and filter out non-framework + # specific weights snapshot_download( - repository_id, - revision=revision, - local_dir=str(target_dir), - local_dir_use_symlinks=False, - ignore_patterns=ignore_regex, + repo_id = repository_id, + revision = revision, + local_dir = str(target_dir), + local_dir_use_symlinks = False, + ignore_patterns = ignore_regex, ) return target_dir @@ -188,9 +184,12 @@ def check_and_register_custom_pipeline_from_directory(model_dir): spec.loader.exec_module(handler) # init custom handler with model_dir custom_pipeline = handler.EndpointHandler(model_dir) + elif legacy_module.is_file(): logger.warning( - "You are using a legacy custom pipeline with. Please update to the new format. See documentation for more information." + """You are using a legacy custom pipeline. + Please update to the new format. + See documentation for more information.""" ) spec = importlib.util.spec_from_file_location("pipeline.PreTrainedPipeline", legacy_module) if spec: @@ -212,13 +211,20 @@ def get_device(): """ The get device function will return the device for the DL Framework. 
""" - if _is_gpu_available(): + gpu = _is_gpu_available() + + if gpu: return 0 else: return -1 -def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: +def get_pipeline( + task: str, + model_dir: Path, + framework = "pytorch", + **kwargs, +) -> Pipeline: """ create pipeline class for a specific task based on local saved model """ @@ -229,7 +235,8 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: raise EnvironmentError( "The task for this model is not set: Please set one: https://huggingface.co/docs#how-is-a-models-type-of-inference-api-and-widget-determined" ) - # define tokenizer or feature extractor as kwargs to load it the pipeline correctly + # define tokenizer or feature extractor as kwargs to load it the pipeline + # correctly if task in { "automatic-speech-recognition", "image-segmentation", @@ -244,37 +251,50 @@ def get_pipeline(task: str, model_dir: Path, **kwargs) -> Pipeline: else: kwargs["tokenizer"] = model_dir - # add check for optimum accelerated pipeline if is_optimum_available(): - # TODO: add check for optimum accelerated pipeline - logger.info("Optimum is not implement yet using default pipeline.") + logger.info("Optimum is not implemented yet using default pipeline.") hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) elif is_sentence_transformers_available() and task in [ "sentence-similarity", "sentence-embeddings", "sentence-ranking", ]: - hf_pipeline = get_sentence_transformers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) + hf_pipeline = get_sentence_transformers_pipeline( + task=task, + model_dir=model_dir, + device=device, + **kwargs + ) elif is_diffusers_available() and task == "text-to-image": - hf_pipeline = get_diffusers_pipeline(task=task, model_dir=model_dir, device=device, **kwargs) + hf_pipeline = get_diffusers_pipeline( + task=task, + model_dir=model_dir, + device=device, + **kwargs + ) else: - hf_pipeline = pipeline(task=task, model=model_dir, device=device, **kwargs) + hf_pipeline = pipeline( + task=task, + model=model_dir, + device=device, + **kwargs + ) - # wrapp specific pipeline to support better ux + # wrap specific pipeline to support better ux if task == "conversational": hf_pipeline = wrap_conversation_pipeline(hf_pipeline) - elif task == "automatic-speech-recognition" and isinstance(hf_pipeline.model, WhisperForConditionalGeneration): + + elif task == "automatic-speech-recognition" and isinstance( + hf_pipeline.model, + WhisperForConditionalGeneration + ): # set chunk length to 30s for whisper to enable long audio files hf_pipeline._preprocess_params["chunk_length_s"] = 30 - hf_pipeline._preprocess_params["ignore_warning"] = True - # set decoder to english by default - # TODO: replace when transformers 4.26.0 is release with - # hf_pipeline.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=lang, task="transcribe") - hf_pipeline.tokenizer.language = "english" - hf_pipeline.tokenizer.task = "transcribe" - hf_pipeline.model.config.forced_decoder_ids = [ - (rank + 1, token) for rank, token in enumerate(hf_pipeline.tokenizer.prefix_tokens[1:]) - ] + hf_pipeline.model.config.forced_decoder_ids = hf_pipeline.tokenizer.get_decoder_prompt_ids( + language="english", + task="transcribe" + ) + return hf_pipeline diff --git a/src/huggingface_inference_toolkit/webservice_robyn.py b/src/huggingface_inference_toolkit/webservice_robyn.py index a1c437af..5aeaf605 100644 --- a/src/huggingface_inference_toolkit/webservice_robyn.py +++ 
b/src/huggingface_inference_toolkit/webservice_robyn.py @@ -21,7 +21,10 @@ # if empty_directory_or_not_hf_remote_id is None or task is None: # raise ValueError( -# f"Can't initialize model. Please set correct model id and task. provided values are model_id:{model_id_or_path} and task:{task}" +# f"""Can't initialize model. +# Please set correct model id and task. +# Provided values are model_id: +# {model_id_or_path} and task:{task}""" # ) # logger.info(f"Initializing model with model_id:{model_id_or_path} and task:{task}") diff --git a/src/huggingface_inference_toolkit/webservice_starlette.py b/src/huggingface_inference_toolkit/webservice_starlette.py index 64935925..8bc68b2e 100644 --- a/src/huggingface_inference_toolkit/webservice_starlette.py +++ b/src/huggingface_inference_toolkit/webservice_starlette.py @@ -49,7 +49,10 @@ async def some_startup_task(): ) else: raise ValueError( - f"Can't initialize model. Please set env HF_MODEL_DIR or provider a HF_MODEL_ID. Provided values are HF_MODEL_DIR:{HF_MODEL_DIR} and HF_MODEL_ID:{HF_MODEL_ID}" + f"""Can't initialize model. + Please set env HF_MODEL_DIR or provider a HF_MODEL_ID. + Provided values are: + HF_MODEL_DIR: {HF_MODEL_DIR} and HF_MODEL_ID:{HF_MODEL_ID}""" ) logger.info(f"Initializing model from directory:{HF_MODEL_DIR}") diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integ/config.py b/tests/integ/config.py index 467afde2..b1d4d605 100644 --- a/tests/integ/config.py +++ b/tests/integ/config.py @@ -1,6 +1,6 @@ import os -from integ.utils import ( +from tests.integ.utils import ( validate_automatic_speech_recognition, validate_classification, validate_feature_extraction, @@ -14,6 +14,8 @@ validate_text_to_image, validate_translation, validate_zero_shot_classification, + validate_custom, + validate_conversational ) @@ -63,32 +65,25 @@ "tensorflow": "hf-internal-testing/tiny-random-vit", }, "automatic-speech-recognition": { - "pytorch": "hf-internal-testing/tiny-random-wav2vec2", + "pytorch": "hf-internal-testing/tiny-random-Wav2Vec2Model", "tensorflow": None, }, "audio-classification": { - "pytorch": "hf-internal-testing/tiny-random-wavlm", + "pytorch": "hf-internal-testing/tiny-random-WavLMModel", "tensorflow": None, }, "object-detection": { "pytorch": "hustvl/yolos-tiny", "tensorflow": None, }, - "image-segmentation": { - "pytorch": "hf-internal-testing/tiny-random-beit-pipeline", - "tensorflow": None, - }, - "table-question-answering": { - "pytorch": "philschmid/tapex-tiny", - "tensorflow": None, - }, "zero-shot-image-classification": { "pytorch": "hf-internal-testing/tiny-random-clip-zero-shot-image-classification", "tensorflow": "hf-internal-testing/tiny-random-clip-zero-shot-image-classification", }, "conversational": { - "pytorch": "hf-internal-testing/tiny-random-blenderbot", - "tensorflow": "hf-internal-testing/tiny-random-blenderbot", + #"pytorch": "hf-internal-testing/tiny-random-blenderbot-small", + "pytorch": "microsoft/DialoGPT-small", + "tensorflow": None, }, "sentence-similarity": { "pytorch": "sentence-transformers/all-MiniLM-L6-v2", @@ -106,6 +101,14 @@ "pytorch": "hf-internal-testing/tiny-stable-diffusion-torch", "tensorflow": None, }, + "table-question-answering": { + "pytorch": "philschmid/tapex-tiny", + "tensorflow": None, + }, + "image-segmentation": { + "pytorch": "hf-internal-testing/tiny-random-beit-pipeline", + "tensorflow": None, + }, } @@ -149,19 +152,27 @@ }, } }, - "conversational": { - "inputs": { - "past_user_inputs": ["Which movie is 
the best ?"], - "generated_responses": ["It's Die Hard for sure."], - "text": "Can you explain why?", + "conversational": {"inputs": [ + { + "role": "user", + "content": "Which movie is the best ?" + }, + { + "role": "assistant", + "content": "It's Die Hard for sure." + }, + { + "role": "user", + "content": "Can you explain why?" } - }, + ]}, "sentence-similarity": { "inputs": {"source_sentence": "Lets create an embedding", "sentences": ["Lets create an embedding"]} }, "sentence-embeddings": {"inputs": "Lets create an embedding"}, "sentence-ranking": {"inputs": ["Lets create an embedding", "Lets create an embedding"]}, "text-to-image": {"inputs": "a man on a horse jumps over a broken down airplane."}, + "custom": {"inputs": "this is a test"} } task2output = { @@ -206,11 +217,17 @@ "object-detection": [{"score": 0.9143241047859192, "label": "cat", "box": {}}], "image-segmentation": [{"score": 0.9143241047859192, "label": "cat", "mask": {}}], "table-question-answering": {"answer": "36542"}, - "conversational": {"generated_text": "", "conversation": {}}, + "conversational": [ + {'role': 'user', 'content': 'Which movie is the best ?'}, + {'role': 'assistant', 'content': "It's Die Hard for sure."}, + {'role': 'user', 'content': 'Can you explain why?'}, + {'role': 'assistant', 'content': "It's a great movie."}, + ], "sentence-similarity": {"similarities": ""}, "sentence-embeddings": {"embeddings": ""}, "sentence-ranking": {"scores": ""}, "text-to-image": bytes, + "custom": {"inputs": "this is a test"} } @@ -232,9 +249,10 @@ "object-detection": validate_object_detection, "image-segmentation": validate_object_detection, "table-question-answering": validate_zero_shot_classification, - "conversational": validate_zero_shot_classification, + "conversational": validate_conversational, "sentence-similarity": validate_zero_shot_classification, "sentence-embeddings": validate_zero_shot_classification, "sentence-ranking": validate_zero_shot_classification, "text-to-image": validate_text_to_image, + "custom": validate_custom } diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py new file mode 100644 index 00000000..ec282ea8 --- /dev/null +++ b/tests/integ/conftest.py @@ -0,0 +1,159 @@ +import docker +import pytest +import random +import logging +from tests.integ.config import task2model +import tenacity +import time +from huggingface_inference_toolkit.utils import ( + _is_gpu_available, + _load_repository_from_hf +) +from transformers.testing_utils import ( + slow, + _run_slow_tests +) +import uuid +import socket +import os + +HF_HUB_CACHE = os.environ.get("HF_HUB_CACHE", "/home/ubuntu/.cache/huggingface/hub") +IS_GPU = _run_slow_tests +DEVICE = "gpu" if IS_GPU else "cpu" + +@tenacity.retry( + retry = tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(10) +) +@pytest.fixture(scope = "function") +def remote_container( + device, + task, + framework +): + time.sleep(random.randint(1, 5)) + #client = docker.DockerClient(base_url='unix://var/run/docker.sock') + client = docker.from_env() + container_name = f"integration-test-{framework}-{task}-{device}" + container_image = f"integration-test-{framework}:{device}" + port = random.randint(5000, 9000) + model = task2model[task][framework] + + #check if port is already open + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + while sock.connect_ex(("localhost", port)) == 0: + logging.debug(f"Port {port} is already being used; getting a new one...") + port = random.randint(5000, 9000) + + logging.debug(f"Image: 
{container_image}") + logging.debug(f"Port: {port}") + + device_request = [ + docker.types.DeviceRequest( + count=-1, + capabilities=[["gpu"]]) + ] if device == "gpu" else [] + + yield client.containers.run( + image = container_image, + name=container_name, + ports={"5000": port}, + environment={ + "HF_MODEL_ID": model, + "HF_TASK": task, + "CUDA_LAUNCH_BLOCKING": 1 + }, + detach=True, + # GPU + device_requests=device_request, + ), port + + #Teardown + previous = client.containers.get(container_name) + previous.stop() + previous.remove() + + +@tenacity.retry( + stop = tenacity.stop_after_attempt(10), + reraise = True +) +@pytest.fixture(scope = "function") +def local_container( + device, + task, + repository_id, + framework +): + try: + time.sleep(random.randint(1, 5)) + id = uuid.uuid4() + if not (task == "custom"): + model = task2model[task][framework] + id = task + else: + model = repository_id + + logging.info(f"Starting container with model: {model}") + + if not model: + message = f"No model supported for {framework}" + logging.error(message) + raise ValueError(message) + + logging.info(f"Starting container with Model = {model}") + client = docker.from_env() + container_name = f"integration-test-{framework}-{id}-{device}" + container_image = f"integration-test-{framework}:{device}" + + port = random.randint(5000, 9000) + + #check if port is already open + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + while sock.connect_ex(("localhost", port)) == 0: + logging.debug(f"Port {port} is already being used; getting a new one...") + port = random.randint(5000, 9000) + + logging.debug(f"Image: {container_image}") + logging.debug(f"Port: {port}") + + device_request = [ + docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]]) + ] if device == "gpu" else [] + + object_id = model.replace("/", "--") + model_dir = f"{HF_HUB_CACHE}/{object_id}" + + storage_dir = _load_repository_from_hf( + repository_id = model, + target_dir = model_dir, + framework = framework + ) + + yield client.containers.run( + container_image, + name=container_name, + ports={"5000": port}, + environment={ + "HF_MODEL_DIR": "/opt/huggingface/model", + "HF_TASK": task + }, + volumes = { + model_dir: { + "bind": "/opt/huggingface/model", + "mode": "ro" + } + }, + detach=True, + # GPU + device_requests=device_request, + ), port + + #Teardown + previous = client.containers.get(container_name) + previous.stop() + previous.remove() + except Exception as exception: + logging.error(f"Error starting container: {str(exception)}") + raise exception + diff --git a/tests/integ/test_container.py b/tests/integ/helpers.py similarity index 79% rename from tests/integ/test_container.py rename to tests/integ/helpers.py index 6c343c6a..0dae2598 100644 --- a/tests/integ/test_container.py +++ b/tests/integ/helpers.py @@ -1,20 +1,35 @@ import random import tempfile import time - import docker import pytest import requests -from docker.client import DockerClient -from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf -from integ.config import task2input, task2model, task2output, task2validation -from transformers.testing_utils import require_torch, slow, require_tf, _run_slow_tests +from huggingface_inference_toolkit.utils import ( + _is_gpu_available, + _load_repository_from_hf +) +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + slow, + require_tf, + _run_slow_tests +) +import tenacity 
+from docker import DockerClient +import logging +import traceback +import urllib3 IS_GPU = _run_slow_tests DEVICE = "gpu" if IS_GPU else "cpu" -client = docker.from_env() - +client = docker.DockerClient(base_url='unix://var/run/docker.sock') def make_sure_other_containers_are_stopped(client: DockerClient, container_name: str): try: @@ -25,44 +40,89 @@ def make_sure_other_containers_are_stopped(client: DockerClient, container_name: return None -def wait_for_container_to_be_ready(base_url): - t = 0 - while t < 10: +#@tenacity.retry( +# retry = tenacity.retry_if_exception(ValueError), +# stop = tenacity.stop_after_attempt(10), +# reraise = True +#) +def wait_for_container_to_be_ready( + base_url, + time_between_retries = 1, + max_retries = 30 +): + + retries = 0 + error = None + + while retries < max_retries: + time.sleep(time_between_retries) try: response = requests.get(f"{base_url}/health") if response.status_code == 200: - break - except Exception: - pass - finally: - t += 1 - time.sleep(2) - return True - + logging.info("Container ready!") + return True + else: + raise ConnectionError(f"Error: {response.status_code}") + except Exception as exception: + error = exception + logging.warning(f"Container at {base_url} not ready, trying again...") + retries += 1 + + logging.error(f"Unable to start container: {str(error)}") + raise error -def verify_task(container: DockerClient, task: str, port: int = 5000, framework: str = "pytorch"): +def verify_task( + #container: DockerClient, + task: str, + port: int = 5000, + framework: str = "pytorch" +): BASE_URL = f"http://localhost:{port}" + logging.info(f"Base URL: {BASE_URL}") + logging.info(f"Port: {port}") input = task2input[task] - # health check - wait_for_container_to_be_ready(BASE_URL) - if ( - task == "image-classification" - or task == "object-detection" - or task == "image-segmentation" - or task == "zero-shot-image-classification" - ): - prediction = requests.post( - f"{BASE_URL}", data=task2input[task], headers={"content-type": "image/x-image"} - ).json() - elif task == "automatic-speech-recognition" or task == "audio-classification": - prediction = requests.post( - f"{BASE_URL}", data=task2input[task], headers={"content-type": "audio/x-audio"} - ).json() - elif task == "text-to-image": - prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content - else: - prediction = requests.post(f"{BASE_URL}", json=input).json() - assert task2validation[task](result=prediction, snapshot=task2output[task]) is True + + try: + # health check + wait_for_container_to_be_ready(BASE_URL) + if ( + task == "image-classification" + or task == "object-detection" + or task == "image-segmentation" + or task == "zero-shot-image-classification" + ): + prediction = requests.post( + f"{BASE_URL}", data=task2input[task], headers={"content-type": "image/x-image"} + ).json() + elif task == "automatic-speech-recognition" or task == "audio-classification": + prediction = requests.post( + f"{BASE_URL}", data=task2input[task], headers={"content-type": "audio/x-audio"} + ).json() + elif task == "text-to-image": + prediction = requests.post(f"{BASE_URL}", json=input, headers={"accept": "image/png"}).content + + else: + prediction = requests.post(f"{BASE_URL}", json=input).json() + + logging.info(f"Input: {input}") + logging.info(f"Prediction: {prediction}") + logging.info(f"Snapshot: {task2output[task]}") + + if task == "conversational": + for message in prediction: + assert "error" not in message.keys() + else: + assert 
task2validation[task]( + result=prediction, + snapshot=task2output[task] + ) + except Exception as exception: + logging.error(f"Base URL: {BASE_URL}") + logging.error(f"Task: {task}") + logging.error(f"Input: {input}") + logging.error(f"Error: {str(exception)}") + logging.error(f"Stack: {traceback.format_exc()}") + raise exception @require_torch @@ -114,9 +174,9 @@ def test_pt_container_remote_model(task) -> None: # GPU device_requests=device_request, ) - # time.sleep(5) + time.sleep(5) - verify_task(container, task, port) + verify_task(task = task, port = port) container.stop() container.remove() diff --git a/tests/integ/test_pytorch_local_cpu.py b/tests/integ/test_pytorch_local_cpu.py new file mode 100644 index 00000000..17e651e9 --- /dev/null +++ b/tests/integ/test_pytorch_local_cpu.py @@ -0,0 +1,127 @@ +import tempfile +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + slow, + _run_slow_tests +) +import pytest + +class TestPytorchLocal: + + @require_torch + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image", + ], + ) + @pytest.mark.parametrize( + "device", + ["cpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "repository_id", + [""] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_local_model( + self, + local_container, + task, + framework, + device + ) -> None: + + verify_task(task = task, port = local_container[1]) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-handler-test", "philschmid/custom-handler-distilbert"], + ) + @pytest.mark.parametrize( + "device", + ["cpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_custom_handler( + self, + local_container, + task, + device, + repository_id + ) -> None: + + verify_task( + task = task, + port = local_container[1], + ) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-pipeline-text-classification"], + ) + @pytest.mark.parametrize( + "device", + ["cpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_legacy_custom_pipeline( + self, + local_container, + repository_id, + device, + task + ) -> None: + + verify_task(task = task, port = local_container[1]) diff --git a/tests/integ/test_pytorch_local_gpu.py b/tests/integ/test_pytorch_local_gpu.py new file mode 100644 index 00000000..15ffebde --- /dev/null +++ b/tests/integ/test_pytorch_local_gpu.py @@ -0,0 +1,127 @@ +import tempfile +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + 
slow, + _run_slow_tests +) +import pytest + +class TestPytorchLocal: + + @require_torch + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "ner", + "question-answering", + "fill-mask", + "summarization", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image", + ], + ) + @pytest.mark.parametrize( + "device", + ["gpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "repository_id", + [""] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_local_model( + self, + local_container, + task, + framework, + device + ) -> None: + + verify_task(task = task, port = local_container[1]) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-handler-test", "philschmid/custom-handler-distilbert"], + ) + @pytest.mark.parametrize( + "device", + ["gpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_custom_handler( + self, + local_container, + task, + device, + repository_id + ) -> None: + + verify_task( + task = task, + port = local_container[1], + ) + + + @require_torch + @pytest.mark.parametrize( + "repository_id", + ["philschmid/custom-pipeline-text-classification"], + ) + @pytest.mark.parametrize( + "device", + ["gpu"] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.parametrize( + "task", + ["custom"] + ) + @pytest.mark.usefixtures('local_container') + def test_pt_container_legacy_custom_pipeline( + self, + local_container, + repository_id, + device, + task + ) -> None: + + verify_task(task = task, port = local_container[1]) diff --git a/tests/integ/test_pytorch_remote_cpu.py b/tests/integ/test_pytorch_remote_cpu.py new file mode 100644 index 00000000..14001dda --- /dev/null +++ b/tests/integ/test_pytorch_remote_cpu.py @@ -0,0 +1,62 @@ +import tempfile +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + slow, + _run_slow_tests +) +import pytest +import tenacity +import docker + +class TestPytorchRemote: + + @tenacity.retry( + retry = tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(5), + reraise = True + ) + @pytest.mark.parametrize( + "device", + ["cpu"] + ) + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "question-answering", + "fill-mask", + "summarization", + "ner", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image" + ] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.usefixtures('remote_container') + def test_inference_remote(self, remote_container, task, framework, device): + + 
verify_task(task = task, port = remote_container[1]) diff --git a/tests/integ/test_pytorch_remote_gpu.py b/tests/integ/test_pytorch_remote_gpu.py new file mode 100644 index 00000000..ec79f4a5 --- /dev/null +++ b/tests/integ/test_pytorch_remote_gpu.py @@ -0,0 +1,62 @@ +import tempfile +from tests.integ.helpers import verify_task +from tests.integ.config import ( + task2input, + task2model, + task2output, + task2validation +) +from transformers.testing_utils import ( + require_torch, + slow, + _run_slow_tests +) +import pytest +import tenacity +import docker + +class TestPytorchRemote: + + @tenacity.retry( + retry = tenacity.retry_if_exception(docker.errors.APIError), + stop = tenacity.stop_after_attempt(5), + reraise = True + ) + @pytest.mark.parametrize( + "device", + ["gpu"] + ) + @pytest.mark.parametrize( + "task", + [ + "text-classification", + "zero-shot-classification", + "question-answering", + "fill-mask", + "summarization", + "ner", + "translation_xx_to_yy", + "text2text-generation", + "text-generation", + "feature-extraction", + "image-classification", + "automatic-speech-recognition", + "audio-classification", + "object-detection", + "image-segmentation", + "table-question-answering", + "conversational", + "sentence-similarity", + "sentence-embeddings", + "sentence-ranking", + "text-to-image" + ] + ) + @pytest.mark.parametrize( + "framework", + ["pytorch"] + ) + @pytest.mark.usefixtures('remote_container') + def test_inference_remote(self, remote_container, task, framework, device): + + verify_task(task = task, port = remote_container[1]) diff --git a/tests/integ/utils.py b/tests/integ/utils.py index 813ba751..2b826cdb 100644 --- a/tests/integ/utils.py +++ b/tests/integ/utils.py @@ -1,21 +1,21 @@ import logging -import re -import signal from contextlib import contextmanager from time import time -LOGGER = logging.getLogger("timeout") - def validate_classification(result=None, snapshot=None): for idx, _ in enumerate(result): assert result[idx].keys() == snapshot[idx].keys() - # assert result[idx]["score"] >= snapshot[idx]["score"] return True +def validate_conversational(result=None, snapshot=None): + assert len(result) >= len(snapshot) + def validate_zero_shot_classification(result=None, snapshot=None): + logging.info(f"Result: {result}") + logging.info(f"Snapshot: {snapshot}") assert result.keys() == snapshot.keys() # assert result["labels"] == snapshot["labels"] # assert result["sequence"] == snapshot["sequence"] @@ -84,3 +84,8 @@ def validate_object_detection(result=None, snapshot=None): def validate_text_to_image(result=None, snapshot=None): assert isinstance(result, snapshot) return True + +def validate_custom(result=None, snapshot=None): + logging.info(f"Validate custom task - result: {result}, snapshot: {snapshot}") + assert result == snapshot + return True diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 00000000..ddba0442 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,7 @@ +import os +import logging +import pytest + +@pytest.fixture(scope = "session") +def cache_test_dir(): + yield os.environ.get("CACHE_TEST_DIR", "./tests") \ No newline at end of file diff --git a/tests/unit/test_diffusers.py b/tests/unit/test_diffusers.py index 32b10cf0..0f2890a8 100644 --- a/tests/unit/test_diffusers.py +++ b/tests/unit/test_diffusers.py @@ -7,12 +7,17 @@ from huggingface_inference_toolkit.diffusers_utils import get_diffusers_pipeline, IEAutoPipelineForText2Image from huggingface_inference_toolkit.utils import _load_repository_from_hf, 
get_pipeline +import logging + +logging.basicConfig(level="DEBUG") @require_torch def test_get_diffusers_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch" + "hf-internal-testing/tiny-stable-diffusion-torch", + tmpdirname, + framework="pytorch" ) pipe = get_pipeline("text-to-image", storage_dir.as_posix()) assert isinstance(pipe, IEAutoPipelineForText2Image) @@ -23,17 +28,25 @@ def test_get_diffusers_pipeline(): def test_pipe_on_gpu(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch" + "hf-internal-testing/tiny-stable-diffusion-torch", + tmpdirname, + framework="pytorch" ) - pipe = get_pipeline("text-to-image", storage_dir.as_posix()) - assert pipe.device.type == "cuda" + pipe = get_pipeline( + "text-to-image", + storage_dir.as_posix() + ) + logging.error(f"Pipe: {pipe.pipeline}") + assert pipe.pipeline.device.type == "cuda" @require_torch def test_text_to_image_task(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "hf-internal-testing/tiny-stable-diffusion-torch", tmpdirname, framework="pytorch" + "hf-internal-testing/tiny-stable-diffusion-torch", + tmpdirname, + framework="pytorch" ) pipe = get_pipeline("text-to-image", storage_dir.as_posix()) res = pipe("Lets create an embedding") diff --git a/tests/unit/test_handler.py b/tests/unit/test_handler.py index 9306cdc3..d1a0a561 100644 --- a/tests/unit/test_handler.py +++ b/tests/unit/test_handler.py @@ -1,14 +1,19 @@ import tempfile - -from transformers.testing_utils import require_torch, slow, require_tf - +from transformers.testing_utils import ( + require_tf, + require_torch, + slow +) import pytest from huggingface_inference_toolkit.handler import ( HuggingFaceHandler, get_inference_handler_either_custom_or_default_handler, ) -from huggingface_inference_toolkit.utils import _is_gpu_available, _load_repository_from_hf +from huggingface_inference_toolkit.utils import ( + _is_gpu_available, + _load_repository_from_hf +) TASK = "text-classification" @@ -19,7 +24,6 @@ @require_torch def test_pt_get_device(): import torch - with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") @@ -34,7 +38,11 @@ def test_pt_get_device(): def test_pt_predict_call(): with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") + storage_dir = _load_repository_from_hf( + MODEL, + tmpdirname, + framework="pytorch" + ) h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) prediction = h(INPUT) @@ -46,7 +54,9 @@ def test_pt_predict_call(): def test_pt_custom_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch" + "philschmid/custom-pipeline-text-classification", + tmpdirname, + framework="pytorch" ) h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="custom") assert h(INPUT) == INPUT @@ -56,7 +66,9 @@ def test_pt_custom_pipeline(): def test_pt_sentence_transformers_pipeline(): with 
tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="pytorch" + "sentence-transformers/all-MiniLM-L6-v2", + tmpdirname, + framework="pytorch" ) h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") pred = h(INPUT) @@ -65,11 +77,14 @@ def test_pt_sentence_transformers_pipeline(): @require_tf def test_tf_get_device(): - import tensorflow as tf with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") + storage_dir = _load_repository_from_hf( + MODEL, + tmpdirname, + framework="tensorflow" + ) h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) if _is_gpu_available(): assert h.pipeline.device == 0 @@ -81,10 +96,18 @@ def test_tf_get_device(): def test_tf_predict_call(): with tempfile.TemporaryDirectory() as tmpdirname: # https://github.com/huggingface/infinity/blob/test-ovh/test/integ/utils.py - storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="tensorflow") - h = HuggingFaceHandler(model_dir=str(storage_dir), task=TASK) + storage_dir = _load_repository_from_hf( + MODEL, + tmpdirname, + framework="tensorflow" + ) + handler = HuggingFaceHandler( + model_dir=str(storage_dir), + task=TASK, + framework="tf" + ) - prediction = h(INPUT) + prediction = handler(INPUT) assert "label" in prediction[0] assert "score" in prediction[0] @@ -104,9 +127,12 @@ def test_tf_sentence_transformers_pipeline(): # TODO should fail! because TF is not supported yet with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "sentence-transformers/all-MiniLM-L6-v2", tmpdirname, framework="tensorflow" + "sentence-transformers/all-MiniLM-L6-v2", + tmpdirname, + framework="tensorflow" ) with pytest.raises(Exception) as exc_info: - h = get_inference_handler_either_custom_or_default_handler(str(storage_dir), task="sentence-embeddings") - - assert "Unknown task sentence-embeddings" in str(exc_info.value) + h = get_inference_handler_either_custom_or_default_handler( + str(storage_dir), + task="sentence-embeddings" + ) diff --git a/tests/unit/test_serializer.py b/tests/unit/test_serializer.py index 98e528e5..07dfd5c1 100644 --- a/tests/unit/test_serializer.py +++ b/tests/unit/test_serializer.py @@ -3,10 +3,13 @@ import numpy as np import pytest import os -from huggingface_inference_toolkit.serialization import Jsoner, Audioer, Imager +from huggingface_inference_toolkit.serialization import ( + Jsoner, + Audioer, + Imager +) from PIL import Image - def test_json_serialization(): t = {"res": np.array([2.0]), "text": "I like you.", "float": 1.2} assert b'{"res":[2.0],"text":"I like you.","float":1.2}' == Jsoner.serialize(t) @@ -30,9 +33,10 @@ def test_json_deserialization(): raw_content = b'{\n\t"inputs": "i like you"\n}' assert {"inputs": "i like you"} == Jsoner.deserialize(raw_content) +@pytest.mark.usefixtures('cache_test_dir') +def test_image_deserialization(cache_test_dir): -def test_image_deserialization(): - image_files_path = os.path.join(os.getcwd(), "tests/resources/image") + image_files_path = f"{cache_test_dir}/resources/image" for image_file in os.listdir(image_files_path): image_bytes = open(os.path.join(image_files_path, image_file), "rb").read() @@ -41,9 +45,10 @@ def test_image_deserialization(): assert isinstance(decoded_data, dict) assert 
isinstance(decoded_data["inputs"], Image.Image) +@pytest.mark.usefixtures('cache_test_dir') +def test_audio_deserialization(cache_test_dir): -def test_audio_deserialization(): - audio_files_path = os.path.join(os.getcwd(), "tests/resources/audio") + audio_files_path = f"{cache_test_dir}/resources/audio" for audio_file in os.listdir(audio_files_path): audio_bytes = open(os.path.join(audio_files_path, audio_file), "rb").read() diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 9d5052ee..79cff93d 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -2,6 +2,7 @@ from pathlib import Path import tempfile + from transformers import pipeline from transformers.file_utils import is_torch_available from transformers.testing_utils import require_tf, require_torch, slow @@ -16,6 +17,7 @@ wrap_conversation_pipeline, ) +import logging MODEL = "lysandre/tiny-bert-random" TASK = "text-classification" @@ -112,17 +114,33 @@ def test_get_framework_tensorflow(): def test_get_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf(MODEL, tmpdirname, framework="pytorch") - pipe = get_pipeline(TASK, storage_dir.as_posix()) + pipe = get_pipeline( + task = TASK, + model_dir = storage_dir.as_posix(), + framework = "pytorch" + ) res = pipe("Life is good, Life is bad") assert "score" in res[0] @require_torch -def test_whisper_long_audio(): +def test_whisper_long_audio(cache_test_dir): with tempfile.TemporaryDirectory() as tmpdirname: - storage_dir = _load_repository_from_hf("openai/whisper-tiny", tmpdirname, framework="pytorch") - pipe = get_pipeline("automatic-speech-recognition", storage_dir.as_posix()) - res = pipe(os.path.join(os.getcwd(), "tests/resources/audio", "long_sample.mp3")) + storage_dir = _load_repository_from_hf( + repository_id = "openai/whisper-tiny", + target_dir = tmpdirname, + framework = "pytorch", + revision = "be0ba7c2f24f0127b27863a23a08002af4c2c279" + ) + logging.info(f"Temp dir: {tmpdirname}") + logging.info(f"POSIX Path: {storage_dir.as_posix()}") + logging.info(f"Contents: {os.listdir(tmpdirname)}") + pipe = get_pipeline( + task = "automatic-speech-recognition", + model_dir = storage_dir.as_posix(), + framework = "safetensors" + ) + res = pipe(f"{cache_test_dir}/resources/audio/long_sample.mp3") assert len(res["text"]) > 700 @@ -136,33 +154,57 @@ def test_wrap_conversation_pipeline(): framework="pt", ) conv_pipe = wrap_conversation_pipeline(init_pipeline) - data = { - "past_user_inputs": ["Which movie is the best ?"], - "generated_responses": ["It's Die Hard for sure."], - "text": "Can you explain why?", - } + data = [ + { + "role": "user", + "content": "Which movie is the best ?" + }, + { + "role": "assistant", + "content": "It's Die Hard for sure." + }, + { + "role": "user", + "content": "Can you explain why?" 
+ } + ] res = conv_pipe(data) - assert "conversation" in res - assert "generated_text" in res + logging.info(f"Response: {res}") + assert res[-1]["role"] == "assistant" + assert "error" not in res[-1]["content"] @require_torch def test_wrapped_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: - storage_dir = _load_repository_from_hf("microsoft/DialoGPT-small", tmpdirname, framework="pytorch") + storage_dir = _load_repository_from_hf( + repository_id = "microsoft/DialoGPT-small", + target_dir = tmpdirname, + framework="pytorch" + ) conv_pipe = get_pipeline("conversational", storage_dir.as_posix()) - data = { - "past_user_inputs": ["Which movie is the best ?"], - "generated_responses": ["It's Die Hard for sure."], - "text": "Can you explain why?", - } - res = conv_pipe(data) - assert "conversation" in res - assert "generated_text" in res - - -def test_local_custom_pipeline(): - model_dir = os.path.join(os.getcwd(), "tests/resources/custom_handler") + data = [ + { + "role": "user", + "content": "Which movie is the best ?" + }, + { + "role": "assistant", + "content": "It's Die Hard for sure." + }, + { + "role": "user", + "content": "Can you explain why?" + } + ] + res = conv_pipe(data, max_new_tokens = 100) + logging.info(f"Response: {res}") + assert res[-1]["role"] == "assistant" + assert "error" not in res[-1]["content"] + + +def test_local_custom_pipeline(cache_test_dir): + model_dir = f"{cache_test_dir}/resources/custom_handler" pipeline = check_and_register_custom_pipeline_from_directory(model_dir) payload = "test" assert pipeline.path == model_dir @@ -172,7 +214,9 @@ def test_local_custom_pipeline(): def test_remote_custom_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch" + "philschmid/custom-pipeline-text-classification", + tmpdirname, + framework="pytorch" ) pipeline = check_and_register_custom_pipeline_from_directory(str(storage_dir)) payload = "test" @@ -183,7 +227,9 @@ def test_remote_custom_pipeline(): def test_get_inference_handler_either_custom_or_default_pipeline(): with tempfile.TemporaryDirectory() as tmpdirname: storage_dir = _load_repository_from_hf( - "philschmid/custom-pipeline-text-classification", tmpdirname, framework="pytorch" + "philschmid/custom-pipeline-text-classification", + tmpdirname, + framework="pytorch" ) pipeline = get_inference_handler_either_custom_or_default_handler(str(storage_dir)) payload = "test"
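
Note on the readiness polling added in tests/integ/helpers.py: the hunk above is hard to read in flattened form, so here is the same helper reconstructed as a standalone sketch (same names, defaults, and behavior as the patch; the loop is written with `for` instead of the patch's manual counter, and logging configuration is assumed to happen elsewhere):

import logging
import time

import requests


def wait_for_container_to_be_ready(base_url, time_between_retries=1, max_retries=30):
    """Poll GET {base_url}/health until it answers HTTP 200 or retries are exhausted."""
    error = None
    for _ in range(max_retries):
        time.sleep(time_between_retries)
        try:
            response = requests.get(f"{base_url}/health")
            if response.status_code == 200:
                logging.info("Container ready!")
                return True
            # Any non-200 answer counts as "not ready yet" and triggers another attempt.
            raise ConnectionError(f"Error: {response.status_code}")
        except Exception as exception:
            error = exception
            logging.warning(f"Container at {base_url} not ready, trying again...")
    logging.error(f"Unable to start container: {error}")
    raise error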
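
Note on the tenacity decorator used by both remote test classes: tenacity's retry_if_exception expects a callable predicate, while retry_if_exception_type takes the exception class itself, so retry_if_exception(docker.errors.APIError) passes a class where a predicate is expected and does not behave as a plain "retry on APIError" filter. If the intent is to retry only on Docker API errors, a sketch of the class-based spelling follows (the helper name and the fixed wait are illustrative assumptions, not part of the patch):

import docker
import tenacity


@tenacity.retry(
    # retry_if_exception_type filters on the exception class;
    # retry_if_exception would expect a predicate function instead.
    retry=tenacity.retry_if_exception_type(docker.errors.APIError),
    stop=tenacity.stop_after_attempt(5),
    wait=tenacity.wait_fixed(2),  # assumption: short fixed back-off between attempts
    reraise=True,
)
def run_container(client: docker.DockerClient, image: str):
    # Hypothetical helper: the Docker API call is the step that can raise transient APIErrors.
    return client.containers.run(image, detach=True)

Keeping the retry on a small helper like this, rather than stacked on the parametrized test method, also keeps the pytest marks and the retry concern separate.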
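
Note on the conversational changes: the tests and validators now exchange chat-style messages (an ordered list of role/content dicts) instead of the legacy past_user_inputs/generated_responses payload. A minimal sketch of the expected shape, combining the new validate_conversational length check with the assistant-turn assertion used in tests/unit/test_utils.py (example messages taken from the patch):

from typing import Dict, List

# Chat-style request: an ordered list of role/content messages, as used by the updated tests.
messages: List[Dict[str, str]] = [
    {"role": "user", "content": "Which movie is the best ?"},
    {"role": "assistant", "content": "It's Die Hard for sure."},
    {"role": "user", "content": "Can you explain why?"},
]


def validate_conversational(result=None, snapshot=None):
    # The pipeline should return at least as many messages as it was given,
    # with the generated turn(s) appended by the assistant.
    assert len(result) >= len(snapshot)
    assert result[-1]["role"] == "assistant"
    return True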