diff --git a/.github/workflows/cml.yaml b/.github/workflows/cml.yaml
new file mode 100644
index 000000000..a4a314601
--- /dev/null
+++ b/.github/workflows/cml.yaml
@@ -0,0 +1,73 @@
+name: benchmarks
+
+on:
+  workflow_dispatch:
+  pull_request:
+    types: [labeled]
+jobs:
+  deploy-cloud-runner:
+    # Run when the 'benchmark' label is added to a PR, or on manual dispatch.
+    # NOTE: manual runs must be detected via github.event_name — the
+    # workflow_dispatch payload has no 'action' field, so comparing
+    # github.event.action against 'workflow_dispatch' would never match.
+    if: ${{ (github.event.action == 'labeled' && github.event.label.name == 'benchmark') || github.event_name == 'workflow_dispatch' }}
+    runs-on: [ubuntu-latest]
+    container: docker://dvcorg/cml
+    steps:
+      - name: deploy
+        env:
+          repo_token: ${{ secrets.HAYSTACK_BOT_TOKEN }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_CI_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_CI_SECRET_ACCESS_KEY }}
+          VPC: ${{ secrets.AWS_CI_VPC }}
+        run: |
+          echo "Deploying..."
+          RUNNER_LABELS="cml,aws"
+          RUNNER_REPO="https://github.com/${GITHUB_REPOSITORY}"
+          MACHINE="cml$(date +%s)"
+          docker-machine create \
+            --driver amazonec2 \
+            --amazonec2-instance-type p3.2xlarge \
+            --amazonec2-vpc-id $VPC \
+            --amazonec2-region us-east-1 \
+            --amazonec2-zone c \
+            --amazonec2-ssh-user ubuntu \
+            --amazonec2-ami ami-06a25ee8966373068 \
+            --amazonec2-root-size 150 \
+            $MACHINE
+          eval "$(docker-machine env --shell sh $MACHINE)"
+
+          (
+          docker-machine ssh $MACHINE "sudo mkdir -p \
+          /docker_machine && \
+          sudo chmod 777 /docker_machine" && \
+          docker-machine scp -r -q ~/.docker/machine/ \
+          $MACHINE:/docker_machine && \
+          docker run --name runner -d \
+            --gpus all \
+            -v /docker_machine/machine:/root/.docker/machine \
+            --net host \
+            -e DOCKER_MACHINE=$MACHINE \
+            -e repo_token=$repo_token \
+            -e RUNNER_LABELS=$RUNNER_LABELS \
+            -e RUNNER_REPO=$RUNNER_REPO \
+            -e RUNNER_IDLE_TIMEOUT=120 \
+            dvcorg/cml-py3:latest && \
+          sleep 20 && echo "Deployed $MACHINE"
+          ) || (echo "Shut down machine" && docker-machine rm -y -f $MACHINE && exit 1)
+  run-benchmark:
+    # Same trigger condition as deploy-cloud-runner (see note there on
+    # github.event_name for manual dispatch).
+    if: ${{ (github.event.action == 'labeled' && github.event.label.name == 'benchmark') || github.event_name == 'workflow_dispatch' }}
+    needs: deploy-cloud-runner
+    runs-on: [self-hosted, cml]
+    steps:
+      - uses: actions/checkout@v2
+      - name: cml_run
+        env:
+          repo_token: ${{ secrets.HAYSTACK_BOT_TOKEN }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_CI_ACCESS_KEY }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_CI_SECRET_ACCESS_KEY }}
+        run: |
+          apt-get update -y
+          apt-get install python3-dev -y
+          pip install -r requirements.txt
+          pip install .
+          cd test/benchmarks && python question_answering_components.py
+          echo -en "## Benchmarks: QA per component\n" >> report.md
+          cat results_per_component.md >> report.md
+          cml-send-comment report.md
diff --git a/.gitignore b/.gitignore
index 0632f81b8..ed9a4e27c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -142,4 +142,6 @@ mlruns/
 .DS_Store
 
 # cache
-*cache*
\ No newline at end of file
+*cache*
+
+sandbox/
\ No newline at end of file
diff --git a/test/benchmarks/question_answering_components.py b/test/benchmarks/question_answering_components.py
index 34c4d0e40..1230f47d8 100644
--- a/test/benchmarks/question_answering_components.py
+++ b/test/benchmarks/question_answering_components.py
@@ -20,8 +20,8 @@ questions_file = "samples/question_answering_questions.txt"
 
 num_processes = 1
 passages_per_char = 2400 / 1000000  # numerator is number of passages when 1mill chars paired with one of the questions, msl 384, doc stride 128
-date_str = date.today().strftime("%d_%m_%Y")
-output_file = f"results_component_test_{date_str}.csv"
+# date_str = date.today().strftime("%d_%m_%Y")
+output_file = "results_per_component.csv"
 
 params = {
@@ -44,7 +44,8 @@ def benchmark(params, output=output_file):
     df = pd.DataFrame.from_records(results)
     df.to_csv(output)
     logger.info("\n\n" + pformat(result) + "\n")
-
+    with open(output_file.replace(".csv", ".md"), "w") as f:
+        f.write(str(df.to_markdown()))
 def warmup_run():
     """
     This run warms up the gpu. We saw cases where the first run in the loop took longer or showed different