diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index aa59188ca24..b703b16d6ed 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -22,7 +22,7 @@ jobs:
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
-      makeflags: "--jobs=3 --output-sync=target"
+      makeflags: ${{ matrix.makeflags }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -42,6 +42,7 @@ jobs:
             image-type: 'extras'
             runs-on: 'arc-runner-set'
             base-image: "ubuntu:22.04"
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "1"
@@ -52,6 +53,7 @@ jobs:
             image-type: 'extras'
             runs-on: 'arc-runner-set'
             base-image: "ubuntu:22.04"
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'hipblas'
             platforms: 'linux/amd64'
             tag-latest: 'false'
@@ -60,6 +62,7 @@ jobs:
             image-type: 'extras'
             base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'false'
@@ -68,6 +71,7 @@ jobs:
             ffmpeg: 'true'
             image-type: 'extras'
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
   core-image-build:
     uses: ./.github/workflows/image_build.yml
     with:
@@ -81,7 +85,7 @@ jobs:
       platforms: ${{ matrix.platforms }}
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
-      makeflags: "--jobs=3 --output-sync=target"
+      makeflags: ${{ matrix.makeflags }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -98,6 +102,7 @@ jobs:
             image-type: 'core'
             runs-on: 'ubuntu-latest'
             base-image: "ubuntu:22.04"
+            makeflags: "--jobs=5 --output-sync=target"
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'false'
@@ -106,6 +111,7 @@ jobs:
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "1"
@@ -115,4 +121,5 @@ jobs:
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'ubuntu-latest'
-            base-image: "ubuntu:22.04"
\ No newline at end of file
+            base-image: "ubuntu:22.04"
+            makeflags: "--jobs=5 --output-sync=target"
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 40deb0ecc27..79a38fc5fc9 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -27,7 +27,7 @@ jobs:
       runs-on: ${{ matrix.runs-on }}
       base-image: ${{ matrix.base-image }}
       aio: ${{ matrix.aio }}
-      makeflags: "--jobs=3 --output-sync=target"
+      makeflags: ${{ matrix.makeflags }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -49,6 +49,7 @@ jobs:
             image-type: 'extras'
             runs-on: 'arc-runner-set'
             base-image: "ubuntu:22.04"
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: ''
             platforms: 'linux/amd64'
             tag-latest: 'auto'
@@ -57,6 +58,7 @@ jobs:
             image-type: 'extras'
             runs-on: 'arc-runner-set'
             base-image: "ubuntu:22.04"
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
             cuda-minor-version: "7"
@@ -67,6 +69,7 @@ jobs:
             image-type: 'extras'
             runs-on: 'arc-runner-set'
             base-image: "ubuntu:22.04"
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "1"
@@ -77,6 +80,7 @@ jobs:
             image-type: 'extras'
             runs-on: 'arc-runner-set'
             base-image: "ubuntu:22.04"
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
             cuda-minor-version: "7"
@@ -88,6 +92,7 @@ jobs:
             runs-on: 'arc-runner-set'
             base-image: "ubuntu:22.04"
             aio: "-aio-gpu-nvidia-cuda-11"
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "1"
@@ -99,6 +104,7 @@ jobs:
             runs-on: 'arc-runner-set'
             base-image: "ubuntu:22.04"
             aio: "-aio-gpu-nvidia-cuda-12"
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: ''
             #platforms: 'linux/amd64,linux/arm64'
             platforms: 'linux/amd64'
@@ -108,6 +114,7 @@ jobs:
             image-type: 'extras'
             base-image: "ubuntu:22.04"
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'hipblas'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
@@ -117,6 +124,7 @@ jobs:
             aio: "-aio-gpu-hipblas"
             base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'hipblas'
             platforms: 'linux/amd64'
             tag-latest: 'false'
@@ -125,6 +133,7 @@ jobs:
             image-type: 'extras'
             base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
@@ -134,6 +143,7 @@ jobs:
             image-type: 'extras'
             runs-on: 'arc-runner-set'
             aio: "-aio-gpu-intel-f16"
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'sycl_f32'
             platforms: 'linux/amd64'
             tag-latest: 'auto'
@@ -143,6 +153,7 @@ jobs:
             image-type: 'extras'
             runs-on: 'arc-runner-set'
             aio: "-aio-gpu-intel-f32"
+            makeflags: "--jobs=3 --output-sync=target"
           # Core images
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
@@ -152,6 +163,7 @@ jobs:
             ffmpeg: 'false'
             image-type: 'core'
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'sycl_f32'
             platforms: 'linux/amd64'
             tag-latest: 'false'
@@ -160,6 +172,7 @@ jobs:
             ffmpeg: 'false'
             image-type: 'core'
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'false'
@@ -168,6 +181,7 @@ jobs:
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'sycl_f32'
             platforms: 'linux/amd64'
             tag-latest: 'false'
@@ -176,6 +190,7 @@ jobs:
             ffmpeg: 'true'
             image-type: 'core'
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'hipblas'
             platforms: 'linux/amd64'
             tag-latest: 'false'
@@ -184,6 +199,7 @@ jobs:
             image-type: 'core'
             base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
           - build-type: 'hipblas'
             platforms: 'linux/amd64'
             tag-latest: 'false'
@@ -192,6 +208,7 @@ jobs:
             image-type: 'core'
             base-image: "rocm/dev-ubuntu-22.04:6.0-complete"
             runs-on: 'arc-runner-set'
+            makeflags: "--jobs=3 --output-sync=target"
   
   core-image-build:
     uses: ./.github/workflows/image_build.yml
@@ -207,7 +224,7 @@ jobs:
       runs-on: ${{ matrix.runs-on }}
       aio: ${{ matrix.aio }}
       base-image: ${{ matrix.base-image }}
-      makeflags: "--jobs=3 --output-sync=target"
+      makeflags: ${{ matrix.makeflags }}
     secrets:
       dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
       dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
@@ -225,6 +242,7 @@ jobs:
             base-image: "ubuntu:22.04"
             runs-on: 'ubuntu-latest'
             aio: "-aio-cpu"
+            makeflags: "--jobs=5 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
             cuda-minor-version: "7"
@@ -235,6 +253,7 @@ jobs:
             image-type: 'core'
             base-image: "ubuntu:22.04"
             runs-on: 'ubuntu-latest'
+            makeflags: "--jobs=5 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "1"
@@ -245,6 +264,7 @@ jobs:
             image-type: 'core'
             base-image: "ubuntu:22.04"
             runs-on: 'ubuntu-latest'
+            makeflags: "--jobs=5 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "11"
             cuda-minor-version: "7"
@@ -255,6 +275,7 @@ jobs:
             image-type: 'core'
             runs-on: 'ubuntu-latest'
             base-image: "ubuntu:22.04"
+            makeflags: "--jobs=5 --output-sync=target"
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "1"
@@ -265,3 +286,4 @@ jobs:
             image-type: 'core'
             runs-on: 'ubuntu-latest'
             base-image: "ubuntu:22.04"
+            makeflags: "--jobs=5 --output-sync=target"
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index 659f85deb8b..d07df44161b 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -49,7 +49,7 @@ on:
       makeflags:
         description: 'Make Flags'
         required: false
-        default: ''
+        default: '--jobs=3 --output-sync=target'
         type: string
       aio:
         description: 'AIO Image Name'
@@ -79,6 +79,7 @@ jobs:
           && sudo apt-get install -y git
       - name: Checkout
         uses: actions/checkout@v4
+
       - name: Release space from worker
         if: inputs.runs-on == 'ubuntu-latest'
         run: |
@@ -120,6 +121,7 @@ jobs:
           sudo rm -rf "/usr/local/share/boost" || true
           sudo rm -rf "$AGENT_TOOLSDIRECTORY" || true
           df -h
+
       - name: Docker meta
         id: meta
         uses: docker/metadata-action@v5
@@ -134,6 +136,7 @@ jobs:
           flavor: |
             latest=${{ inputs.tag-latest }}
             suffix=${{ inputs.tag-suffix }}
+
       - name: Docker meta AIO (quay.io)
         if: inputs.aio != ''
         id: meta_aio
@@ -147,6 +150,7 @@ jobs:
           flavor: |
             latest=${{ inputs.tag-latest }}
             suffix=${{ inputs.aio }}
+
       - name: Docker meta AIO (dockerhub)
         if: inputs.aio != ''
         id: meta_aio_dockerhub
@@ -160,6 +164,7 @@ jobs:
           flavor: |
             latest=${{ inputs.tag-latest }}
             suffix=${{ inputs.aio }}
+
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
         with:
@@ -184,6 +189,25 @@ jobs:
           username: ${{ secrets.quayUsername }}
           password: ${{ secrets.quayPassword }}
 
+      - name: Cache GRPC
+        uses: docker/build-push-action@v5
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          build-args: |
+            IMAGE_TYPE=${{ inputs.image-type }}
+            BASE_IMAGE=${{ inputs.base-image }}
+            MAKEFLAGS=${{ inputs.makeflags }}
+            GRPC_VERSION=v1.58.0
+          context: .
+          file: ./Dockerfile
+          cache-from: type=gha
+          cache-to: type=gha,ignore-error=true
+          target: grpc
+          platforms: ${{ inputs.platforms }}
+          push: false
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
       - name: Build and push
         uses: docker/build-push-action@v5
         with:
@@ -198,18 +222,20 @@ jobs:
             MAKEFLAGS=${{ inputs.makeflags }}
           context: .
           file: ./Dockerfile
+          cache-from: type=gha
           platforms: ${{ inputs.platforms }}
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
-      -
-        name: Inspect image
+
+      - name: Inspect image
         if: github.event_name != 'pull_request'
         run: |
           docker pull localai/localai:${{ steps.meta.outputs.version }}
           docker image inspect localai/localai:${{ steps.meta.outputs.version }}
           docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
           docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+
       - name: Build and push AIO image
         if: inputs.aio != ''
         uses: docker/build-push-action@v5
@@ -217,12 +243,14 @@ jobs:
           builder: ${{ steps.buildx.outputs.name }}
           build-args: |
             BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }}
+            MAKEFLAGS=${{ inputs.makeflags }}
           context: .
           file: ./Dockerfile.aio
           platforms: ${{ inputs.platforms }}
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.meta_aio.outputs.tags }}
           labels: ${{ steps.meta_aio.outputs.labels }}
+
       - name: Build and push AIO image (dockerhub)
         if: inputs.aio != ''
         uses: docker/build-push-action@v5
@@ -230,15 +258,18 @@ jobs:
           builder: ${{ steps.buildx.outputs.name }}
           build-args: |
             BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }}
+            MAKEFLAGS=${{ inputs.makeflags }}
           context: .
           file: ./Dockerfile.aio
           platforms: ${{ inputs.platforms }}
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
           labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
+
       - name: job summary
         run: |
           echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
+
       - name: job summary(AIO)
         if: inputs.aio != ''
         run: |
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 6ac816eeed7..1d749189614 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -2,6 +2,9 @@ name: Build and Release
 
 on: push
 
+env:
+  GRPC_VERSION: v1.58.0
+
 permissions:
   contents: write
 
@@ -32,7 +35,8 @@ jobs:
           submodules: true
       - uses: actions/setup-go@v4
         with:
-          go-version: '>=1.21.0'
+          go-version: '1.21.x'
+          cache: false
       - name: Dependencies
         run: |
           sudo apt-get update
@@ -54,17 +58,17 @@ jobs:
         uses: actions/cache@v3
         with:
           path: grpc
-          key: ${{ runner.os }}-grpc
+          key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
       - name: Build grpc
         if: steps.cache-grpc.outputs.cache-hit != 'true'
         run: |
-          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
           cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
             -DgRPC_BUILD_TESTS=OFF \
-            ../.. && sudo make -j12
+            ../.. && sudo make --jobs 5 --output-sync=target
       - name: Install gRPC
         run: |
-          cd grpc && cd cmake/build && sudo make -j12 install
+          cd grpc && cd cmake/build && sudo make --jobs 5 --output-sync=target install
       - name: Build
         id: build
         env:
@@ -98,7 +102,8 @@ jobs:
           submodules: true
       - uses: actions/setup-go@v4
         with:
-          go-version: '>=1.21.0'
+          go-version: '1.21.x'
+          cache: false
       - name: Dependencies
         run: |
           sudo apt-get install -y --no-install-recommends libopencv-dev
@@ -135,7 +140,8 @@ jobs:
           submodules: true
       - uses: actions/setup-go@v4
         with:
-          go-version: '>=1.21.0'
+          go-version: '1.21.x'
+          cache: false
       - name: Dependencies
         run: |
           brew install protobuf grpc
diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index 5f61835dd64..6f92c806a3a 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -40,8 +40,8 @@ jobs:
       - name: Test transformers
         run: |
            export PATH=$PATH:/opt/conda/bin
-           make -C backend/python/transformers
-           make -C backend/python/transformers test
+           make --jobs=5 --output-sync=target -C backend/python/transformers
+           make --jobs=5 --output-sync=target -C backend/python/transformers test
 
   tests-sentencetransformers:
     runs-on: ubuntu-latest
@@ -69,8 +69,8 @@ jobs:
       - name: Test sentencetransformers
         run: |
            export PATH=$PATH:/opt/conda/bin
-           make -C backend/python/sentencetransformers
-           make -C backend/python/sentencetransformers test
+           make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
+           make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
 
   tests-diffusers:
     runs-on: ubuntu-latest
@@ -98,8 +98,8 @@ jobs:
       - name: Test diffusers
         run: |
            export PATH=$PATH:/opt/conda/bin
-           make -C backend/python/diffusers
-           make -C backend/python/diffusers test
+           make --jobs=5 --output-sync=target -C backend/python/diffusers
+           make --jobs=5 --output-sync=target -C backend/python/diffusers test
 
 
   tests-transformers-musicgen:
@@ -128,8 +128,8 @@ jobs:
       - name: Test transformers-musicgen
         run: |
            export PATH=$PATH:/opt/conda/bin
-           make -C backend/python/transformers-musicgen
-           make -C backend/python/transformers-musicgen test
+           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen
+           make --jobs=5 --output-sync=target -C backend/python/transformers-musicgen test
 
 
 
@@ -159,8 +159,8 @@ jobs:
       - name: Test petals
         run: |
            export PATH=$PATH:/opt/conda/bin
-           make -C backend/python/petals
-           make -C backend/python/petals test
+           make --jobs=5 --output-sync=target -C backend/python/petals
+           make --jobs=5 --output-sync=target -C backend/python/petals test
 
            
 
@@ -230,8 +230,8 @@ jobs:
   #     - name: Test bark
   #       run: |
   #          export PATH=$PATH:/opt/conda/bin
-  #          make -C backend/python/bark
-  #          make -C backend/python/bark test
+  #          make --jobs=5 --output-sync=target -C backend/python/bark
+  #          make --jobs=5 --output-sync=target -C backend/python/bark test
 
            
   # Below tests needs GPU. Commented out for now
@@ -260,8 +260,8 @@ jobs:
   #     - name: Test vllm
   #       run: |
   #          export PATH=$PATH:/opt/conda/bin
-  #          make -C backend/python/vllm
-  #          make -C backend/python/vllm test
+  #          make --jobs=5 --output-sync=target -C backend/python/vllm
+  #          make --jobs=5 --output-sync=target -C backend/python/vllm test
   tests-vallex:
     runs-on: ubuntu-latest
     steps:
@@ -286,8 +286,8 @@ jobs:
       - name: Test vall-e-x
         run: |
            export PATH=$PATH:/opt/conda/bin
-           make -C backend/python/vall-e-x
-           make -C backend/python/vall-e-x test
+           make --jobs=5 --output-sync=target -C backend/python/vall-e-x
+           make --jobs=5 --output-sync=target -C backend/python/vall-e-x test
 
   tests-coqui:
     runs-on: ubuntu-latest
@@ -313,5 +313,5 @@ jobs:
       - name: Test coqui
         run: |
            export PATH=$PATH:/opt/conda/bin
-           make -C backend/python/coqui
-           make -C backend/python/coqui test
+           make --jobs=5 --output-sync=target -C backend/python/coqui
+           make --jobs=5 --output-sync=target -C backend/python/coqui test
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 203aeeca972..95d108626b8 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,6 +9,9 @@ on:
     tags:
       - '*'
 
+env:
+  GRPC_VERSION: v1.58.0
+
 concurrency:
   group: ci-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
   cancel-in-progress: true
@@ -60,6 +63,7 @@ jobs:
         uses: actions/setup-go@v4
         with:
           go-version: ${{ matrix.go-version }}
+          cache: false
       # You can test your matrix by printing the current Go version
       - name: Display Go version
         run: go version
@@ -91,20 +95,20 @@ jobs:
         uses: actions/cache@v3
         with:
           path: grpc
-          key: ${{ runner.os }}-grpc
+          key: ${{ runner.os }}-grpc-${{ env.GRPC_VERSION }}
       - name: Build grpc
         if: steps.cache-grpc.outputs.cache-hit != 'true'
         run: |
-          git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+          git clone --recurse-submodules -b ${{ env.GRPC_VERSION }} --depth 1 --jobs 5 --shallow-submodules https://github.com/grpc/grpc && \
           cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
             -DgRPC_BUILD_TESTS=OFF \
-            ../.. && sudo make -j12
+            ../.. && sudo make --jobs 5
       - name: Install gRPC
         run: |
-          cd grpc && cd cmake/build && sudo make -j12 install
+          cd grpc && cd cmake/build && sudo make --jobs 5 install
       - name: Test
         run: |
-          GO_TAGS="stablediffusion tts" make test
+          GO_TAGS="stablediffusion tts" make --jobs 5 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
         uses: mxschmitt/action-tmate@v3
@@ -151,7 +155,7 @@ jobs:
           submodules: true
       - name: Build images
         run: |
-          docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core -t local-ai:tests -f Dockerfile .
+          docker build --build-arg FFMPEG=true --build-arg IMAGE_TYPE=core --build-arg MAKEFLAGS="--jobs=5 --output-sync=target" -t local-ai:tests -f Dockerfile .
           BASE_IMAGE=local-ai:tests DOCKER_AIO_IMAGE=local-ai-aio:test make docker-aio
       - name: Test
         run: |
@@ -176,17 +180,20 @@ jobs:
         uses: actions/setup-go@v4
         with:
           go-version: ${{ matrix.go-version }}
+          cache: false
       # You can test your matrix by printing the current Go version
       - name: Display Go version
         run: go version
       - name: Dependencies
         run: |
-          brew install protobuf grpc
+          brew install protobuf grpc make
       - name: Test
         run: |
           export C_INCLUDE_PATH=/usr/local/include
           export CPLUS_INCLUDE_PATH=/usr/local/include
-          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make test
+          # Put brew's GNU Make (supports --output-sync) first on PATH; `brew --prefix` resolves the install dir on both Intel (/usr/local) and Apple Silicon (/opt/homebrew) runners
+          export PATH="$(brew --prefix make)/libexec/gnubin:$PATH"
+          BUILD_TYPE="GITHUB_CI_HAS_BROKEN_METAL" CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make --jobs 4 --output-sync=target test
       - name: Setup tmate session if tests fail
         if: ${{ failure() }}
         uses: mxschmitt/action-tmate@v3
diff --git a/Dockerfile b/Dockerfile
index 8725e76dd5b..5fb6230c47a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -90,11 +90,35 @@ RUN if [ ! -e /usr/bin/python ]; then \
 ###################################
 ###################################
 
+FROM ${BASE_IMAGE} as grpc
+
+ARG MAKEFLAGS
+ARG GRPC_VERSION=v1.58.0
+
+ENV MAKEFLAGS=${MAKEFLAGS}
+
+WORKDIR /build
+
+RUN apt-get update && \
+    apt-get install -y g++ cmake git && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN git clone --recurse-submodules --jobs 4 -b ${GRPC_VERSION} --depth 1 --shallow-submodules https://github.com/grpc/grpc
+
+RUN cd grpc && \
+    mkdir -p cmake/build && \
+    cd cmake/build && \
+    cmake -DgRPC_INSTALL=ON -DgRPC_BUILD_TESTS=OFF ../.. && \
+    make
+
+###################################
+###################################
+
 FROM requirements-${IMAGE_TYPE} as builder
 
 ARG GO_TAGS="stablediffusion tts"
 ARG GRPC_BACKENDS
-ARG BUILD_GRPC=true
 ARG MAKEFLAGS
 
 ENV GRPC_BACKENDS=${GRPC_BACKENDS}
@@ -121,12 +145,9 @@ RUN if [ "${BUILD_TYPE}" = "clblas" ]; then \
 # stablediffusion does not tolerate a newer version of abseil, build it first
 RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
 
-RUN if [ "${BUILD_GRPC}" = "true" ]; then \
-    git clone --recurse-submodules --jobs 4 -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-    cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-      -DgRPC_BUILD_TESTS=OFF \
-       ../.. && make install \
-    ; fi
+COPY --from=grpc /build/grpc ./grpc/
+
+RUN cd /build/grpc/cmake/build && make install
 
 # Rebuild with defaults backends
 RUN make build
@@ -179,7 +200,7 @@ WORKDIR /build
 COPY . .
 
 COPY --from=builder /build/sources ./sources/
-COPY --from=builder /build/grpc ./grpc/
+COPY --from=grpc /build/grpc ./grpc/
 
 RUN make prepare-sources && cd /build/grpc/cmake/build && make install && rm -rf grpc
 
diff --git a/Makefile b/Makefile
index c6af54ebe1a..1a8f762353a 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ BINARY_NAME=local-ai
 
 # llama.cpp versions
 GOLLAMA_STABLE_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=5106ef482c65ac60ac14da9a68c7b37bca4c6993
+CPPLLAMA_VERSION?=ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -16,7 +16,7 @@ RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 
 # whisper.cpp version
-WHISPER_CPP_VERSION?=fc366b807a17dc05813a6fcc13c8c4dfd442fa6a
+WHISPER_CPP_VERSION?=1e8f28c42a1472ae7c49d0502ea06e2f5bc29a69
 
 # bert.cpp version
 BERT_VERSION?=6abe312cded14042f6b7c3cd8edf082713334a4d
@@ -310,6 +310,12 @@ build: prepare backend-assets grpcs ## Build the project
 	$(info ${GREEN}I LD_FLAGS: ${YELLOW}$(LD_FLAGS)${RESET})
 	CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./
 
+build-minimal:
+	BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS=backend-assets/grpc/llama-cpp GO_TAGS=none $(MAKE) build
+
+build-api:
+	BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
+
 dist: build
 	mkdir -p release
 	cp $(BINARY_NAME) release/$(BINARY_NAME)-$(BUILD_ID)-$(OS)-$(ARCH)
@@ -352,7 +358,7 @@ prepare-e2e:
 	mkdir -p $(TEST_DIR)
 	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
 	test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin
-	docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
+	docker build --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
 
 run-e2e-image:
 	ls -liah $(abspath ./tests/e2e-fixtures)
@@ -564,6 +570,7 @@ docker:
 		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="$(GO_TAGS)" \
+		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
 		--build-arg BUILD_TYPE=$(BUILD_TYPE) \
 		-t $(DOCKER_IMAGE) .
 	
@@ -571,6 +578,7 @@ docker-aio:
 	@echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)"
 	docker build \
 		--build-arg BASE_IMAGE=$(BASE_IMAGE) \
+		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
 		-t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio .
 
 docker-aio-all:
@@ -582,6 +590,7 @@ docker-image-intel:
 		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="none" \
+		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
 		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
 
 docker-image-intel-xpu:
@@ -589,4 +598,9 @@ docker-image-intel-xpu:
 		--build-arg BASE_IMAGE=intel/oneapi-basekit:2024.0.1-devel-ubuntu22.04 \
 		--build-arg IMAGE_TYPE=$(IMAGE_TYPE) \
 		--build-arg GO_TAGS="none" \
+		--build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \
 		--build-arg BUILD_TYPE=sycl_f32 -t $(DOCKER_IMAGE) .
+
+.PHONY: swagger
+swagger:
+	swag init -g core/http/api.go --output swagger
diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh
index d04e5642f04..a2e040fa604 100755
--- a/aio/entrypoint.sh
+++ b/aio/entrypoint.sh
@@ -5,54 +5,77 @@ echo "===> LocalAI All-in-One (AIO) container starting..."
 GPU_ACCELERATION=false
 GPU_VENDOR=""
 
+function check_intel() {
+    if lspci | grep -E 'VGA|3D' | grep -iq intel; then
+        echo "Intel GPU detected"
+        if [ -d /opt/intel ]; then
+            GPU_ACCELERATION=true
+            GPU_VENDOR=intel
+        else
+            echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
+        fi
+    fi
+}
+
+function check_nvidia_wsl() {
+    if lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
+        # We make the assumption this WSL2 card is NVIDIA, then check for nvidia-smi
+        # Make sure the container was run with `--gpus all` as the only required parameter
+        echo "NVIDIA GPU detected via WSL2"
+        # nvidia-smi should be installed in the container
+        if nvidia-smi; then
+            GPU_ACCELERATION=true
+            GPU_VENDOR=nvidia
+        else
+            echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
+        fi
+    fi
+}
+
+function check_amd() {
+    if lspci | grep -E 'VGA|3D' | grep -iq amd; then
+        echo "AMD GPU detected"
+        # Check if ROCm is installed
+        if [ -d /opt/rocm ]; then
+            GPU_ACCELERATION=true
+            GPU_VENDOR=amd
+        else
+            echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
+        fi
+    fi
+}
+
+function check_nvidia() {
+    if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
+        echo "NVIDIA GPU detected"
+        # nvidia-smi should be installed in the container
+        if nvidia-smi; then
+            GPU_ACCELERATION=true
+            GPU_VENDOR=nvidia
+        else
+            echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
+        fi
+    fi
+}
+
+function check_metal() {
+    if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
+        echo "Apple Metal supported GPU detected"
+        GPU_ACCELERATION=true
+        GPU_VENDOR=apple
+    fi
+}
+
 function detect_gpu() {
     case "$(uname -s)" in
         Linux)
-            if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then
-                echo "NVIDIA GPU detected"
-                # nvidia-smi should be installed in the container
-                if nvidia-smi; then
-                    GPU_ACCELERATION=true
-                    GPU_VENDOR=nvidia
-                else
-                    echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available."
-                fi
-            elif lspci | grep -E 'VGA|3D' | grep -iq amd; then
-                echo "AMD GPU detected"
-                # Check if ROCm is installed
-                if [ -d /opt/rocm ]; then
-                    GPU_ACCELERATION=true
-                    GPU_VENDOR=amd
-                else
-                    echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available."
-                fi
-            elif lspci | grep -E 'VGA|3D' | grep -iq intel; then
-                echo "Intel GPU detected"
-                if [ -d /opt/intel ]; then
-                    GPU_ACCELERATION=true
-                    GPU_VENDOR=intel
-                else
-                    echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available."
-                fi
-            elif lspci | grep -E 'VGA|3D' | grep -iq "Microsoft Corporation Device 008e"; then
-                # We make the assumption this WSL2 cars is NVIDIA, then check for nvidia-smi
-                # Make sure the container was run with `--gpus all` as the only required parameter
-                echo "NVIDIA GPU detected via WSL2"
-                # nvidia-smi should be installed in the container
-                if nvidia-smi; then
-                    GPU_ACCELERATION=true
-                    GPU_VENDOR=nvidia
-                else
-                    echo "NVIDIA GPU detected via WSL2, but nvidia-smi is not installed. GPU acceleration will not be available."
-                fi
-            fi
+            check_nvidia
+            check_amd
+            check_intel
+            check_nvidia_wsl
             ;;
         Darwin)
-            if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then
-                echo "Apple Metal supported GPU detected"
-                GPU_ACCELERATION=true
-                GPU_VENDOR=apple
-            fi
+            check_metal
             ;;
     esac
 }
@@ -96,8 +119,8 @@ function check_vars() {
         exit 1
     fi
 
-    if [ -z "$SIZE" ]; then
-        echo "SIZE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
+    if [ -z "$PROFILE" ]; then
+        echo "PROFILE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple"
         exit 1
     fi
 }
@@ -105,11 +128,11 @@ function check_vars() {
 detect_gpu
 detect_gpu_size
 
-SIZE="${SIZE:-$GPU_SIZE}" # default to cpu
-export MODELS="${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml}"
+PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
+export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
 
 check_vars
 
-echo "Starting LocalAI with the following models: $MODELS"
+echo "===> Starting LocalAI[$PROFILE] with the following models: $MODELS"
 
 /build/entrypoint.sh "$@"
diff --git a/aio/gpu-8g/text-to-text.yaml b/aio/gpu-8g/text-to-text.yaml
index c6f26c071a0..1a67169b685 100644
--- a/aio/gpu-8g/text-to-text.yaml
+++ b/aio/gpu-8g/text-to-text.yaml
@@ -3,30 +3,27 @@ mmap: true
 parameters:
   model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
 
-roles:
-  assistant_function_call: assistant
-  function: tool
 template:
   chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
-    {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
-    {{ if eq .RoleName "function" }}<tool_result>{{end}}
+    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+    {{ if .FunctionCall }}<tool_call>{{end}}
+    {{ if eq .RoleName "tool" }}<tool_result>{{end}}
     {{if .Content}}{{.Content}}{{end}}
     {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
-    {{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
-    {{ if eq .RoleName "function" }}</tool_result>{{end}}
+    {{ if .FunctionCall }}</tool_call>{{end}}
+    {{ if eq .RoleName "tool" }}</tool_result>{{end}}
     <|im_end|>
   # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
   function: |
     <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: 
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
     <tools>
     {{range .Functions}}
     {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
     {{end}}
-    </tools> 
-    Use the following pydantic model json schema for each tool call you will make: 
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} 
+    </tools>
+    Use the following pydantic model json schema for each tool call you will make:
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
     For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
     <tool_call>
     {'arguments': <args-dict>, 'name': <function-name>}
diff --git a/aio/intel/text-to-text.yaml b/aio/intel/text-to-text.yaml
index ef36b562db0..0577d19b164 100644
--- a/aio/intel/text-to-text.yaml
+++ b/aio/intel/text-to-text.yaml
@@ -4,30 +4,27 @@ f16: false
 parameters:
   model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
 
-roles:
-  assistant_function_call: assistant
-  function: tool
 template:
   chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
-    {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
-    {{ if eq .RoleName "function" }}<tool_result>{{end}}
+    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+    {{ if .FunctionCall }}<tool_call>{{end}}
+    {{ if eq .RoleName "tool" }}<tool_result>{{end}}
     {{if .Content}}{{.Content}}{{end}}
     {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
-    {{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
-    {{ if eq .RoleName "function" }}</tool_result>{{end}}
+    {{ if .FunctionCall }}</tool_call>{{end}}
+    {{ if eq .RoleName "tool" }}</tool_result>{{end}}
     <|im_end|>
   # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
   function: |
     <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: 
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
     <tools>
     {{range .Functions}}
     {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
     {{end}}
-    </tools> 
-    Use the following pydantic model json schema for each tool call you will make: 
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} 
+    </tools>
+    Use the following pydantic model json schema for each tool call you will make:
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
     For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
     <tool_call>
     {'arguments': <args-dict>, 'name': <function-name>}
diff --git a/core/http/api.go b/core/http/api.go
index 033b6901392..5c9095eaf25 100644
--- a/core/http/api.go
+++ b/core/http/api.go
@@ -6,6 +6,7 @@ import (
 
 	"github.com/go-skynet/LocalAI/core"
 	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
+	"github.com/gofiber/swagger" // swagger handler
 
 	"github.com/go-skynet/LocalAI/core/http/endpoints/elevenlabs"
 	"github.com/go-skynet/LocalAI/core/http/endpoints/localai"
@@ -39,6 +40,18 @@ func readAuthHeader(c *fiber.Ctx) string {
 	return authHeader
 }
 
+// @title LocalAI API
+// @version 2.0.0
+// @description The LocalAI Rest API.
+// @termsOfService
+// @contact.name LocalAI
+// @contact.url https://localai.io
+// @license.name MIT
+// @license.url https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
+// @BasePath /
+// @securityDefinitions.apikey BearerAuth
+// @in header
+// @name Authorization
 func App(application *core.Application) (*fiber.App, error) {
 	// Return errors as JSON responses
 	app := fiber.New(fiber.Config{
@@ -157,6 +170,8 @@ func App(application *core.Application) (*fiber.App, error) {
 		}{Version: internal.PrintableVersion()})
 	})
 
+	app.Get("/swagger/*", swagger.HandlerDefault) // default
+
 	welcomeRoute(
 		app,
 		application.BackendConfigLoader,
diff --git a/core/http/endpoints/elevenlabs/tts.go b/core/http/endpoints/elevenlabs/tts.go
index 1ea890e4bf4..4f5db4638e0 100644
--- a/core/http/endpoints/elevenlabs/tts.go
+++ b/core/http/endpoints/elevenlabs/tts.go
@@ -9,6 +9,12 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
+// TTSEndpoint is the ElevenLabs-compatible text-to-speech API endpoint https://elevenlabs.io/docs/api-reference/text-to-speech
+// @Summary Generates audio from the input text.
+// @Param  voice-id	path string	true	"Voice ID"
+// @Param request body schema.TTSRequest true "query params"
+// @Success 200 {string} binary	 "Response"
+// @Router /v1/text-to-speech/{voice-id} [post]
 func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 
diff --git a/core/http/endpoints/localai/tts.go b/core/http/endpoints/localai/tts.go
index 08846bef4ea..df7841fb242 100644
--- a/core/http/endpoints/localai/tts.go
+++ b/core/http/endpoints/localai/tts.go
@@ -9,6 +9,11 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
+// TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
+// @Summary Generates audio from the input text.
+// @Param request body schema.TTSRequest true "query params"
+// @Success 200 {string} binary	 "Response"
+// @Router /v1/audio/speech [post]
 func TTSEndpoint(fce *fiberContext.FiberContextExtractor, ttsbs *backend.TextToSpeechBackendService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		var err error
diff --git a/core/http/endpoints/openai/assistant.go b/core/http/endpoints/openai/assistant.go
index 0e0d8a99dbf..72cb8b4ab7d 100644
--- a/core/http/endpoints/openai/assistant.go
+++ b/core/http/endpoints/openai/assistant.go
@@ -2,17 +2,18 @@ package openai
 
 import (
 	"fmt"
-	"github.com/go-skynet/LocalAI/core/config"
-	model "github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/utils"
-	"github.com/gofiber/fiber/v2"
-	"github.com/rs/zerolog/log"
 	"net/http"
 	"sort"
 	"strconv"
 	"strings"
 	"sync/atomic"
 	"time"
+
+	"github.com/go-skynet/LocalAI/core/config"
+	model "github.com/go-skynet/LocalAI/pkg/model"
+	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/gofiber/fiber/v2"
+	"github.com/rs/zerolog/log"
 )
 
 // ToolType defines a type for tool options
@@ -65,6 +66,11 @@ type AssistantRequest struct {
 	Metadata     map[string]string `json:"metadata,omitempty"`
 }
 
+// CreateAssistantEndpoint is the OpenAI Assistant API endpoint https://platform.openai.com/docs/api-reference/assistants/createAssistant
+// @Summary Create an assistant with a model and instructions.
+// @Param request body AssistantRequest true "query params"
+// @Success 200 {object} Assistant "Response"
+// @Router /v1/assistants [post]
 func CreateAssistantEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		request := new(AssistantRequest)
@@ -333,7 +339,7 @@ func CreateAssistantFileEndpoint(cl *config.BackendConfigLoader, ml *model.Model
 			}
 		}
 
-		return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find "))
+		return c.Status(fiber.StatusNotFound).SendString(fmt.Sprintf("Unable to find assistantID %q", assistantID))
 	}
 }
 
diff --git a/core/http/endpoints/openai/chat.go b/core/http/endpoints/openai/chat.go
index b168f380395..2e9a8f00789 100644
--- a/core/http/endpoints/openai/chat.go
+++ b/core/http/endpoints/openai/chat.go
@@ -14,6 +14,11 @@ import (
 	"github.com/valyala/fasthttp"
 )
 
+// ChatEndpoint is the OpenAI Chat API endpoint https://platform.openai.com/docs/api-reference/chat/create
+// @Summary Generate a chat completion for a given prompt and model.
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/chat/completions [post]
 func ChatEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		_, request, err := fce.OpenAIRequestFromContext(c, false)
diff --git a/core/http/endpoints/openai/completion.go b/core/http/endpoints/openai/completion.go
index f63ae159a25..542236ee00c 100644
--- a/core/http/endpoints/openai/completion.go
+++ b/core/http/endpoints/openai/completion.go
@@ -15,7 +15,11 @@ import (
 	"github.com/valyala/fasthttp"
 )
 
-// https://platform.openai.com/docs/api-reference/completions
+// CompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions
+// @Summary Generate completions for a given prompt and model.
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/completions [post]
 func CompletionEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		_, request, err := fce.OpenAIRequestFromContext(c, false)
diff --git a/core/http/endpoints/openai/embeddings.go b/core/http/endpoints/openai/embeddings.go
index 0de92f00b12..be546991753 100644
--- a/core/http/endpoints/openai/embeddings.go
+++ b/core/http/endpoints/openai/embeddings.go
@@ -11,7 +11,11 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
-// https://platform.openai.com/docs/api-reference/embeddings
+// EmbeddingsEndpoint is the OpenAI Embeddings API endpoint https://platform.openai.com/docs/api-reference/embeddings
+// @Summary Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/embeddings [post]
 func EmbeddingsEndpoint(fce *fiberContext.FiberContextExtractor, ebs *backend.EmbeddingsBackendService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		_, input, err := fce.OpenAIRequestFromContext(c, true)
diff --git a/core/http/endpoints/openai/image.go b/core/http/endpoints/openai/image.go
index a77d7143850..c9dc9e59f66 100644
--- a/core/http/endpoints/openai/image.go
+++ b/core/http/endpoints/openai/image.go
@@ -27,6 +27,12 @@ import (
 
 *
 */
+
+// ImageEndpoint is the OpenAI Image generation API endpoint https://platform.openai.com/docs/api-reference/images/create
+// @Summary Creates an image given a prompt.
+// @Param request body schema.OpenAIRequest true "query params"
+// @Success 200 {object} schema.OpenAIResponse "Response"
+// @Router /v1/images/generations [post]
 func ImageEndpoint(fce *fiberContext.FiberContextExtractor, igbs *backend.ImageGenerationBackendService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		// TODO: Somewhat a hack. Is there a better place to assign this?
diff --git a/core/http/endpoints/openai/transcription.go b/core/http/endpoints/openai/transcription.go
index f4d3ddd6c6a..572cec1288c 100644
--- a/core/http/endpoints/openai/transcription.go
+++ b/core/http/endpoints/openai/transcription.go
@@ -15,7 +15,13 @@ import (
 	"github.com/rs/zerolog/log"
 )
 
-// https://platform.openai.com/docs/api-reference/audio/create
+// TranscriptEndpoint is the OpenAI Whisper API endpoint https://platform.openai.com/docs/api-reference/audio/create
+// @Summary Transcribes audio into the input language.
+// @accept multipart/form-data
+// @Param model formData string true "model"
+// @Param file formData file true "file"
+// @Success 200 {object} map[string]string	 "Response"
+// @Router /v1/audio/transcriptions [post]
 func TranscriptEndpoint(fce *fiberContext.FiberContextExtractor, tbs *backend.TranscriptionBackendService) func(c *fiber.Ctx) error {
 	return func(c *fiber.Ctx) error {
 		_, request, err := fce.OpenAIRequestFromContext(c, false)
diff --git a/core/http/views/partials/navbar.html b/core/http/views/partials/navbar.html
index 2717f974c9a..c3d3223ffa7 100644
--- a/core/http/views/partials/navbar.html
+++ b/core/http/views/partials/navbar.html
@@ -9,6 +9,7 @@
             <div>
                 <a href="/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-home pr-2"></i>Home</a>
                 <a href="https://localai.io" class="text-gray-400 hover:text-white px-3 py-2 rounded" target="_blank" ><i class="fas fa-book-reader pr-2"></i> Documentation</a>
+                <a href="/swagger/" class="text-gray-400 hover:text-white px-3 py-2 rounded"><i class="fas fa-code pr-2"></i> API</a>
             </div>
         </div>
     </div>
diff --git a/docs/content/docs/getting-started/quickstart.md b/docs/content/docs/getting-started/quickstart.md
index 716fe154652..c56dced5f44 100644
--- a/docs/content/docs/getting-started/quickstart.md
+++ b/docs/content/docs/getting-started/quickstart.md
@@ -112,7 +112,7 @@ docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models loca
 
 ## Try it out
 
-LocalAI does not ship a webui by default, however you can use 3rd party projects to interact with it (see also [All-in-one Images]({{%relref "docs/integrations" %}}) ). However, you can test out the API endpoints using `curl`.
+LocalAI does not ship a webui by default; however, you can use 3rd party projects to interact with it (see also [Integrations]({{%relref "docs/integrations" %}})). You can also test out the API endpoints using `curl`; a few examples are shown below.
 
 ### Text Generation
 
diff --git a/docs/content/docs/reference/aio-images.md b/docs/content/docs/reference/aio-images.md
index c2cb57ba9e5..40f01f06d93 100644
--- a/docs/content/docs/reference/aio-images.md
+++ b/docs/content/docs/reference/aio-images.md
@@ -46,7 +46,7 @@ The AIO Images are inheriting the same environment variables as the base images
 
 | Variable | Default | Description |
 | ---------------------| ------- | ----------- |
-| `SIZE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
+| `PROFILE` | Auto-detected | The size of the model to use. Available: `cpu`, `gpu-8g` |
 | `MODELS` | Auto-detected | A list of models YAML Configuration file URI/URL (see also [running models]({{%relref "docs/getting-started/run-other-models" %}})) |
 
 
diff --git a/embedded/models/hermes-2-pro-mistral.yaml b/embedded/models/hermes-2-pro-mistral.yaml
index 84510d2abe7..108216f5c39 100644
--- a/embedded/models/hermes-2-pro-mistral.yaml
+++ b/embedded/models/hermes-2-pro-mistral.yaml
@@ -3,30 +3,27 @@ mmap: true
 parameters:
   model: huggingface://NousResearch/Hermes-2-Pro-Mistral-7B-GGUF/Hermes-2-Pro-Mistral-7B.Q6_K.gguf
 
-roles:
-  assistant_function_call: assistant
-  function: tool
 template:
   chat_message: |
-    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "function"}}{{.Role}}{{else if eq .RoleName "user"}}user{{end}}
-    {{ if eq .RoleName "assistant_function_call" }}<tool_call>{{end}}
-    {{ if eq .RoleName "function" }}<tool_result>{{end}}
+    <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "tool"}}tool{{else if eq .RoleName "user"}}user{{end}}
+    {{ if .FunctionCall }}<tool_call>{{end}}
+    {{ if eq .RoleName "tool" }}<tool_result>{{end}}
     {{if .Content}}{{.Content}}{{end}}
     {{if .FunctionCall}}{{toJson .FunctionCall}}{{end}}
-    {{ if eq .RoleName "assistant_function_call" }}</tool_call>{{end}}
-    {{ if eq .RoleName "function" }}</tool_result>{{end}}
+    {{ if .FunctionCall }}</tool_call>{{end}}
+    {{ if eq .RoleName "tool" }}</tool_result>{{end}}
     <|im_end|>
   # https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B-GGUF#prompt-format-for-function-calling
   function: |
     <|im_start|>system
-    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: 
+    You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
     <tools>
     {{range .Functions}}
     {'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
     {{end}}
-    </tools> 
-    Use the following pydantic model json schema for each tool call you will make: 
-    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} 
+    </tools>
+    Use the following pydantic model json schema for each tool call you will make:
+    {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']}
     For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
     <tool_call>
     {'arguments': <args-dict>, 'name': <function-name>}
diff --git a/go.mod b/go.mod
index 369666b0680..2d97c3ca729 100644
--- a/go.mod
+++ b/go.mod
@@ -12,9 +12,10 @@ require (
 	github.com/go-audio/wav v1.1.0
 	github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1
 	github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428
-	github.com/gofiber/fiber/v2 v2.50.0
+	github.com/gofiber/fiber/v2 v2.52.0
 	github.com/gofiber/template/html/v2 v2.1.1
-	github.com/google/uuid v1.3.1
+	github.com/gomarkdown/markdown v0.0.0-20231222211730-1d6d20845b47
+	github.com/google/uuid v1.5.0
 	github.com/hashicorp/go-multierror v1.1.1
 	github.com/hpcloud/tail v1.0.0
 	github.com/imdario/mergo v0.3.16
@@ -32,10 +33,10 @@ require (
 	github.com/russross/blackfriday v1.6.0
 	github.com/sashabaranov/go-openai v1.20.4
 	github.com/schollz/progressbar/v3 v3.13.1
-	github.com/stretchr/testify v1.8.4
+	github.com/stretchr/testify v1.9.0
 	github.com/tmc/langchaingo v0.0.0-20231019140956-c636b3da7701
-	github.com/urfave/cli/v2 v2.25.7
-	github.com/valyala/fasthttp v1.50.0
+	github.com/urfave/cli/v2 v2.27.1
+	github.com/valyala/fasthttp v1.51.0
 	go.opentelemetry.io/otel v1.19.0
 	go.opentelemetry.io/otel/exporters/prometheus v0.42.0
 	go.opentelemetry.io/otel/metric v1.19.0
@@ -59,10 +60,13 @@ require (
 
 require (
 	github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78 // indirect
+	github.com/KyleBanks/depth v1.2.1 // indirect
 	github.com/Masterminds/goutils v1.1.1 // indirect
 	github.com/Masterminds/semver/v3 v3.2.0 // indirect
 	github.com/Microsoft/go-winio v0.6.0 // indirect
 	github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect
+	github.com/PuerkitoBio/purell v1.2.1 // indirect
+	github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
 	github.com/alecthomas/chroma v0.10.0 // indirect
 	github.com/aymanbagabas/go-osc52 v1.0.3 // indirect
 	github.com/aymerick/douceur v0.2.0 // indirect
@@ -78,6 +82,11 @@ require (
 	github.com/docker/go-units v0.4.0 // indirect
 	github.com/dsnet/compress v0.0.2-0.20210315054119-f66993602bf5 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/go-openapi/jsonpointer v0.21.0 // indirect
+	github.com/go-openapi/jsonreference v0.21.0 // indirect
+	github.com/go-openapi/spec v0.21.0 // indirect
+	github.com/go-openapi/swag v0.23.0 // indirect
+	github.com/gofiber/swagger v1.0.0 // indirect
 	github.com/gofiber/template v1.8.3 // indirect
 	github.com/gofiber/utils v1.1.0 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
@@ -86,8 +95,10 @@ require (
 	github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
 	github.com/gorilla/css v1.0.0 // indirect
 	github.com/huandu/xstrings v1.3.3 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
 	github.com/klauspost/pgzip v1.2.5 // indirect
 	github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
+	github.com/mailru/easyjson v0.7.7 // indirect
 	github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
 	github.com/microcosm-cc/bluemonday v1.0.26 // indirect
 	github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
@@ -112,6 +123,8 @@ require (
 	github.com/shopspring/decimal v1.2.0 // indirect
 	github.com/sirupsen/logrus v1.8.1 // indirect
 	github.com/spf13/cast v1.3.1 // indirect
+	github.com/swaggo/files/v2 v2.0.0 // indirect
+	github.com/swaggo/swag v1.16.3 // indirect
 	github.com/ulikunitz/xz v0.5.9 // indirect
 	github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f // indirect
 	github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
@@ -121,17 +134,18 @@ require (
 	github.com/yuin/goldmark-emoji v1.0.1 // indirect
 	go.opentelemetry.io/otel/sdk v1.19.0 // indirect
 	go.opentelemetry.io/otel/trace v1.19.0 // indirect
-	golang.org/x/crypto v0.14.0 // indirect
-	golang.org/x/mod v0.12.0 // indirect
-	golang.org/x/term v0.13.0 // indirect
+	golang.org/x/crypto v0.21.0 // indirect
+	golang.org/x/mod v0.16.0 // indirect
+	golang.org/x/term v0.18.0 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect
 	gopkg.in/fsnotify.v1 v1.4.7 // indirect
 	gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
+	sigs.k8s.io/yaml v1.4.0 // indirect
 )
 
 require (
 	github.com/andybalholm/brotli v1.0.5 // indirect
-	github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
+	github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
 	github.com/go-audio/audio v1.0.0 // indirect
 	github.com/go-audio/riff v1.0.0 // indirect
 	github.com/go-logr/logr v1.2.4 // indirect
@@ -139,18 +153,18 @@ require (
 	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38 // indirect
 	github.com/hashicorp/errwrap v1.0.0 // indirect
-	github.com/klauspost/compress v1.16.7 // indirect
+	github.com/klauspost/compress v1.17.0 // indirect
 	github.com/mattn/go-colorable v0.1.13 // indirect
-	github.com/mattn/go-isatty v0.0.19 // indirect
+	github.com/mattn/go-isatty v0.0.20 // indirect
 	github.com/mattn/go-runewidth v0.0.15 // indirect
 	github.com/mudler/go-piper v0.0.0-20230621222733-56b8a81b4760
 	github.com/rivo/uniseg v0.2.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
 	github.com/valyala/tcplisten v1.0.0 // indirect
-	github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
-	golang.org/x/net v0.17.0 // indirect
-	golang.org/x/sys v0.17.0 // indirect
-	golang.org/x/text v0.13.0 // indirect
-	golang.org/x/tools v0.12.0 // indirect
+	github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect
+	golang.org/x/net v0.22.0 // indirect
+	golang.org/x/sys v0.18.0 // indirect
+	golang.org/x/text v0.14.0 // indirect
+	golang.org/x/tools v0.19.0 // indirect
 )
diff --git a/go.sum b/go.sum
index 035e61c36b7..c88dc790740 100644
--- a/go.sum
+++ b/go.sum
@@ -3,6 +3,8 @@ github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf h1:UgjXLcE9I+VaVz7uBIlzAnyZIXwiDlIiTWqCh159aUI=
 github.com/M0Rf30/go-tiny-dream v0.0.0-20231128165230-772a9c0d9aaf/go.mod h1:UOf2Mb/deUri5agct5OJ4SLWjhI+kZKbsUVUeRb24I0=
+github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
+github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
 github.com/Masterminds/goutils v1.1.1 h1:5nUrii3FMTL5diU80unEVvNevw1nH4+ZV4DSLVJLSYI=
 github.com/Masterminds/goutils v1.1.1/go.mod h1:8cTjp+g8YejhMuvIA5y2vz3BpJxksy863GQaJW2MFNU=
 github.com/Masterminds/semver/v3 v3.2.0 h1:3MEsd0SM6jqZojhjLWWeBY+Kcjy9i6MQAeY7YgDP83g=
@@ -13,6 +15,10 @@ github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2y
 github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE=
 github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 h1:TngWCqHvy9oXAN6lEVMRuU21PR1EtLVZJmdB18Gu3Rw=
 github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5/go.mod h1:lmUJ/7eu/Q8D7ML55dXQrVaamCz2vxCfdQBasLZfHKk=
+github.com/PuerkitoBio/purell v1.2.1 h1:QsZ4TjvwiMpat6gBCBxEQI0rcS9ehtkKtSpiUnd9N28=
+github.com/PuerkitoBio/purell v1.2.1/go.mod h1:ZwHcC/82TOaovDi//J/804umJFFmbOHPngi8iYYv/Eo=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
+github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
 github.com/alecthomas/chroma v0.10.0 h1:7XDcGkCQopCNKjZHfYrNLraA+M7e0fMiJ/Mfikbfjek=
 github.com/alecthomas/chroma v0.10.0/go.mod h1:jtJATyUxlIORhUOFNA9NZDWGAQ8wpxQQqNSB4rjA/1s=
 github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
@@ -43,6 +49,8 @@ github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSV
 github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
 github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/cpuguy83/go-md2man/v2 v2.0.4 h1:wfIWP927BUkWJb2NmU/kNDYIBTh/ziUX91+lVfRxZq4=
+github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/creack/pty v1.1.11 h1:07n33Z8lZxZ2qwegKbObQohDhXDQxiMMz1NOUGYlesw=
 github.com/creack/pty v1.1.11/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
@@ -87,6 +95,14 @@ github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
 github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
 github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1 h1:yXvc7QfGtoZ51tUW/YVjoTwAfh8HG88XU7UOrbNlz5Y=
 github.com/go-skynet/go-bert.cpp v0.0.0-20230716133540-6abe312cded1/go.mod h1:fYjkCDRzC+oRLHSjQoajmYK6AmeJnmEanV27CClAcDc=
+github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
+github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
+github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
+github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4=
+github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9ZY=
+github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk=
+github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
+github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
 github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428 h1:WYjkXL0Nw7dN2uDBMVCWQ8xLavrIhjF/DLczuh5L9TY=
 github.com/go-skynet/go-llama.cpp v0.0.0-20231009155254-aeba71ee8428/go.mod h1:iub0ugfTnflE3rcIuqV2pQSo15nEw3GLW/utm5gyERo=
 github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI=
@@ -98,6 +114,10 @@ github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5x
 github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/gofiber/fiber/v2 v2.50.0 h1:ia0JaB+uw3GpNSCR5nvC5dsaxXjRU5OEu36aytx+zGw=
 github.com/gofiber/fiber/v2 v2.50.0/go.mod h1:21eytvay9Is7S6z+OgPi7c7n4++tnClWmhpimVHMimw=
+github.com/gofiber/fiber/v2 v2.52.0 h1:S+qXi7y+/Pgvqq4DrSmREGiFwtB7Bu6+QFLuIHYw/UE=
+github.com/gofiber/fiber/v2 v2.52.0/go.mod h1:KEOE+cXMhXG0zHc9d8+E38hoX+ZN7bhOtgeF2oT6jrQ=
+github.com/gofiber/swagger v1.0.0 h1:BzUzDS9ZT6fDUa692kxmfOjc1DZiloLiPK/W5z1H1tc=
+github.com/gofiber/swagger v1.0.0/go.mod h1:QrYNF1Yrc7ggGK6ATsJ6yfH/8Zi5bu9lA7wB8TmCecg=
 github.com/gofiber/template v1.8.3 h1:hzHdvMwMo/T2kouz2pPCA0zGiLCeMnoGsQZBTSYgZxc=
 github.com/gofiber/template v1.8.3/go.mod h1:bs/2n0pSNPOkRa5VJ8zTIvedcI/lEYxzV3+YPXdBvq8=
 github.com/gofiber/template/html/v2 v2.1.1 h1:QEy3O3EBkvwDthy5bXVGUseOyO6ldJoiDxlF4+MJiV8=
@@ -135,6 +155,8 @@ github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3
 github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
 github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
+github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
 github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
 github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
@@ -149,6 +171,8 @@ github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:
 github.com/imdario/mergo v0.3.11/go.mod h1:jmQim1M+e3UYxmgPu/WyfjB3N3VflVyUjjjwH0dnCYA=
 github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
 github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
 github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
 github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
@@ -156,6 +180,8 @@ github.com/klauspost/compress v1.4.1/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0
 github.com/klauspost/compress v1.11.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs=
 github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
 github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
+github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM=
+github.com/klauspost/compress v1.17.0/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
 github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
@@ -172,12 +198,16 @@ github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69
 github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
 github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4=
 github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
 github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
 github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
 github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
 github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
+github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
 github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
 github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
 github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
@@ -312,6 +342,12 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
 github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/swaggo/files/v2 v2.0.0 h1:hmAt8Dkynw7Ssz46F6pn8ok6YmGZqHSVLZ+HQM7i0kw=
+github.com/swaggo/files/v2 v2.0.0/go.mod h1:24kk2Y9NYEJ5lHuCra6iVwkMjIekMCaFq/0JQj66kyM=
+github.com/swaggo/swag v1.16.3 h1:PnCYjPCah8FK4I26l2F/KQ4yz3sILcVUN3cTlBFA9Pg=
+github.com/swaggo/swag v1.16.3/go.mod h1:DImHIuOFXKpMFAQjcC7FG4m3Dg4+QuUgUzJmKjI/gRk=
 github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
 github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI=
 github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU=
@@ -327,10 +363,14 @@ github.com/ulikunitz/xz v0.5.9/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oW
 github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
 github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs=
 github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
+github.com/urfave/cli/v2 v2.27.1 h1:8xSQ6szndafKVRmfyeUMxkNUJQMjL1F2zmsZ+qHpfho=
+github.com/urfave/cli/v2 v2.27.1/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
 github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
 github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
 github.com/valyala/fasthttp v1.50.0 h1:H7fweIlBm0rXLs2q0XbalvJ6r0CUPFWK3/bB4N13e9M=
 github.com/valyala/fasthttp v1.50.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
+github.com/valyala/fasthttp v1.51.0 h1:8b30A5JlZ6C7AS81RsWjYMQmrZG6feChmgAolCl1SqA=
+github.com/valyala/fasthttp v1.51.0/go.mod h1:oI2XroL+lI7vdXyYoQk03bXBThfFl2cVdIA3Xl7cH8g=
 github.com/valyala/tcplisten v1.0.0 h1:rBHj/Xf+E1tRGZyWIWwJDiRY0zc1Js+CV5DqwacVSA8=
 github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
 github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
@@ -345,6 +385,8 @@ github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 h1:nIPpBwaJSVYIxUFsDv3M8ofm
 github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8/go.mod h1:HUYIGzjTL3rfEspMxjDjgmT5uz5wzYJKVo23qUhYTos=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
 github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
+github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 h1:+qGGcbkzsfDQNPPe9UDgpxAWQrhbbBXOYJFQDq/dtJw=
+github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913/go.mod h1:4aEEwZQutDLsQv2Deui4iYQ6DWTxR14g6m8Wv88+Xqk=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
@@ -373,11 +415,15 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
 golang.org/x/crypto v0.3.0/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
 golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
 golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
+golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
+golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
 golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
 golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
 golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic=
+golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
 golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
 golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
@@ -393,6 +439,8 @@ golang.org/x/net v0.0.0-20221002022538-bcab6841153b/go.mod h1:YDH+HFinaLZZlnHAfS
 golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
 golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
 golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
+golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
+golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -401,6 +449,7 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
 golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
+golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
 golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -436,12 +485,16 @@ golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
 golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
+golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
 golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
 golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
 golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
+golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
+golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
@@ -449,6 +502,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
 golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
 golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
+golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
@@ -458,6 +513,8 @@ golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4f
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
 golang.org/x/tools v0.12.0 h1:YW6HUoUmYBpwSgyaGaZq1fHjrBjX1rlpZ54T6mu2kss=
 golang.org/x/tools v0.12.0/go.mod h1:Sc0INKfu04TlqNoRA1hgpFZbhYXHPr4V5DzpSBTPqQM=
+golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw=
+golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -496,3 +553,5 @@ gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
 gotest.tools/v3 v3.3.0 h1:MfDY1b1/0xN1CyMlQDac0ziEy9zJQd9CXBRRDHw2jJo=
 gotest.tools/v3 v3.3.0/go.mod h1:Mcr9QNxkg0uMvy/YElmo4SpXgJKWgQvYrT7Kw5RzJ1A=
+sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
+sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
diff --git a/main.go b/main.go
index cde42776a5b..2d107d0f40f 100644
--- a/main.go
+++ b/main.go
@@ -26,6 +26,8 @@ import (
 	"github.com/rs/zerolog/log"
 	progressbar "github.com/schollz/progressbar/v3"
 	"github.com/urfave/cli/v2"
+
+	_ "github.com/go-skynet/LocalAI/swagger"
 )
 
 const (
diff --git a/swagger/docs.go b/swagger/docs.go
new file mode 100644
index 00000000000..e01996736c7
--- /dev/null
+++ b/swagger/docs.go
@@ -0,0 +1,801 @@
+// Code generated by swaggo/swag. DO NOT EDIT.
+
+package swagger
+
+import "github.com/swaggo/swag"
+
+const docTemplate = `{
+    "schemes": {{ marshal .Schemes }},
+    "swagger": "2.0",
+    "info": {
+        "description": "{{escape .Description}}",
+        "title": "{{.Title}}",
+        "contact": {
+            "name": "LocalAI",
+            "url": "https://localai.io"
+        },
+        "license": {
+            "name": "MIT",
+            "url": "https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE"
+        },
+        "version": "{{.Version}}"
+    },
+    "host": "{{.Host}}",
+    "basePath": "{{.BasePath}}",
+    "paths": {
+        "/v1/assistants": {
+            "post": {
+                "summary": "Create an assistant with a model and instructions.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/openai.AssistantRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/openai.Assistant"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/audio/speech": {
+            "post": {
+                "summary": "Generates audio from the input text.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.TTSRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/audio/transcriptions": {
+            "post": {
+                "consumes": [
+                    "multipart/form-data"
+                ],
+                "summary": "Transcribes audio into the input language.",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "model",
+                        "name": "model",
+                        "in": "formData",
+                        "required": true
+                    },
+                    {
+                        "type": "file",
+                        "description": "file",
+                        "name": "file",
+                        "in": "formData",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/chat/completions": {
+            "post": {
+                "summary": "Generate a chat completions for a given prompt and model.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/completions": {
+            "post": {
+                "summary": "Generate completions for a given prompt and model.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/embeddings": {
+            "post": {
+                "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/images/generations": {
+            "post": {
+                "summary": "Creates an image given a prompt.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/text-to-speech/{voice-id}": {
+            "post": {
+                "summary": "Generates audio from the input text.",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Account ID",
+                        "name": "voice-id",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.TTSRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "definitions": {
+        "grammar.Argument": {
+            "type": "object",
+            "properties": {
+                "properties": {
+                    "type": "object",
+                    "additionalProperties": true
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "grammar.Function": {
+            "type": "object",
+            "properties": {
+                "description": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                },
+                "parameters": {
+                    "type": "object",
+                    "additionalProperties": true
+                }
+            }
+        },
+        "grammar.FunctionName": {
+            "type": "object",
+            "properties": {
+                "const": {
+                    "type": "string"
+                }
+            }
+        },
+        "grammar.Item": {
+            "type": "object",
+            "properties": {
+                "properties": {
+                    "$ref": "#/definitions/grammar.Properties"
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "grammar.JSONFunctionStructure": {
+            "type": "object",
+            "properties": {
+                "$defs": {
+                    "type": "object",
+                    "additionalProperties": true
+                },
+                "anyOf": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/grammar.Item"
+                    }
+                },
+                "oneOf": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/grammar.Item"
+                    }
+                }
+            }
+        },
+        "grammar.Properties": {
+            "type": "object",
+            "properties": {
+                "arguments": {
+                    "$ref": "#/definitions/grammar.Argument"
+                },
+                "function": {
+                    "$ref": "#/definitions/grammar.FunctionName"
+                }
+            }
+        },
+        "grammar.Tool": {
+            "type": "object",
+            "properties": {
+                "function": {
+                    "$ref": "#/definitions/grammar.Function"
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "openai.Assistant": {
+            "type": "object",
+            "properties": {
+                "created": {
+                    "description": "The time at which the assistant was created.",
+                    "type": "integer"
+                },
+                "description": {
+                    "description": "The description of the assistant.",
+                    "type": "string"
+                },
+                "file_ids": {
+                    "description": "A list of file IDs attached to this assistant.",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "id": {
+                    "description": "The unique identifier of the assistant.",
+                    "type": "string"
+                },
+                "instructions": {
+                    "description": "The system instructions that the assistant uses.",
+                    "type": "string"
+                },
+                "metadata": {
+                    "description": "Set of key-value pairs attached to the assistant.",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
+                },
+                "model": {
+                    "description": "The model ID used by the assistant.",
+                    "type": "string"
+                },
+                "name": {
+                    "description": "The name of the assistant.",
+                    "type": "string"
+                },
+                "object": {
+                    "description": "Object type, which is \"assistant\".",
+                    "type": "string"
+                },
+                "tools": {
+                    "description": "A list of tools enabled on the assistant.",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/openai.Tool"
+                    }
+                }
+            }
+        },
+        "openai.AssistantRequest": {
+            "type": "object",
+            "properties": {
+                "description": {
+                    "type": "string"
+                },
+                "file_ids": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "instructions": {
+                    "type": "string"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
+                },
+                "model": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                },
+                "tools": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/openai.Tool"
+                    }
+                }
+            }
+        },
+        "openai.Tool": {
+            "type": "object",
+            "properties": {
+                "type": {
+                    "$ref": "#/definitions/openai.ToolType"
+                }
+            }
+        },
+        "openai.ToolType": {
+            "type": "string",
+            "enum": [
+                "code_interpreter",
+                "retrieval",
+                "function"
+            ],
+            "x-enum-varnames": [
+                "CodeInterpreter",
+                "Retrieval",
+                "Function"
+            ]
+        },
+        "schema.ChatCompletionResponseFormat": {
+            "type": "object",
+            "properties": {
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "schema.Choice": {
+            "type": "object",
+            "properties": {
+                "delta": {
+                    "$ref": "#/definitions/schema.Message"
+                },
+                "finish_reason": {
+                    "type": "string"
+                },
+                "index": {
+                    "type": "integer"
+                },
+                "message": {
+                    "$ref": "#/definitions/schema.Message"
+                },
+                "text": {
+                    "type": "string"
+                }
+            }
+        },
+        "schema.FunctionCall": {
+            "type": "object",
+            "properties": {
+                "arguments": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                }
+            }
+        },
+        "schema.Item": {
+            "type": "object",
+            "properties": {
+                "b64_json": {
+                    "type": "string"
+                },
+                "embedding": {
+                    "type": "array",
+                    "items": {
+                        "type": "number"
+                    }
+                },
+                "index": {
+                    "type": "integer"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "url": {
+                    "description": "Images",
+                    "type": "string"
+                }
+            }
+        },
+        "schema.Message": {
+            "type": "object",
+            "properties": {
+                "content": {
+                    "description": "The message content"
+                },
+                "function_call": {
+                    "description": "A result of a function call"
+                },
+                "name": {
+                    "description": "The message name (used for tools calls)",
+                    "type": "string"
+                },
+                "role": {
+                    "description": "The message role",
+                    "type": "string"
+                },
+                "string_content": {
+                    "type": "string"
+                },
+                "string_images": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "tool_calls": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/schema.ToolCall"
+                    }
+                }
+            }
+        },
+        "schema.OpenAIRequest": {
+            "type": "object",
+            "required": [
+                "file"
+            ],
+            "properties": {
+                "backend": {
+                    "type": "string"
+                },
+                "batch": {
+                    "description": "Custom parameters - not present in the OpenAI API",
+                    "type": "integer"
+                },
+                "clip_skip": {
+                    "description": "Diffusers",
+                    "type": "integer"
+                },
+                "echo": {
+                    "type": "boolean"
+                },
+                "file": {
+                    "description": "whisper",
+                    "type": "string"
+                },
+                "frequency_penalty": {
+                    "type": "number"
+                },
+                "function_call": {
+                    "description": "might be a string or an object"
+                },
+                "functions": {
+                    "description": "A list of available functions to call",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/grammar.Function"
+                    }
+                },
+                "grammar": {
+                    "description": "A grammar to constrain the LLM output",
+                    "type": "string"
+                },
+                "grammar_json_functions": {
+                    "$ref": "#/definitions/grammar.JSONFunctionStructure"
+                },
+                "ignore_eos": {
+                    "type": "boolean"
+                },
+                "input": {},
+                "instruction": {
+                    "description": "Edit endpoint",
+                    "type": "string"
+                },
+                "language": {
+                    "description": "Also part of the OpenAI official spec",
+                    "type": "string"
+                },
+                "max_tokens": {
+                    "type": "integer"
+                },
+                "messages": {
+                    "description": "Messages is read only by chat/completion API calls",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/schema.Message"
+                    }
+                },
+                "mode": {
+                    "description": "Image (not supported by OpenAI)",
+                    "type": "integer"
+                },
+                "model": {
+                    "description": "Also part of the OpenAI official spec",
+                    "type": "string"
+                },
+                "model_base_name": {
+                    "description": "AutoGPTQ",
+                    "type": "string"
+                },
+                "n": {
+                    "description": "Also part of the OpenAI official spec. use it for returning multiple results",
+                    "type": "integer"
+                },
+                "n_keep": {
+                    "type": "integer"
+                },
+                "negative_prompt": {
+                    "type": "string"
+                },
+                "negative_prompt_scale": {
+                    "type": "number"
+                },
+                "presence_penalty": {
+                    "type": "number"
+                },
+                "prompt": {
+                    "description": "Prompt is read only by completion/image API calls"
+                },
+                "repeat_penalty": {
+                    "type": "number"
+                },
+                "response_format": {
+                    "description": "whisper/image",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/schema.ChatCompletionResponseFormat"
+                        }
+                    ]
+                },
+                "rope_freq_base": {
+                    "type": "number"
+                },
+                "rope_freq_scale": {
+                    "type": "number"
+                },
+                "seed": {
+                    "type": "integer"
+                },
+                "size": {
+                    "description": "image",
+                    "type": "string"
+                },
+                "step": {
+                    "type": "integer"
+                },
+                "stop": {},
+                "stream": {
+                    "type": "boolean"
+                },
+                "temperature": {
+                    "type": "number"
+                },
+                "tfz": {
+                    "type": "number"
+                },
+                "tokenizer": {
+                    "description": "RWKV (?)",
+                    "type": "string"
+                },
+                "tool_choice": {},
+                "tools": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/grammar.Tool"
+                    }
+                },
+                "top_k": {
+                    "type": "integer"
+                },
+                "top_p": {
+                    "description": "Common options between all the API calls, part of the OpenAI spec",
+                    "type": "number"
+                },
+                "typical_p": {
+                    "type": "number"
+                },
+                "use_fast_tokenizer": {
+                    "description": "AutoGPTQ",
+                    "type": "boolean"
+                }
+            }
+        },
+        "schema.OpenAIResponse": {
+            "type": "object",
+            "properties": {
+                "choices": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/schema.Choice"
+                    }
+                },
+                "created": {
+                    "type": "integer"
+                },
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/schema.Item"
+                    }
+                },
+                "id": {
+                    "type": "string"
+                },
+                "model": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "usage": {
+                    "$ref": "#/definitions/schema.OpenAIUsage"
+                }
+            }
+        },
+        "schema.OpenAIUsage": {
+            "type": "object",
+            "properties": {
+                "completion_tokens": {
+                    "type": "integer"
+                },
+                "prompt_tokens": {
+                    "type": "integer"
+                },
+                "total_tokens": {
+                    "type": "integer"
+                }
+            }
+        },
+        "schema.TTSRequest": {
+            "type": "object",
+            "properties": {
+                "backend": {
+                    "type": "string"
+                },
+                "input": {
+                    "type": "string"
+                },
+                "model": {
+                    "type": "string"
+                },
+                "voice": {
+                    "type": "string"
+                }
+            }
+        },
+        "schema.ToolCall": {
+            "type": "object",
+            "properties": {
+                "function": {
+                    "$ref": "#/definitions/schema.FunctionCall"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "index": {
+                    "type": "integer"
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        }
+    },
+    "securityDefinitions": {
+        "BearerAuth": {
+            "type": "apiKey",
+            "name": "Authorization",
+            "in": "header"
+        }
+    }
+}`
+
+// SwaggerInfo holds exported Swagger Info so clients can modify it; init passes this same pointer to swag.Register.
+var SwaggerInfo = &swag.Spec{
+	Version:          "2.0.0",
+	Host:             "", // left empty: no host is hard-coded here
+	BasePath:         "/",
+	Schemes:          []string{},
+	Title:            "LocalAI API",
+	Description:      "The LocalAI Rest API.",
+	InfoInstanceName: "swagger", // presumably the name InstanceName() yields in init; confirm against swag
+	SwaggerTemplate:  docTemplate,
+	LeftDelim:        "{{", // NOTE(review): looks like the placeholder delimiters used inside docTemplate; confirm
+	RightDelim:       "}}",
+}
+
+func init() {
+	swag.Register(SwaggerInfo.InstanceName(), SwaggerInfo) // runs at package initialization: hands the SwaggerInfo pointer to swag under its instance name
+}
diff --git a/swagger/swagger.json b/swagger/swagger.json
new file mode 100644
index 00000000000..4d7102c42ea
--- /dev/null
+++ b/swagger/swagger.json
@@ -0,0 +1,775 @@
+{
+    "swagger": "2.0",
+    "info": {
+        "description": "The LocalAI Rest API.",
+        "title": "LocalAI API",
+        "contact": {
+            "name": "LocalAI",
+            "url": "https://localai.io"
+        },
+        "license": {
+            "name": "MIT",
+            "url": "https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE"
+        },
+        "version": "2.0.0"
+    },
+    "basePath": "/",
+    "paths": {
+        "/v1/assistants": {
+            "post": {
+                "summary": "Create an assistant with a model and instructions.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/openai.AssistantRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/openai.Assistant"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/audio/speech": {
+            "post": {
+                "summary": "Generates audio from the input text.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.TTSRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/audio/transcriptions": {
+            "post": {
+                "consumes": [
+                    "multipart/form-data"
+                ],
+                "summary": "Transcribes audio into the input language.",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "model",
+                        "name": "model",
+                        "in": "formData",
+                        "required": true
+                    },
+                    {
+                        "type": "file",
+                        "description": "file",
+                        "name": "file",
+                        "in": "formData",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "type": "object",
+                            "additionalProperties": {
+                                "type": "string"
+                            }
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/chat/completions": {
+            "post": {
+                "summary": "Generate chat completions for a given prompt and model.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/completions": {
+            "post": {
+                "summary": "Generate completions for a given prompt and model.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/embeddings": {
+            "post": {
+                "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/images/generations": {
+            "post": {
+                "summary": "Creates an image given a prompt.",
+                "parameters": [
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "$ref": "#/definitions/schema.OpenAIResponse"
+                        }
+                    }
+                }
+            }
+        },
+        "/v1/text-to-speech/{voice-id}": {
+            "post": {
+                "summary": "Generates audio from the input text.",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Voice ID",
+                        "name": "voice-id",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "description": "query params",
+                        "name": "request",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/schema.TTSRequest"
+                        }
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Response",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "definitions": {
+        "grammar.Argument": {
+            "type": "object",
+            "properties": {
+                "properties": {
+                    "type": "object",
+                    "additionalProperties": true
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "grammar.Function": {
+            "type": "object",
+            "properties": {
+                "description": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                },
+                "parameters": {
+                    "type": "object",
+                    "additionalProperties": true
+                }
+            }
+        },
+        "grammar.FunctionName": {
+            "type": "object",
+            "properties": {
+                "const": {
+                    "type": "string"
+                }
+            }
+        },
+        "grammar.Item": {
+            "type": "object",
+            "properties": {
+                "properties": {
+                    "$ref": "#/definitions/grammar.Properties"
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "grammar.JSONFunctionStructure": {
+            "type": "object",
+            "properties": {
+                "$defs": {
+                    "type": "object",
+                    "additionalProperties": true
+                },
+                "anyOf": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/grammar.Item"
+                    }
+                },
+                "oneOf": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/grammar.Item"
+                    }
+                }
+            }
+        },
+        "grammar.Properties": {
+            "type": "object",
+            "properties": {
+                "arguments": {
+                    "$ref": "#/definitions/grammar.Argument"
+                },
+                "function": {
+                    "$ref": "#/definitions/grammar.FunctionName"
+                }
+            }
+        },
+        "grammar.Tool": {
+            "type": "object",
+            "properties": {
+                "function": {
+                    "$ref": "#/definitions/grammar.Function"
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "openai.Assistant": {
+            "type": "object",
+            "properties": {
+                "created": {
+                    "description": "The time at which the assistant was created.",
+                    "type": "integer"
+                },
+                "description": {
+                    "description": "The description of the assistant.",
+                    "type": "string"
+                },
+                "file_ids": {
+                    "description": "A list of file IDs attached to this assistant.",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "id": {
+                    "description": "The unique identifier of the assistant.",
+                    "type": "string"
+                },
+                "instructions": {
+                    "description": "The system instructions that the assistant uses.",
+                    "type": "string"
+                },
+                "metadata": {
+                    "description": "Set of key-value pairs attached to the assistant.",
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
+                },
+                "model": {
+                    "description": "The model ID used by the assistant.",
+                    "type": "string"
+                },
+                "name": {
+                    "description": "The name of the assistant.",
+                    "type": "string"
+                },
+                "object": {
+                    "description": "Object type, which is \"assistant\".",
+                    "type": "string"
+                },
+                "tools": {
+                    "description": "A list of tools enabled on the assistant.",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/openai.Tool"
+                    }
+                }
+            }
+        },
+        "openai.AssistantRequest": {
+            "type": "object",
+            "properties": {
+                "description": {
+                    "type": "string"
+                },
+                "file_ids": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "instructions": {
+                    "type": "string"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
+                },
+                "model": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                },
+                "tools": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/openai.Tool"
+                    }
+                }
+            }
+        },
+        "openai.Tool": {
+            "type": "object",
+            "properties": {
+                "type": {
+                    "$ref": "#/definitions/openai.ToolType"
+                }
+            }
+        },
+        "openai.ToolType": {
+            "type": "string",
+            "enum": [
+                "code_interpreter",
+                "retrieval",
+                "function"
+            ],
+            "x-enum-varnames": [
+                "CodeInterpreter",
+                "Retrieval",
+                "Function"
+            ]
+        },
+        "schema.ChatCompletionResponseFormat": {
+            "type": "object",
+            "properties": {
+                "type": {
+                    "type": "string"
+                }
+            }
+        },
+        "schema.Choice": {
+            "type": "object",
+            "properties": {
+                "delta": {
+                    "$ref": "#/definitions/schema.Message"
+                },
+                "finish_reason": {
+                    "type": "string"
+                },
+                "index": {
+                    "type": "integer"
+                },
+                "message": {
+                    "$ref": "#/definitions/schema.Message"
+                },
+                "text": {
+                    "type": "string"
+                }
+            }
+        },
+        "schema.FunctionCall": {
+            "type": "object",
+            "properties": {
+                "arguments": {
+                    "type": "string"
+                },
+                "name": {
+                    "type": "string"
+                }
+            }
+        },
+        "schema.Item": {
+            "type": "object",
+            "properties": {
+                "b64_json": {
+                    "type": "string"
+                },
+                "embedding": {
+                    "type": "array",
+                    "items": {
+                        "type": "number"
+                    }
+                },
+                "index": {
+                    "type": "integer"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "url": {
+                    "description": "Images",
+                    "type": "string"
+                }
+            }
+        },
+        "schema.Message": {
+            "type": "object",
+            "properties": {
+                "content": {
+                    "description": "The message content"
+                },
+                "function_call": {
+                    "description": "A result of a function call"
+                },
+                "name": {
+                    "description": "The message name (used for tool calls)",
+                    "type": "string"
+                },
+                "role": {
+                    "description": "The message role",
+                    "type": "string"
+                },
+                "string_content": {
+                    "type": "string"
+                },
+                "string_images": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "tool_calls": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/schema.ToolCall"
+                    }
+                }
+            }
+        },
+        "schema.OpenAIRequest": {
+            "type": "object",
+            "required": [
+                "file"
+            ],
+            "properties": {
+                "backend": {
+                    "type": "string"
+                },
+                "batch": {
+                    "description": "Custom parameters - not present in the OpenAI API",
+                    "type": "integer"
+                },
+                "clip_skip": {
+                    "description": "Diffusers",
+                    "type": "integer"
+                },
+                "echo": {
+                    "type": "boolean"
+                },
+                "file": {
+                    "description": "whisper",
+                    "type": "string"
+                },
+                "frequency_penalty": {
+                    "type": "number"
+                },
+                "function_call": {
+                    "description": "might be a string or an object"
+                },
+                "functions": {
+                    "description": "A list of available functions to call",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/grammar.Function"
+                    }
+                },
+                "grammar": {
+                    "description": "A grammar to constrain the LLM output",
+                    "type": "string"
+                },
+                "grammar_json_functions": {
+                    "$ref": "#/definitions/grammar.JSONFunctionStructure"
+                },
+                "ignore_eos": {
+                    "type": "boolean"
+                },
+                "input": {},
+                "instruction": {
+                    "description": "Edit endpoint",
+                    "type": "string"
+                },
+                "language": {
+                    "description": "Also part of the OpenAI official spec",
+                    "type": "string"
+                },
+                "max_tokens": {
+                    "type": "integer"
+                },
+                "messages": {
+                    "description": "Messages is read only by chat/completion API calls",
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/schema.Message"
+                    }
+                },
+                "mode": {
+                    "description": "Image (not supported by OpenAI)",
+                    "type": "integer"
+                },
+                "model": {
+                    "description": "Also part of the OpenAI official spec",
+                    "type": "string"
+                },
+                "model_base_name": {
+                    "description": "AutoGPTQ",
+                    "type": "string"
+                },
+                "n": {
+                    "description": "Also part of the OpenAI official spec. Use it for returning multiple results",
+                    "type": "integer"
+                },
+                "n_keep": {
+                    "type": "integer"
+                },
+                "negative_prompt": {
+                    "type": "string"
+                },
+                "negative_prompt_scale": {
+                    "type": "number"
+                },
+                "presence_penalty": {
+                    "type": "number"
+                },
+                "prompt": {
+                    "description": "Prompt is read only by completion/image API calls"
+                },
+                "repeat_penalty": {
+                    "type": "number"
+                },
+                "response_format": {
+                    "description": "whisper/image",
+                    "allOf": [
+                        {
+                            "$ref": "#/definitions/schema.ChatCompletionResponseFormat"
+                        }
+                    ]
+                },
+                "rope_freq_base": {
+                    "type": "number"
+                },
+                "rope_freq_scale": {
+                    "type": "number"
+                },
+                "seed": {
+                    "type": "integer"
+                },
+                "size": {
+                    "description": "image",
+                    "type": "string"
+                },
+                "step": {
+                    "type": "integer"
+                },
+                "stop": {},
+                "stream": {
+                    "type": "boolean"
+                },
+                "temperature": {
+                    "type": "number"
+                },
+                "tfz": {
+                    "type": "number"
+                },
+                "tokenizer": {
+                    "description": "RWKV (?)",
+                    "type": "string"
+                },
+                "tool_choice": {},
+                "tools": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/grammar.Tool"
+                    }
+                },
+                "top_k": {
+                    "type": "integer"
+                },
+                "top_p": {
+                    "description": "Common options between all the API calls, part of the OpenAI spec",
+                    "type": "number"
+                },
+                "typical_p": {
+                    "type": "number"
+                },
+                "use_fast_tokenizer": {
+                    "description": "AutoGPTQ",
+                    "type": "boolean"
+                }
+            }
+        },
+        "schema.OpenAIResponse": {
+            "type": "object",
+            "properties": {
+                "choices": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/schema.Choice"
+                    }
+                },
+                "created": {
+                    "type": "integer"
+                },
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "$ref": "#/definitions/schema.Item"
+                    }
+                },
+                "id": {
+                    "type": "string"
+                },
+                "model": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string"
+                },
+                "usage": {
+                    "$ref": "#/definitions/schema.OpenAIUsage"
+                }
+            }
+        },
+        "schema.OpenAIUsage": {
+            "type": "object",
+            "properties": {
+                "completion_tokens": {
+                    "type": "integer"
+                },
+                "prompt_tokens": {
+                    "type": "integer"
+                },
+                "total_tokens": {
+                    "type": "integer"
+                }
+            }
+        },
+        "schema.TTSRequest": {
+            "type": "object",
+            "properties": {
+                "backend": {
+                    "type": "string"
+                },
+                "input": {
+                    "type": "string"
+                },
+                "model": {
+                    "type": "string"
+                },
+                "voice": {
+                    "type": "string"
+                }
+            }
+        },
+        "schema.ToolCall": {
+            "type": "object",
+            "properties": {
+                "function": {
+                    "$ref": "#/definitions/schema.FunctionCall"
+                },
+                "id": {
+                    "type": "string"
+                },
+                "index": {
+                    "type": "integer"
+                },
+                "type": {
+                    "type": "string"
+                }
+            }
+        }
+    },
+    "securityDefinitions": {
+        "BearerAuth": {
+            "type": "apiKey",
+            "name": "Authorization",
+            "in": "header"
+        }
+    }
+}
\ No newline at end of file
diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml
new file mode 100644
index 00000000000..86caff8acaf
--- /dev/null
+++ b/swagger/swagger.yaml
@@ -0,0 +1,518 @@
+basePath: /
+definitions:
+  grammar.Argument:
+    properties:
+      properties:
+        additionalProperties: true
+        type: object
+      type:
+        type: string
+    type: object
+  grammar.Function:
+    properties:
+      description:
+        type: string
+      name:
+        type: string
+      parameters:
+        additionalProperties: true
+        type: object
+    type: object
+  grammar.FunctionName:
+    properties:
+      const:
+        type: string
+    type: object
+  grammar.Item:
+    properties:
+      properties:
+        $ref: '#/definitions/grammar.Properties'
+      type:
+        type: string
+    type: object
+  grammar.JSONFunctionStructure:
+    properties:
+      $defs:
+        additionalProperties: true
+        type: object
+      anyOf:
+        items:
+          $ref: '#/definitions/grammar.Item'
+        type: array
+      oneOf:
+        items:
+          $ref: '#/definitions/grammar.Item'
+        type: array
+    type: object
+  grammar.Properties:
+    properties:
+      arguments:
+        $ref: '#/definitions/grammar.Argument'
+      function:
+        $ref: '#/definitions/grammar.FunctionName'
+    type: object
+  grammar.Tool:
+    properties:
+      function:
+        $ref: '#/definitions/grammar.Function'
+      type:
+        type: string
+    type: object
+  openai.Assistant:
+    properties:
+      created:
+        description: The time at which the assistant was created.
+        type: integer
+      description:
+        description: The description of the assistant.
+        type: string
+      file_ids:
+        description: A list of file IDs attached to this assistant.
+        items:
+          type: string
+        type: array
+      id:
+        description: The unique identifier of the assistant.
+        type: string
+      instructions:
+        description: The system instructions that the assistant uses.
+        type: string
+      metadata:
+        additionalProperties:
+          type: string
+        description: Set of key-value pairs attached to the assistant.
+        type: object
+      model:
+        description: The model ID used by the assistant.
+        type: string
+      name:
+        description: The name of the assistant.
+        type: string
+      object:
+        description: Object type, which is "assistant".
+        type: string
+      tools:
+        description: A list of tools enabled on the assistant.
+        items:
+          $ref: '#/definitions/openai.Tool'
+        type: array
+    type: object
+  openai.AssistantRequest:
+    properties:
+      description:
+        type: string
+      file_ids:
+        items:
+          type: string
+        type: array
+      instructions:
+        type: string
+      metadata:
+        additionalProperties:
+          type: string
+        type: object
+      model:
+        type: string
+      name:
+        type: string
+      tools:
+        items:
+          $ref: '#/definitions/openai.Tool'
+        type: array
+    type: object
+  openai.Tool:
+    properties:
+      type:
+        $ref: '#/definitions/openai.ToolType'
+    type: object
+  openai.ToolType:
+    enum:
+    - code_interpreter
+    - retrieval
+    - function
+    type: string
+    x-enum-varnames:
+    - CodeInterpreter
+    - Retrieval
+    - Function
+  schema.ChatCompletionResponseFormat:
+    properties:
+      type:
+        type: string
+    type: object
+  schema.Choice:
+    properties:
+      delta:
+        $ref: '#/definitions/schema.Message'
+      finish_reason:
+        type: string
+      index:
+        type: integer
+      message:
+        $ref: '#/definitions/schema.Message'
+      text:
+        type: string
+    type: object
+  schema.FunctionCall:
+    properties:
+      arguments:
+        type: string
+      name:
+        type: string
+    type: object
+  schema.Item:
+    properties:
+      b64_json:
+        type: string
+      embedding:
+        items:
+          type: number
+        type: array
+      index:
+        type: integer
+      object:
+        type: string
+      url:
+        description: Images
+        type: string
+    type: object
+  schema.Message:
+    properties:
+      content:
+        description: The message content
+      function_call:
+        description: A result of a function call
+      name:
+        description: The message name (used for tool calls)
+        type: string
+      role:
+        description: The message role
+        type: string
+      string_content:
+        type: string
+      string_images:
+        items:
+          type: string
+        type: array
+      tool_calls:
+        items:
+          $ref: '#/definitions/schema.ToolCall'
+        type: array
+    type: object
+  schema.OpenAIRequest:
+    properties:
+      backend:
+        type: string
+      batch:
+        description: Custom parameters - not present in the OpenAI API
+        type: integer
+      clip_skip:
+        description: Diffusers
+        type: integer
+      echo:
+        type: boolean
+      file:
+        description: whisper
+        type: string
+      frequency_penalty:
+        type: number
+      function_call:
+        description: might be a string or an object
+      functions:
+        description: A list of available functions to call
+        items:
+          $ref: '#/definitions/grammar.Function'
+        type: array
+      grammar:
+        description: A grammar to constrain the LLM output
+        type: string
+      grammar_json_functions:
+        $ref: '#/definitions/grammar.JSONFunctionStructure'
+      ignore_eos:
+        type: boolean
+      input: {}
+      instruction:
+        description: Edit endpoint
+        type: string
+      language:
+        description: Also part of the OpenAI official spec
+        type: string
+      max_tokens:
+        type: integer
+      messages:
+        description: Messages is read only by chat/completion API calls
+        items:
+          $ref: '#/definitions/schema.Message'
+        type: array
+      mode:
+        description: Image (not supported by OpenAI)
+        type: integer
+      model:
+        description: Also part of the OpenAI official spec
+        type: string
+      model_base_name:
+        description: AutoGPTQ
+        type: string
+      "n":
+        description: Also part of the OpenAI official spec. Use it for returning multiple
+          results
+        type: integer
+      n_keep:
+        type: integer
+      negative_prompt:
+        type: string
+      negative_prompt_scale:
+        type: number
+      presence_penalty:
+        type: number
+      prompt:
+        description: Prompt is read only by completion/image API calls
+      repeat_penalty:
+        type: number
+      response_format:
+        allOf:
+        - $ref: '#/definitions/schema.ChatCompletionResponseFormat'
+        description: whisper/image
+      rope_freq_base:
+        type: number
+      rope_freq_scale:
+        type: number
+      seed:
+        type: integer
+      size:
+        description: image
+        type: string
+      step:
+        type: integer
+      stop: {}
+      stream:
+        type: boolean
+      temperature:
+        type: number
+      tfz:
+        type: number
+      tokenizer:
+        description: RWKV (?)
+        type: string
+      tool_choice: {}
+      tools:
+        items:
+          $ref: '#/definitions/grammar.Tool'
+        type: array
+      top_k:
+        type: integer
+      top_p:
+        description: Common options between all the API calls, part of the OpenAI
+          spec
+        type: number
+      typical_p:
+        type: number
+      use_fast_tokenizer:
+        description: AutoGPTQ
+        type: boolean
+    required:
+    - file
+    type: object
+  schema.OpenAIResponse:
+    properties:
+      choices:
+        items:
+          $ref: '#/definitions/schema.Choice'
+        type: array
+      created:
+        type: integer
+      data:
+        items:
+          $ref: '#/definitions/schema.Item'
+        type: array
+      id:
+        type: string
+      model:
+        type: string
+      object:
+        type: string
+      usage:
+        $ref: '#/definitions/schema.OpenAIUsage'
+    type: object
+  schema.OpenAIUsage:
+    properties:
+      completion_tokens:
+        type: integer
+      prompt_tokens:
+        type: integer
+      total_tokens:
+        type: integer
+    type: object
+  schema.TTSRequest:
+    properties:
+      backend:
+        type: string
+      input:
+        type: string
+      model:
+        type: string
+      voice:
+        type: string
+    type: object
+  schema.ToolCall:
+    properties:
+      function:
+        $ref: '#/definitions/schema.FunctionCall'
+      id:
+        type: string
+      index:
+        type: integer
+      type:
+        type: string
+    type: object
+info:
+  contact:
+    name: LocalAI
+    url: https://localai.io
+  description: The LocalAI Rest API.
+  license:
+    name: MIT
+    url: https://raw.githubusercontent.com/mudler/LocalAI/master/LICENSE
+  title: LocalAI API
+  version: 2.0.0
+paths:
+  /v1/assistants:
+    post:
+      parameters:
+      - description: request body
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/openai.AssistantRequest'
+      responses:
+        "200":
+          description: Response
+          schema:
+            $ref: '#/definitions/openai.Assistant'
+      summary: Create an assistant with a model and instructions.
+  /v1/audio/speech:
+    post:
+      parameters:
+      - description: request body
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/schema.TTSRequest'
+      responses:
+        "200":
+          description: Response
+          schema:
+            type: string
+      summary: Generates audio from the input text.
+  /v1/audio/transcriptions:
+    post:
+      consumes:
+      - multipart/form-data
+      parameters:
+      - description: model
+        in: formData
+        name: model
+        required: true
+        type: string
+      - description: file
+        in: formData
+        name: file
+        required: true
+        type: file
+      responses:
+        "200":
+          description: Response
+          schema:
+            additionalProperties:
+              type: string
+            type: object
+      summary: Transcribes audio into the input language.
+  /v1/chat/completions:
+    post:
+      parameters:
+      - description: request body
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/schema.OpenAIRequest'
+      responses:
+        "200":
+          description: Response
+          schema:
+            $ref: '#/definitions/schema.OpenAIResponse'
+      summary: Generate chat completions for a given prompt and model.
+  /v1/completions:
+    post:
+      parameters:
+      - description: request body
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/schema.OpenAIRequest'
+      responses:
+        "200":
+          description: Response
+          schema:
+            $ref: '#/definitions/schema.OpenAIResponse'
+      summary: Generate completions for a given prompt and model.
+  /v1/embeddings:
+    post:
+      parameters:
+      - description: request body
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/schema.OpenAIRequest'
+      responses:
+        "200":
+          description: Response
+          schema:
+            $ref: '#/definitions/schema.OpenAIResponse'
+      summary: Get a vector representation of a given input that can be easily consumed
+        by machine learning models and algorithms.
+  /v1/images/generations:
+    post:
+      parameters:
+      - description: request body
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/schema.OpenAIRequest'
+      responses:
+        "200":
+          description: Response
+          schema:
+            $ref: '#/definitions/schema.OpenAIResponse'
+      summary: Creates an image given a prompt.
+  /v1/text-to-speech/{voice-id}:
+    post:
+      parameters:
+      - description: Voice ID
+        in: path
+        name: voice-id
+        required: true
+        type: string
+      - description: request body
+        in: body
+        name: request
+        required: true
+        schema:
+          $ref: '#/definitions/schema.TTSRequest'
+      responses:
+        "200":
+          description: Response
+          schema:
+            type: string
+      summary: Generates audio from the input text.
+securityDefinitions:
+  BearerAuth:
+    in: header
+    name: Authorization
+    type: apiKey
+swagger: "2.0"