feat: add fp16 inference support (trt) #875

Draft · wants to merge 15 commits into base: main

3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
@@ -113,6 +113,7 @@ jobs:
pip install --no-cache-dir "server/[onnx]"
pip install --no-cache-dir "server/[transformers]"
pip install --no-cache-dir "server/[search]"
pip install open-clip-torch==2.7.0
Review comment (Member): please specify the open-clip-torch and tensorrt version in setup
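What the reviewer is asking for might look like the following minimal sketch (illustrative only; the versions are the ones installed in the CI steps above, and the setup.py changes further down in this diff pin them for real):

# Illustrative sketch of pinning the two dependencies in server/setup.py;
# the actual change is in the setup.py diff below.
from setuptools import setup

setup(
    name="clip-server",
    install_requires=["open_clip_torch==2.7.0"],
    extras_require={"tensorrt": ["nvidia-tensorrt==8.4.1.5"]},
)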

- name: Test
id: test
run: |
@@ -159,11 +160,13 @@ jobs:
pip install -e "server/[tensorrt]"
pip install -e "server/[onnx]"
pip install -e "server/[transformers]"
pip install nvidia-tensorrt==8.4.1.5
{
pip install -e "server/[flash-attn]"
} || {
echo "flash attention was not installed."
}
pip install open-clip-torch==2.7.0
- name: Test
id: test
run: |
6 changes: 5 additions & 1 deletion server/clip_server/executors/clip_tensorrt.py
@@ -25,6 +25,7 @@ def __init__(
num_worker_preprocess: int = 4,
minibatch_size: int = 32,
access_paths: str = '@r',
dtype: Optional[str] = 'fp32',
**kwargs,
):
"""
@@ -36,6 +37,7 @@
number if you encounter OOM errors.
:param access_paths: The access paths to traverse on the input documents to get the images and texts to be
processed. Visit https://docarray.jina.ai/fundamentals/documentarray/access-elements for more details.
:param dtype: inference data type, either 'fp32' (default) or 'fp16' for half-precision TensorRT engines.
"""
super().__init__(**kwargs)

@@ -51,6 +53,7 @@ def __init__(
self._access_paths = kwargs['traversal_paths']

self._device = device
self._dtype = dtype

import torch

@@ -63,7 +66,7 @@ def __init__(
torch.cuda.is_available()
), "CUDA/GPU is not available on Pytorch. Please check your CUDA installation"

self._model = CLIPTensorRTModel(name)
self._model = CLIPTensorRTModel(name=name, dtype=dtype)

self._model.start_engines()

@@ -85,6 +88,7 @@ def _preproc_images(self, docs: 'DocumentArray', drop_image_content: bool):
device=self._device,
return_np=False,
drop_image_content=drop_image_content,
dtype=self._dtype,
)

def _preproc_texts(self, docs: 'DocumentArray'):
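For context, a minimal usage sketch of the new option, mirroring the make_trt_flow_fp16 fixture added in tests/conftest.py below (the port number and executor name are illustrative):

# Minimal usage sketch (assumes a GPU host with the tensorrt extra installed).
from jina import Flow
from clip_server.executors.clip_tensorrt import CLIPEncoder

f = Flow(port=51000).add(
    name='clip_trt',  # illustrative executor name
    uses=CLIPEncoder,
    uses_with={'dtype': 'fp16'},  # 'fp32' stays the default
)
with f:
    f.block()  # serve until interrupted
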
37 changes: 25 additions & 12 deletions server/clip_server/model/clip_trt.py
@@ -1,5 +1,5 @@
import os
from typing import Dict
from typing import Dict, Optional

try:
import tensorrt as trt
@@ -51,6 +51,7 @@ class CLIPTensorRTModel(BaseCLIPModel):
def __init__(
self,
name: str,
dtype: Optional[str] = 'fp32',
):
super().__init__(name)

@@ -59,23 +60,35 @@ def __init__(
f'~/.cache/clip/{name.replace("/", "-").replace("::", "-")}'
)

self._textual_path = os.path.join(
cache_dir,
f'textual.{ONNX_MODELS[name][0][1]}.trt',
)
self._visual_path = os.path.join(
cache_dir,
f'visual.{ONNX_MODELS[name][1][1]}.trt',
)
if dtype == 'fp16':
self._textual_path = os.path.join(
cache_dir,
f'textual.{ONNX_MODELS[name][0][1]}.fp16.trt',
)
self._visual_path = os.path.join(
cache_dir,
f'visual.{ONNX_MODELS[name][1][1]}.fp16.trt',
)
else:
self._textual_path = os.path.join(
cache_dir,
f'textual.{ONNX_MODELS[name][0][1]}.trt',
)
self._visual_path = os.path.join(
cache_dir,
f'visual.{ONNX_MODELS[name][1][1]}.trt',
)

if not os.path.exists(self._textual_path) or not os.path.exists(
self._visual_path
):
from clip_server.model.clip_onnx import CLIPOnnxModel

fp16 = dtype == 'fp16'

trt_logger: Logger = trt.Logger(trt.Logger.ERROR)
runtime: Runtime = trt.Runtime(trt_logger)
onnx_model = CLIPOnnxModel(name)
onnx_model = CLIPOnnxModel(name=name, dtype=dtype)

visual_engine = build_engine(
runtime=runtime,
@@ -95,7 +108,7 @@ def __init__(
onnx_model.image_size,
),
workspace_size=10000 * 1024 * 1024,
fp16=False,
fp16=fp16,
int8=False,
)
save_engine(visual_engine, self._visual_path)
@@ -108,7 +121,7 @@ def __init__(
optimal_shape=(768, 77),
max_shape=(1024, 77),
workspace_size=10000 * 1024 * 1024,
fp16=False,
fp16=fp16,
int8=False,
)
save_engine(text_engine, self._textual_path)
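The build_engine and save_engine helpers used above come from the repo's existing TensorRT utilities; as background, here is a minimal sketch of how an fp16 switch typically reaches a TensorRT builder config (an illustration of the general TensorRT Python API, not this repo's helper):

# Background sketch: the usual way an fp16 flag is applied to a TensorRT
# builder config. Not the repo's actual build_engine implementation.
import tensorrt as trt

def make_builder_config(builder: 'trt.Builder', fp16: bool) -> 'trt.IBuilderConfig':
    config = builder.create_builder_config()
    if fp16:
        # Let TensorRT choose fp16 kernels where they are faster; layers may
        # still fall back to fp32 when required for accuracy or support.
        config.set_flag(trt.BuilderFlag.FP16)
    return config

With the FP16 flag set, TensorRT selects precision per layer, which is why the resulting engines are cached under the separate .fp16.trt paths introduced above.
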
118 changes: 60 additions & 58 deletions server/setup.py
@@ -4,91 +4,93 @@
from setuptools import find_packages, setup

if sys.version_info < (3, 7, 0):
raise OSError(f'CLIP-as-service requires Python >=3.7, but yours is {sys.version}')
raise OSError(f"CLIP-as-service requires Python >=3.7, but yours is {sys.version}")

try:
pkg_name = 'clip-server'
pkg_name = "clip-server"
libinfo_py = path.join(
path.dirname(__file__), pkg_name.replace('-', '_'), '__init__.py'
path.dirname(__file__), pkg_name.replace("-", "_"), "__init__.py"
)
libinfo_content = open(libinfo_py, 'r', encoding='utf8').readlines()
version_line = [l.strip() for l in libinfo_content if l.startswith('__version__')][
libinfo_content = open(libinfo_py, "r", encoding="utf8").readlines()
version_line = [l.strip() for l in libinfo_content if l.startswith("__version__")][
0
]
exec(version_line) # gives __version__
except FileNotFoundError:
__version__ = '0.0.0'
__version__ = "0.0.0"

try:
with open('../README.md', encoding='utf8') as fp:
with open("../README.md", encoding="utf8") as fp:
_long_description = fp.read()
except FileNotFoundError:
_long_description = ''
_long_description = ""

setup(
name=pkg_name,
packages=find_packages(),
version=__version__,
include_package_data=True,
description='Embed images and sentences into fixed-length vectors via CLIP',
author='Jina AI',
author_email='hello@jina.ai',
license='Apache 2.0',
url='https://github.com/jina-ai/clip-as-service',
download_url='https://github.com/jina-ai/clip-as-service/tags',
description="Embed images and sentences into fixed-length vectors via CLIP",
author="Jina AI",
author_email="hello@jina.ai",
license="Apache 2.0",
url="https://github.com/jina-ai/clip-as-service",
download_url="https://github.com/jina-ai/clip-as-service/tags",
long_description=_long_description,
long_description_content_type='text/markdown',
long_description_content_type="text/markdown",
zip_safe=False,
setup_requires=['setuptools>=18.0', 'wheel'],
setup_requires=["setuptools>=18.0", "wheel"],
install_requires=[
'ftfy',
'torch',
'regex',
'torchvision<=0.13.0' if sys.version_info <= (3, 7, 2) else 'torchvision',
'jina>=3.12.0',
'prometheus-client',
'open_clip_torch>=2.7.0',
"ftfy==6.1.1",
"torch==1.13.0",
"regex==2022.10.31",
"torchvision<=0.13.0"
if sys.version_info <= (3, 7, 2)
else "torchvision==0.14.0",
"jina==3.12.0",
"prometheus-client==0.15.0",
"open_clip_torch==2.7.0",
],
extras_require={
'onnx': [
'onnxruntime',
'onnx',
'onnxmltools',
"onnx": [
"onnxruntime==1.13.1",
"onnx==1.12.0",
"onnxmltools==1.11.1",
]
+ (['onnxruntime-gpu>=1.8.0'] if sys.platform != 'darwin' else []),
'tensorrt': ['nvidia-tensorrt'],
'transformers': ['transformers>=4.16.2'],
'search': ['annlite>=0.3.10'],
'flash-attn': ['flash-attn'],
+ (["onnxruntime-gpu==1.13.1"] if sys.platform != "darwin" else []),
"tensorrt": ["nvidia-tensorrt==8.4.1.5"],
"transformers": ["transformers==4.25.1"],
"search": ["annlite>=0.3.10"],
"flash-attn": ["flash-attn==0.2.4"],
},
classifiers=[
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'Intended Audience :: Education',
'Intended Audience :: Science/Research',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Unix Shell',
'Environment :: Console',
'License :: OSI Approved :: Apache Software License',
'Operating System :: OS Independent',
'Topic :: Database :: Database Engines/Servers',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Internet :: WWW/HTTP :: Indexing/Search',
'Topic :: Scientific/Engineering :: Image Recognition',
'Topic :: Multimedia :: Video',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Mathematics',
'Topic :: Software Development',
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Unix Shell",
"Environment :: Console",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Topic :: Database :: Database Engines/Servers",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Topic :: Internet :: WWW/HTTP :: Indexing/Search",
"Topic :: Scientific/Engineering :: Image Recognition",
"Topic :: Multimedia :: Video",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Mathematics",
"Topic :: Software Development",
"Topic :: Software Development :: Libraries",
"Topic :: Software Development :: Libraries :: Python Modules",
],
project_urls={
'Documentation': 'https://clip-as-service.jina.ai',
'Source': 'https://github.com/jina-ai/clip-as-service/',
'Tracker': 'https://github.com/jina-ai/clip-as-service/issues',
"Documentation": "https://clip-as-service.jina.ai",
"Source": "https://github.com/jina-ai/clip-as-service/",
"Tracker": "https://github.com/jina-ai/clip-as-service/issues",
},
keywords='jina openai clip deep-learning cross-modal multi-modal neural-search',
keywords="jina openai clip deep-learning cross-modal multi-modal neural-search",
)
11 changes: 11 additions & 0 deletions tests/conftest.py
@@ -58,6 +58,17 @@ def make_trt_flow(port_generator, request):
yield f


@pytest.fixture(scope='session', params=['tensorrt'])
def make_trt_flow_fp16(port_generator, request):
from clip_server.executors.clip_tensorrt import CLIPEncoder

f = Flow(port=port_generator()).add(
name=request.param, uses=CLIPEncoder, uses_with={'dtype': 'fp16'}
)
with f:
yield f


@pytest.fixture(params=['torch'])
def make_search_flow(tmpdir, port_generator, request):
from clip_server.executors.clip_torch import CLIPEncoder
33 changes: 33 additions & 0 deletions tests/test_tensorrt.py
@@ -41,6 +41,39 @@ def test_docarray_inputs(make_trt_flow, inputs):
assert inputs[0] is r[0]


@pytest.mark.gpu
@pytest.mark.parametrize(
'inputs',
[
[Document(text='hello, world'), Document(text='goodbye, world')],
DocumentArray([Document(text='hello, world'), Document(text='goodbye, world')]),
lambda: (Document(text='hello, world') for _ in range(10)),
DocumentArray(
[
Document(uri='https://docarray.jina.ai/_static/favicon.png'),
Document(
uri=f'{os.path.dirname(os.path.abspath(__file__))}/img/00000.jpg'
),
Document(text='hello, world'),
Document(
uri=f'{os.path.dirname(os.path.abspath(__file__))}/img/00000.jpg'
).load_uri_to_image_tensor(),
]
),
DocumentArray.from_files(
f'{os.path.dirname(os.path.abspath(__file__))}/**/*.jpg'
),
],
)
def test_docarray_inputs_fp16(make_trt_flow_fp16, inputs):
c = Client(server=f'grpc://0.0.0.0:{make_trt_flow_fp16.port}')
r = c.encode(inputs if not callable(inputs) else inputs())
assert isinstance(r, DocumentArray)
assert r.embeddings.shape
if hasattr(inputs, '__len__'):
assert inputs[0] is r[0]


@pytest.mark.gpu
@pytest.mark.asyncio
@pytest.mark.parametrize(