Skip to content
This repository has been archived by the owner on Nov 1, 2021. It is now read-only.

Commit

Permalink
Adapt cross modal search to 2.0 (#631)
Browse files Browse the repository at this point in the history
* fix: comment out merge_all and discard compound indexer pattern

* feat: add merge root executor

* feat: adapt most executors and yml to 2.0

* feat: clean index flow with comments

* fix: tokenize text and input to CLIP text encoder

* fix: fix image reader and normalizer

* feat: query flow adapted to 2.0 with comment

* fix: change 0:0:0:0 to localhost in Readme

* feat: fix the requirements

* feat: fix the requirements

* feat: persistence

* fix: fix the index flow

* feat: fix index flow

* feat: query flow that supports text input

* help to adapt the cross modal search to 2.0 (#657)

* fix: fix the query part

* feat: fix the bug in the indexing text part

* fix: fix the query flow

* fix: fix the score for text2image matching

* fix: fix the query mode

* feat: clean up

* fix: fix the workspace

* fix: remove hello fashion dependency

* chore: clean up

* fix: switch to use mime_type for routing

* fix: adapt evaluation

* feat: remove kv indexer

* fix: remove the keyvalue indexer

* fix: remove the keyvalue indexer

* feat: add tests

* fix: revert kvindexer deletion

* chore: clean up

* chore: clean up

* chore: clean up

Co-authored-by: Nan Wang <nan.wang@jina.ai>
  • Loading branch information
Kelton8Z and nan-wang committed Jun 15, 2021
1 parent 90a311d commit edaccb3
Show file tree
Hide file tree
Showing 26 changed files with 444 additions and 300 deletions.
Empty file added __init__.py
Empty file.
2 changes: 1 addition & 1 deletion cross-modal-search/README.md
Expand Up @@ -124,7 +124,7 @@ python app.py -t query_restful
You should open another terminal window and paste the following command.
```sh
curl --request POST -d '{"top_k": 5, "mode": "search", "data": ["hello world"]}' -H 'Content-Type: application/json' 'http://0.0.0.0:45678/search'
curl --request POST -d '{"top_k": 5, "mode": "search", "data": ["hello world"]}' -H 'Content-Type: application/json' 'http://localhost:45678/search'
```
Once you run this command, you should see a JSON output returned to you. This contains the five most semantically similar images or sentences to the text input you provided in the `data` parameter.
Expand Down
Empty file added cross-modal-search/__init__.py
Empty file.
104 changes: 52 additions & 52 deletions cross-modal-search/app.py
Expand Up @@ -5,8 +5,8 @@
import sys

import click
from jina import Flow
from jina.logging import JinaLogger
from jina import Flow, Document
import logging
from jina.logging.profile import TimeContext

from dataset import input_index_data
Expand All @@ -15,73 +15,73 @@
cur_dir = os.path.dirname(os.path.abspath(__file__))


def config(model_name):
def config():
os.environ['JINA_PARALLEL'] = os.environ.get('JINA_PARALLEL', '1')
os.environ['JINA_SHARDS'] = os.environ.get('JINA_SHARDS', '1')
os.environ["JINA_WORKSPACE"] = os.environ.get("JINA_WORKSPACE", "workspace")
os.environ['JINA_PORT'] = '45678'
if model_name == 'clip':
os.environ['JINA_IMAGE_ENCODER'] = os.environ.get('JINA_IMAGE_ENCODER', 'docker://jinahub/pod.encoder.clipimageencoder:0.0.2-1.2.0')
os.environ['JINA_TEXT_ENCODER'] = os.environ.get('JINA_TEXT_ENCODER', 'docker://jinahub/pod.encoder.cliptextencoder:0.0.3-1.2.2')
os.environ['JINA_TEXT_ENCODER_INTERNAL'] = 'pods/clip/text-encoder.yml'
elif model_name == 'vse':
os.environ['JINA_IMAGE_ENCODER'] = os.environ.get('JINA_IMAGE_ENCODER', 'docker://jinahub/pod.encoder.vseimageencoder:0.0.5-1.2.0')
os.environ['JINA_TEXT_ENCODER'] = os.environ.get('JINA_TEXT_ENCODER', 'docker://jinahub/pod.encoder.vsetextencoder:0.0.6-1.2.0')
os.environ['JINA_TEXT_ENCODER_INTERNAL'] = 'pods/vse/text-encoder.yml'


def index_restful(num_docs):
    """Index up to *num_docs* text lines through the Flow's REST ``/index`` endpoint.

    Loads the index Flow, reads lines from the file named by the
    ``JINA_DATA_FILE`` environment variable, wraps each line in a
    ``Document`` and POSTs the batch to the running gateway.

    :param num_docs: maximum number of lines to read and index.
    :raises RuntimeError: if ``JINA_DATA_FILE`` is not set.
    :raises Exception: if the HTTP request does not return status 200.
    """
    f = Flow().load_config('flows/flow-index.yml')

    with f:
        data_file = os.environ.get('JINA_DATA_FILE')
        if not data_file:
            # os.path.join(..., None) would raise an opaque TypeError;
            # fail early with an actionable message instead.
            raise RuntimeError('the JINA_DATA_FILE environment variable is not set')
        data_path = os.path.join(os.path.dirname(__file__), data_file)
        f.logger.info(f'Indexing {data_path}')
        url = f'http://0.0.0.0:{f.port_expose}/index'

        # NOTE(review): `_input_lines` is assumed to be a line-reader helper
        # defined elsewhere in this module — confirm it is imported/defined.
        input_docs = _input_lines(
            filepath=data_path,
            size=num_docs,
            read_mode='r',
        )
        data_json = {'data': [Document(text=text).dict() for text in input_docs]}
        r = requests.post(url, json=data_json)
        if r.status_code != 200:
            raise Exception(f'api request failed, url: {url}, status: {r.status_code}, content: {r.content}')


def index_restful():
    """Serve the index Flow over the REST gateway and block until terminated."""
    f = Flow().load_config('flows/flow-index.yml')
    f.use_rest_gateway()
    with f:
        f.block()


def check_index_result(resp):
    """Print a one-line summary for every Document returned by an index request."""
    for raw_doc in resp.data.docs:
        d = Document(raw_doc)
        print(f'{d.id[:10]}, buffer: {len(d.buffer)}, mime_type: {d.mime_type}, modality: {d.modality}, embed: {d.embedding.shape}, uri: {d.uri[:20]}')


def check_query_result(resp):
    """Print each returned Document and, when present, its ranked matches."""
    for raw_doc in resp.data.docs:
        d = Document(raw_doc)
        print(f'{d.id[:10]}, buffer: {len(d.buffer)}, embed: {d.embedding.shape}, uri: {d.uri[:20]}, chunks: {len(d.chunks)}, matches: {len(d.matches)}')
        # an empty match list simply produces no output lines
        for m in d.matches:
            print(f'\t+- {m.id[:10]}, score: {m.score.value}, text: {m.text}, modality: {m.modality}, uri: {m.uri[:20]}')


def index(data_set, num_docs, request_size):
f = Flow.load_config('flows/flow-index.yml')
with f:
with TimeContext(f'QPS: indexing {num_docs}', logger=f.logger):
f.index(
flow = Flow.load_config('flows/flow-index.yml')
with flow:
with TimeContext(f'QPS: indexing {num_docs}', logger=flow.logger):
flow.index(
inputs=input_index_data(num_docs, request_size, data_set),
request_size=request_size
request_size=request_size,
on_done=check_index_result
)


def query_restful():
    """Serve the query Flow over the REST gateway and block until terminated."""
    f = Flow().load_config('flows/flow-query.yml')
    f.use_rest_gateway()
    with f:
        # keep the gateway alive so external clients can POST search requests
        f.block()
def query():
    """Run one mixed-modality search: a text query and an image query.

    Per-match results are printed by :func:`check_query_result`.
    """
    f = Flow().load_config('flows/flow-query.yml')
    f.use_rest_gateway()
    queries = [
        Document(text='a black dog and a spotted dog are fighting', modality='text'),
        Document(uri='toy-data/images/1000268201_693b08cb0e.jpg', modality='image'),
    ]
    with f:
        f.search(inputs=queries, on_done=check_query_result)


def dryrun():
    """Sanity-check the index Flow: build it, then tear it down immediately."""
    flow = Flow().load_config('flows/flow-index.yml')
    with flow:
        pass
def query_restful():
    """Expose the query Flow as a REST service and block until terminated."""
    f = Flow().load_config('flows/flow-query.yml')
    f.use_rest_gateway()
    with f:
        f.block()


@click.command()
@click.option('--task', '-t', type=click.Choice(['index', 'index_restful', 'query_restful', 'dryrun'], case_sensitive=False), default='index')
@click.option('--task', '-t', type=click.Choice(['index', 'index_restful', 'query_restful', 'query']), default='index')
@click.option("--num_docs", "-n", default=MAX_DOCS)
@click.option('--request_size', '-s', default=16)
@click.option('--data_set', '-d', type=click.Choice(['f30k', 'f8k', 'toy-data'], case_sensitive=False), default='toy-data')
@click.option('--model_name', '-m', type=click.Choice(['clip', 'vse'], case_sensitive=False), default='clip')
def main(task, num_docs, request_size, data_set, model_name):
config(model_name)
def main(task, num_docs, request_size, data_set):
config()
workspace = os.environ['JINA_WORKSPACE']
logger = JinaLogger('cross-modal-search')
logger = logging.getLogger('cross-modal-search')
if 'index' in task:
if os.path.exists(workspace):
logger.error(
Expand All @@ -100,11 +100,11 @@ def main(task, num_docs, request_size, data_set, model_name):
if task == 'index':
index(data_set, num_docs, request_size)
elif task == 'index_restful':
index_restful(num_docs)
index_restful()
elif task == 'query':
query()
elif task == 'query_restful':
query_restful()
elif task == 'dryrun':
dryrun()


if __name__ == '__main__':
Expand Down
12 changes: 4 additions & 8 deletions cross-modal-search/evaluate.py
Expand Up @@ -21,13 +21,9 @@ def config(model_name):
os.environ['JINA_PORT'] = '45678'
os.environ['JINA_USE_REST_API'] = 'false'
if model_name == 'clip':
os.environ['JINA_IMAGE_ENCODER'] = 'docker://jinahub/pod.encoder.clipimageencoder:0.0.1-1.0.7'
os.environ['JINA_TEXT_ENCODER'] = 'docker://jinahub/pod.encoder.cliptextencoder:0.0.1-1.0.7'
# os.environ['JINA_IMAGE_ENCODER'] = CLIPImageEncoder
# os.environ['JINA_TEXT_ENCODER'] = CLIPTextEncoder
os.environ['JINA_TEXT_ENCODER_INTERNAL'] = 'pods/clip/text-encoder.yml'
elif model_name == 'vse':
os.environ['JINA_IMAGE_ENCODER'] = 'docker://jinahub/pod.encoder.vseimageencoder:0.0.5-1.0.7'
os.environ['JINA_TEXT_ENCODER'] = 'docker://jinahub/pod.encoder.vsetextencoder:0.0.6-1.0.7'
os.environ['JINA_TEXT_ENCODER_INTERNAL'] = 'pods/vse/text-encoder.yml'
else:
msg = f'Unsupported model {model_name}.'
msg += 'Expected `clip` or `vse`.'
Expand Down Expand Up @@ -98,13 +94,13 @@ def print_evaluation_score(resp):
def main(index_num_docs, evaluate_num_docs, request_size, data_set, model_name, evaluation_mode):
config(model_name)
if index_num_docs > 0:
with Flow.load_config('flow-index.yml') as f:
with Flow.load_config('flows/flow-index.yml') as f:
f.use_rest_gateway()
f.index(
input_fn=input_index_data(index_num_docs, request_size, data_set),
request_size=request_size
)
with Flow.load_config('flow-query.yml').add(name='evaluator', uses='yaml/evaluate.yml') as flow_eval:
with Flow.load_config('flows/flow-query.yml').add(name='evaluator', uses='pods/evaluate.yml') as flow_eval:
flow_eval.search(
input_fn=evaluation_generator(evaluate_num_docs, request_size, data_set, mode=evaluation_mode),
on_done=print_evaluation_score
Expand Down
33 changes: 17 additions & 16 deletions cross-modal-search/flows/flow-index.yml
@@ -1,42 +1,43 @@
!Flow
jtype: Flow
version: '1'
with:
prefetch: 10
port_expose: 45678
workspace: $JINA_WORKSPACE
pods:
- name: loader
- name: loader # load images from the dataset of image-caption pairs
uses: pods/image-load.yml
shards: $JINA_PARALLEL
read_only: true
- name: normalizer
needs: [gateway]
- name: image_normalizer # normalize the dimension of the images
uses: pods/image-normalize.yml
shards: $JINA_PARALLEL
read_only: true
- name: image_encoder
uses: $JINA_IMAGE_ENCODER
- name: image_encoder # encode images into embeddings with CLIP model
uses: pods/clip/image-encoder.yml
shards: $JINA_PARALLEL
timeout_ready: 600000
read_only: true
- name: image_vector_indexer
- name: image_vector_indexer # store image embeddings
polling: any
uses: pods/index-image-vector.yml
shards: $JINA_SHARDS
- name: image_kv_indexer
- name: image_kv_indexer # store image documents
polling: any
uses: pods/index-image-kv.yml
shards: $JINA_SHARDS
needs: [gateway]
- name: text_encoder
uses: $JINA_TEXT_ENCODER
uses_internal: $JINA_TEXT_ENCODER_INTERNAL
- name: text_encoder # encode text into embeddings with CLIP model
uses: pods/clip/text-encoder.yml
shards: $JINA_PARALLEL
timeout_ready: 600000
read_only: true
needs: [gateway]
- name: text_indexer
- name: text_indexer # index the text into documents
polling: any
uses: pods/index-text.yml
uses: pods/index-text.yml #(numpy + binary pb indexer)
shards: $JINA_SHARDS
- name: join_all
uses: _merge_root
needs: [image_vector_indexer, image_kv_indexer, text_indexer]
read_only: true
needs: text_encoder
- name: join_all # wait on the 3 executors to finish data processing with "needs"
needs: [image_vector_indexer, image_kv_indexer, text_indexer]
36 changes: 17 additions & 19 deletions cross-modal-search/flows/flow-query.yml
@@ -1,49 +1,47 @@
!Flow
jtype: Flow
version: '1'
with:
prefetch: 10
port_expose: 45678
workspace: $JINA_WORKSPACE
pods:
- name: loader
- name: loader # load query image
uses: pods/image-load.yml
shards: $JINA_PARALLEL
read_only: true
- name: normalizer
needs: gateway
- name: normalizer # normalize query image
uses: pods/image-normalize.yml
shards: $JINA_PARALLEL
read_only: true
- name: image_encoder
needs: loader
- name: image_encoder # encode query image into embeddings with CLIP model
polling: any
uses: $JINA_IMAGE_ENCODER
uses: pods/clip/image-encoder.yml
shards: $JINA_PARALLEL
timeout_ready: 600000
read_only: true
- name: text_indexer
needs: normalizer
- name: text_indexer # index query text
polling: all
uses: pods/index-text.yml
shards: $JINA_SHARDS
uses_after: pods/merge_matches_sort_topk.yml
remove_uses_ba: true
- name: text_encoder
uses: $JINA_TEXT_ENCODER
uses_internal: $JINA_TEXT_ENCODER_INTERNAL
- name: text_encoder # encode query text into embeddings with CLIP model
uses: pods/clip/text-encoder.yml
shards: $JINA_PARALLEL
timeout_ready: 600000
read_only: true
needs: [gateway]
- name: image_vector_indexer
- name: image_vector_indexer # index query image embeddings
polling: all
uses: pods/index-image-vector.yml
shards: $JINA_SHARDS
uses_after: _merge_matches
remove_uses_ba: true
- name: image_kv_indexer
needs: text_encoder
- name: image_kv_indexer # index query image as kv
polling: all
uses: pods/index-image-kv.yml
shards: $JINA_SHARDS
uses_after: pods/merge_matches_sort_topk.yml
remove_uses_ba: true
- name: join_all
uses: _merge_root
needs: image_vector_indexer
- name: join_all # combine text and image queries
needs: [text_indexer, image_kv_indexer]
read_only: true
Empty file.
1 change: 0 additions & 1 deletion cross-modal-search/pods/clip/hub-image-encoder.yml

This file was deleted.

13 changes: 0 additions & 13 deletions cross-modal-search/pods/clip/hub-text-encoder.yml

This file was deleted.

4 changes: 4 additions & 0 deletions cross-modal-search/pods/clip/image-encoder.yml
@@ -0,0 +1,4 @@
# encodes images into embeddings with the CLIP model
jtype: CLIPImageEncoder
metas:
  py_modules:
    # executor implementations live one directory up from this pod config
    - '../executors.py'
17 changes: 3 additions & 14 deletions cross-modal-search/pods/clip/text-encoder.yml
@@ -1,16 +1,5 @@
!CLIPTextEncoder
# encodes text into embeddings with CLIP model
jtype: CLIPTextEncoder
metas:
py_modules:
- workspace/__init__.py
requests:
on:
IndexRequest:
- !FilterQL
with:
lookups: {'modality': 'text'}
- !EncodeDriver {}
SearchRequest:
- !FilterQL
with:
lookups: {'mime_type__contains': 'text'}
- !EncodeDriver {}
- '../executors.py'

0 comments on commit edaccb3

Please sign in to comment.