/
app.py
80 lines (66 loc) · 3.09 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import glob
import os
import click
from jina import Flow
from jina.types.request import Request
from docarray import Document, DocumentArray
def config():
    """Populate ``os.environ`` with the settings used by this example.

    Host paths for the mount variables are resolved against the directory
    that contains this file. Note that ``MODEL_MOUNT_ASSETS`` and
    ``MODEL_MOUNT_CACHE`` receive the same ``<host>:<container>`` mapping,
    and that ``JINA_WORKSPACE`` is a relative path (resolved against the
    current working directory) while ``WORKSPACE_MOUNT`` is absolute.
    Existing values of these variables are overwritten.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    cache_path = os.path.join(here, '.cache')
    workspace_path = os.path.join(here, 'workspace')
    cache_mount = f'{cache_path}:/workdir/.cache'

    os.environ.update({
        # the port for accessing the RESTful service, i.e. http://localhost:45678/docs
        'JINA_PORT': '45678',
        # the directory to store the indexed data
        'JINA_WORKSPACE': './workspace',
        # the maximal number of results to return
        'TOP_K': '50',
        # both model mounts point at the same host cache directory
        'MODEL_MOUNT_ASSETS': cache_mount,
        'MODEL_MOUNT_CACHE': cache_mount,
        'WORKSPACE_MOUNT': f'{workspace_path}:/workdir/workspace',
    })
def get_docs(data_path):
    """Lazily yield one ``Document`` per ``*.mp4`` file directly inside *data_path*.

    Each Document's ``uri`` is the path as returned by ``glob`` and its ``id``
    is the file's base name. Files are produced in whatever order ``glob``
    returns them; sub-directories are not searched.
    """
    pattern = os.path.join(data_path, '*.mp4')
    yield from (
        Document(uri=video_path, id=os.path.basename(video_path))
        for video_path in glob.glob(pattern)
    )
def check_search(resp: Request):
    """Print each query in *resp* together with its matches.

    For every document in ``resp.docs`` the query text is printed, then one
    line per match showing its id, the value of its ``cosine`` score, its
    ``timestamp`` tag and its uri, and finally a ten-dash separator.
    """
    separator = '-' * 10
    for query in resp.docs:
        print(f'Query text: {query.text}')
        print('Matches:')
        for hit in query.matches:
            hit_id = hit.id
            score = hit.scores["cosine"].value
            timestamp = hit.tags["timestamp"]
            link = hit.uri
            print(f'+- id: {hit_id}, score: {score}, timestamp: {timestamp}, link: {link}')
        print(separator)
@click.command()
@click.option('--mode', '-m', type=click.Choice(['restful', 'grpc', 'restful_query']), default='restful')
@click.option('--directory', '-d', type=click.Path(exists=True), default='toy_data')
def main(mode, directory):
    """Index the *.mp4 files in DIRECTORY and then search them.

    restful: index, then keep the search Flow running until interrupted.
    grpc: index, then send three sample text queries and print the matches.
    restful_query: skip indexing and only keep the search Flow running.

    Exits with status 1 if the workspace directory already exists and
    indexing was requested.
    """
    config()
    workspace = os.environ['JINA_WORKSPACE']

    # Refuse to index on top of an existing workspace; query-only modes are exempt.
    if os.path.exists(workspace) and mode not in ['restful_query', 'grpc_query']:
        print(
            '\n +-----------------------------------------------------------------------------------+ '
            '\n | 🤖🤖🤖 | '
            f'\n | The directory {workspace} already exists. Please remove it before indexing again. | '
            '\n | 🤖🤖🤖 | '
            '\n +-----------------------------------------------------------------------------------+'
        )
        # click discards a command's return value in standalone mode, so a
        # plain ``return -1`` would leave the process exit status at 0.
        # Raising SystemExit makes the failure visible to the calling shell.
        raise SystemExit(1)

    # Only the gRPC mode overrides the settings loaded from the Flow YAML files.
    override_dict = {'protocol': 'grpc', 'cors': False} if mode == 'grpc' else {}

    if mode in ['grpc', 'restful']:
        with Flow.load_config('index-flow.yml', override_with=override_dict) as f:
            # One document per request.
            f.post(on='/index', inputs=get_docs(directory), request_size=1)
        print('index completed.')

    with Flow.load_config('search-flow.yml', override_with=override_dict) as f:
        print('ready for searching.')
        if mode == 'grpc':
            sample_queries = DocumentArray([
                Document(text='bicycle bell ringing'),
                Document(text='typing on a keyboard'),
                Document(text='a young girl'),
            ])
            f.post(on='/search', inputs=sample_queries, on_done=check_search)
        elif mode in ['restful', 'restful_query']:
            # Keep the Flow alive until the process is interrupted.
            f.block()
# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()