From 5877207924c088739644873d6cf654aabb1f7134 Mon Sep 17 00:00:00 2001
From: Ziniu Yu
Date: Tue, 26 Jul 2022 10:19:27 +0800
Subject: [PATCH] fix: add models and md5 (#783)

* fix: add models and md5
* fix: get correct model name
* fix: typo
* chore: remove unused comment
* test: remove unused test
* test: update test model
---
 server/clip_server/executors/clip_torch.py    |  2 +-
 server/clip_server/model/clip_onnx.py         | 55 ++++++++++++++++++
 server/clip_server/model/openclip_model.py    |  2 +-
 server/clip_server/model/pretrained_models.py | 57 +++++++++++++------
 tests/test_model.py                           |  2 +-
 5 files changed, 97 insertions(+), 21 deletions(-)

diff --git a/server/clip_server/executors/clip_torch.py b/server/clip_server/executors/clip_torch.py
index e2780d598..09749085b 100644
--- a/server/clip_server/executors/clip_torch.py
+++ b/server/clip_server/executors/clip_torch.py
@@ -20,7 +20,7 @@ class CLIPEncoder(Executor):
     def __init__(
         self,
-        name: str = 'ViT-B-32-quickgelu::openai',
+        name: str = 'ViT-B-32::openai',
         device: Optional[str] = None,
         jit: bool = False,
         num_worker_preprocess: int = 4,
diff --git a/server/clip_server/model/clip_onnx.py b/server/clip_server/model/clip_onnx.py
index d85f51cf4..e049272f1 100644
--- a/server/clip_server/model/clip_onnx.py
+++ b/server/clip_server/model/clip_onnx.py
@@ -8,6 +8,61 @@
 )
 _S3_BUCKET_V2 = 'https://clip-as-service.s3.us-east-2.amazonaws.com/models-436c69702d61732d53657276696365/onnx/'
 _MODELS = {
+    'RN50::openai': (
+        ('RN50/textual.onnx', '722418bfe47a1f5c79d1f44884bb3103'),
+        ('RN50/visual.onnx', '5761475db01c3abb68a5a805662dcd10'),
+    ),
+    'RN50::yfcc15m': (),
+    'RN50::cc12m': (),
+    'RN50-quickgelu::openai': (),
+    'RN50-quickgelu::yfcc15m': (),
+    'RN50-quickgelu::cc12m': (),
+    'RN101::openai': (
+        ('RN101/textual.onnx', '2d9efb7d184c0d68a369024cedfa97af'),
+        ('RN101/visual.onnx', '0297ebc773af312faab54f8b5a622d71'),
+    ),
+    'RN101::yfcc15m': (),
+    'RN101-quickgelu::openai': (),
+    'RN101-quickgelu::yfcc15m': (),
+    'RN50x4::openai': (
+        ('RN50x4/textual.onnx', 'd9d63d3fe35fb14d4affaa2c4e284005'),
+        ('RN50x4/visual.onnx', '16afe1e35b85ad862e8bbdb12265c9cb'),
+    ),
+    'RN50x16::openai': (
+        ('RN50x16/textual.onnx', '1525785494ff5307cadc6bfa56db6274'),
+        ('RN50x16/visual.onnx', '2a293d9c3582f8abe29c9999e47d1091'),
+    ),
+    'RN50x64::openai': (
+        ('RN50x64/textual.onnx', '3ae8ade74578eb7a77506c11bfbfaf2c'),
+        ('RN50x64/visual.onnx', '1341f10b50b3aca6d2d5d13982cabcfc'),
+    ),
+    'ViT-B-32::openai': (
+        ('ViT-B-32/textual.onnx', 'bd6d7871e8bb95f3cc83aff3398d7390'),
+        ('ViT-B-32/visual.onnx', '88c6f38e522269d6c04a85df18e6370c'),
+    ),
+    'ViT-B-32::laion2b_e16': (),
+    'ViT-B-32::laion400m_e31': (),
+    'ViT-B-32::laion400m_e32': (),
+    'ViT-B-32-quickgelu::openai': (),
+    'ViT-B-32-quickgelu::laion400m_e31': (),
+    'ViT-B-32-quickgelu::laion400m_e32': (),
+    'ViT-B-16::openai': (
+        ('ViT-B-16/textual.onnx', '6f0976629a446f95c0c8767658f12ebe'),
+        ('ViT-B-16/visual.onnx', 'd5c03bfeef1abbd9bede54a8f6e1eaad'),
+    ),
+    'ViT-B-16::laion400m_e31': (),
+    'ViT-B-16::laion400m_e32': (),
+    'ViT-B-16-plus-240::laion400m_e31': (),
+    'ViT-B-16-plus-240::laion400m_e32': (),
+    'ViT-L-14::openai': (
+        ('ViT-L-14/textual.onnx', '325380b31af4837c2e0d9aba2fad8e1b'),
+        ('ViT-L-14/visual.onnx', '53f5b319d3dc5d42572adea884e31056'),
+    ),
+    'ViT-L-14-336::openai': (
+        ('ViT-L-14@336px/textual.onnx', '78fab479f136403eed0db46f3e9e7ed2'),
+        ('ViT-L-14@336px/visual.onnx', 'f3b1f5d55ca08d43d749e11f7e4ba27e'),
+    ),
+    # older version name format
     'RN50': (
         ('RN50/textual.onnx', '722418bfe47a1f5c79d1f44884bb3103'),
         ('RN50/visual.onnx', '5761475db01c3abb68a5a805662dcd10'),
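Note: each new-format entry above maps a '<model>::<pretrained>' tag to (path, md5) pairs for the textual and visual ONNX halves, resolved against _S3_BUCKET_V2; entries left as empty tuples have no ONNX export yet. Below is a minimal sketch of how such an entry could be fetched and checksum-verified; the helper name fetch_and_verify is hypothetical (the project routes downloads through its own download_model helper), only the bucket URL and tuple shapes come from the diff.

import hashlib
import urllib.request

_S3_BUCKET_V2 = (
    'https://clip-as-service.s3.us-east-2.amazonaws.com/'
    'models-436c69702d61732d53657276696365/onnx/'
)

def fetch_and_verify(relpath: str, md5sum: str) -> str:
    # Hypothetical helper: download one ONNX half (e.g. 'RN50/textual.onnx')
    # and verify its MD5 against the registry value before trusting the file.
    target = relpath.replace('/', '-')
    urllib.request.urlretrieve(_S3_BUCKET_V2 + relpath, target)
    with open(target, 'rb') as f:
        digest = hashlib.md5(f.read()).hexdigest()
    if digest != md5sum:
        raise RuntimeError(f'MD5 mismatch for {relpath}: {digest} != {md5sum}')
    return target

# e.g. both halves of the ViT-B-32 ONNX model:
# for relpath, md5 in _MODELS['ViT-B-32::openai']:
#     fetch_and_verify(relpath, md5)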
diff --git a/server/clip_server/model/openclip_model.py b/server/clip_server/model/openclip_model.py
index d96ce6509..c496331c4 100644
--- a/server/clip_server/model/openclip_model.py
+++ b/server/clip_server/model/openclip_model.py
@@ -24,7 +24,7 @@ def __init__(self, name: str, device: str = 'cpu', jit: bool = False, **kwargs):
         if model_url:
             model_path = download_model(model_url, md5sum=md5sum)
             self._model = load_openai_model(model_path, device=device, jit=jit)
-            self._model_name = name
+            self._model_name = name.split('::')[0]
         else:
             model_name, pretrained = name.split('::')
             self._model = open_clip.create_model(
diff --git a/server/clip_server/model/pretrained_models.py b/server/clip_server/model/pretrained_models.py
index 367578257..ba6866e00 100644
--- a/server/clip_server/model/pretrained_models.py
+++ b/server/clip_server/model/pretrained_models.py
@@ -7,31 +7,52 @@
 _OPENCLIP_S3_BUCKET = 'https://clip-as-service.s3.us-east-2.amazonaws.com/models/torch'
 _OPENCLIP_MODELS = {
     'RN50::openai': ('RN50.pt', '9140964eaaf9f68c95aa8df6ca13777c'),
-    'RN50::yfcc15m': (),
-    'RN50::cc12m': (),
-    'RN50-quickgelu::openai': (),
-    'RN50-quickgelu::yfcc15m': (),
-    'RN50-quickgelu::cc12m': (),
+    'RN50::yfcc15m': ('RN50-yfcc15m.pt', 'e9c564f91ae7dc754d9043fdcd2a9f22'),
+    'RN50::cc12m': ('RN50-cc12m.pt', '37cb01eb52bb6efe7666b1ff2d7311b5'),
     'RN101::openai': ('RN101.pt', 'fa9d5f64ebf152bc56a18db245071014'),
-    'RN101::yfcc15m': (),
-    'RN101-quickgelu::openai': (),
-    'RN101-quickgelu::yfcc15m': (),
+    'RN101::yfcc15m': ('RN101-yfcc15m.pt', '48f7448879ce25e355804f6bb7928cb8'),
     'RN50x4::openai': ('RN50x4.pt', '03830990bc768e82f7fb684cde7e5654'),
     'RN50x16::openai': ('RN50x16.pt', '83d63878a818c65d0fb417e5fab1e8fe'),
     'RN50x64::openai': ('RN50x64.pt', 'a6631a0de003c4075d286140fc6dd637'),
     'ViT-B-32::openai': ('ViT-B-32.pt', '3ba34e387b24dfe590eeb1ae6a8a122b'),
-    'ViT-B-32::laion2b_e16': (),
-    'ViT-B-32::laion400m_e31': (),
-    'ViT-B-32::laion400m_e32': (),
-    'ViT-B-32-quickgelu::openai': (),
-    'ViT-B-32-quickgelu::laion400m_e31': (),
-    'ViT-B-32-quickgelu::laion400m_e32': (),
+    'ViT-B-32::laion2b_e16': (
+        'ViT-B-32-laion2b_e16.pt',
+        'df08de3d9f2dc53c71ea26e184633902',
+    ),
+    'ViT-B-32::laion400m_e31': (
+        'ViT-B-32-laion400m_e31.pt',
+        'ca8015f98ab0f8780510710681d7b73e',
+    ),
+    'ViT-B-32::laion400m_e32': (
+        'ViT-B-32-laion400m_e32.pt',
+        '359e0dba4a419f175599ee0c63a110d8',
+    ),
     'ViT-B-16::openai': ('ViT-B-16.pt', '44c3d804ecac03d9545ac1a3adbca3a6'),
-    'ViT-B-16::laion400m_e31': (),
-    'ViT-B-16::laion400m_e32': (),
-    'ViT-B-16-plus-240::laion400m_e31': (),
-    'ViT-B-16-plus-240::laion400m_e32': (),
+    'ViT-B-16::laion400m_e31': (
+        'ViT-B-16-laion400m_e31.pt',
+        '31306a44224cc46fec1bc3b82fd0c4e6',
+    ),
+    'ViT-B-16::laion400m_e32': (
+        'ViT-B-16-laion400m_e32.pt',
+        '07283adc5c17899f2ed22d82b563c54b',
+    ),
+    'ViT-B-16-plus-240::laion400m_e31': (
+        'ViT-B-16-plus-240-laion400m_e31.pt',
+        'c88f453644a998ecb094d878a2f0738d',
+    ),
+    'ViT-B-16-plus-240::laion400m_e32': (
+        'ViT-B-16-plus-240-laion400m_e32.pt',
+        'e573af3cef888441241e35022f30cc95',
+    ),
     'ViT-L-14::openai': ('ViT-L-14.pt', '096db1af569b284eb76b3881534822d9'),
+    'ViT-L-14::laion400m_e31': (
+        'ViT-L-14-laion400m_e31.pt',
+        '09d223a6d41d2c5c201a9da618d833aa',
+    ),
+    'ViT-L-14::laion400m_e32': (
+        'ViT-L-14-laion400m_e32.pt',
+        'a76cde1bc744ca38c6036b920c847a89',
+    ),
     'ViT-L-14-336::openai': ('ViT-L-14-336px.pt', 'b311058cae50cb10fbfa2a44231c9473'),
     # older version name format
     'RN50': ('RN50.pt', '9140964eaaf9f68c95aa8df6ca13777c'),
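Note: the openclip_model.py one-liner above is what lets the new '<model>::<pretrained>' tags and the older plain names coexist: str.split('::') strips the pretrained suffix when present and is a no-op otherwise. A quick standalone illustration (not project code):

def model_name(name: str) -> str:
    # Splitting on '::' yields the bare architecture name for new-style tags;
    # a name without '::' passes through unchanged (older name format).
    return name.split('::')[0]

assert model_name('ViT-B-32::openai') == 'ViT-B-32'  # new format
assert model_name('RN50') == 'RN50'                  # older name format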
diff --git a/tests/test_model.py b/tests/test_model.py
index 4dc57c635..6223b2a4a 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -8,7 +8,7 @@
     'name, model_cls',
     [
         ('ViT-L/14@336px', OpenCLIPModel),
-        ('RN101-quickgelu::openai', OpenCLIPModel),
+        ('RN101::openai', OpenCLIPModel),
         ('M-CLIP/XLM-Roberta-Large-Vit-B-32', MultilingualCLIPModel),
     ],
 )
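Note: a usage sketch mirroring the updated test above, using the OpenCLIPModel constructor shown in the openclip_model.py hunk. Running it downloads the checkpoint (with the MD5 check), and it peeks at a private attribute, so treat it as illustrative rather than a unit test:

from clip_server.model.openclip_model import OpenCLIPModel

# New-format tag: weights are fetched and MD5-verified, and the fix ensures
# the stored name is the bare architecture, not the full tag.
model = OpenCLIPModel('RN101::openai')
assert model._model_name == 'RN101'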