From 24ab3f18db5c6fdd12ea2754ceea3b34f1db8576 Mon Sep 17 00:00:00 2001
From: Zhuoheng Li <40489953+Andy-LZH@users.noreply.github.com>
Date: Wed, 8 Nov 2023 22:35:38 +0000
Subject: [PATCH 1/2] include path for B32,B16,L14

---
 src/open_clip/pretrained.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/open_clip/pretrained.py b/src/open_clip/pretrained.py
index 97d4be6..342e192 100644
--- a/src/open_clip/pretrained.py
+++ b/src/open_clip/pretrained.py
@@ -46,18 +46,24 @@
     laion2b_e16="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-laion2b_e16-af8dbd0c.pth",
     laion400m_e31="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt",
     laion400m_e32="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e32-46683a32.pt",
+    metaclip400m="https://dl.fbaipublicfiles.com/MMPT/metaclip/b32_400m.pt",
+    metaclip2_5b="https://dl.fbaipublicfiles.com/MMPT/metaclip/b32_fullcc2.5b.pt"
 )
 
 _VITB32_quickgelu = dict(
     openai="https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt",
     laion400m_e31="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e31-d867053b.pt",
     laion400m_e32="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_e32-46683a32.pt",
+    metaclip400m="https://dl.fbaipublicfiles.com/MMPT/metaclip/b32_400m.pt",
+    metaclip2_5b="https://dl.fbaipublicfiles.com/MMPT/metaclip/b32_fullcc2.5b.pt"
 )
 
 _VITB16 = dict(
     openai="https://openaipublic.azureedge.net/clip/models/5806e77cd80f8b59890b7e101eabd078d9fb84e6937f9e85e4ecb61988df416f/ViT-B-16.pt",
     laion400m_e31="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_16-laion400m_e31-00efa78f.pt",
     laion400m_e32="https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_16-laion400m_e32-55e67d44.pt",
+    metaclip400m="https://dl.fbaipublicfiles.com/MMPT/metaclip/b16_400m.pt",
+    metaclip2_5b="https://dl.fbaipublicfiles.com/MMPT/metaclip/b16_fullcc2.5b.pt"
 )
 
 _VITB16_PLUS_240 = dict(
@@ -69,6 +75,8 @@
     openai="https://openaipublic.azureedge.net/clip/models/b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836/ViT-L-14.pt",
     laion400m_e31='https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_l_14-laion400m_e31-69988bb6.pt',
     laion400m_e32='https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_l_14-laion400m_e32-3d133497.pt',
+    metaclip400m='https://dl.fbaipublicfiles.com/MMPT/metaclip/l14_400m.pt',
+    metaclip2_5b='https://dl.fbaipublicfiles.com/MMPT/metaclip/l14_fullcc2.5b.pt'
 )
 
 _VITL14_336 = dict(
@@ -162,4 +170,4 @@ def download_pretrained(url: str, root: str = os.path.expanduser("~/.cache/clip"
     if expected_sha256 and hashlib.sha256(open(download_target, "rb").read()).hexdigest() != expected_sha256:
         raise RuntimeError(f"Model has been downloaded but the SHA256 checksum does not not match")
 
-    return download_target
+    return download_target
\ No newline at end of file

From ddd771d5f0995be36aef5557a220680b8e874344 Mon Sep 17 00:00:00 2001
From: Andy-LZH
Date: Wed, 8 Nov 2023 23:04:50 +0000
Subject: [PATCH 2/2] Include test to load metaclip in test.py

---
 tests/test.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 tests/test.py

diff --git a/tests/test.py b/tests/test.py
new file mode 100644
index 0000000..0434d3e
--- /dev/null
+++ b/tests/test.py
@@ -0,0 +1,33 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+import torch
+from PIL import Image
+from open_clip import tokenizer
+import open_clip
+import os
+
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+
+
+def test_inference():
+    for model_name in ["ViT-B-32", "ViT-B-32-quickgelu", "ViT-B-16", "ViT-L-14"]:
+        for pretrained in ["metaclip400m", "metaclip2_5b"]:
+            model, _, preprocess = open_clip.create_model_and_transforms(
+                model_name, pretrained=pretrained
+            )
+
+            current_dir = os.path.dirname(os.path.realpath(__file__))
+
+            image = preprocess(Image.open(current_dir + "/../docs/CLIP.png")).unsqueeze(
+                0
+            )
+            text = tokenizer.tokenize(["a diagram", "a dog", "a cat"])
+
+            with torch.no_grad():
+                image_features = model.encode_image(image)
+                text_features = model.encode_text(text)
+
+                text_probs = (100.0 * image_features @ text_features.T).softmax(dim=-1)
+
+            assert text_probs.cpu().numpy()[0].tolist() == [1.0, 0.0, 0.0]
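
Not part of the patch itself: a minimal usage sketch of how the new pretrained tags could be exercised once this branch is installed. The tag names ("metaclip400m", "metaclip2_5b"), the four architectures, and the caption strings are taken from the diff above; the particular architecture/tag pairing and the normalization step are just one illustrative combination, not something the patch prescribes.

    import torch
    import open_clip
    from open_clip import tokenizer

    # "metaclip400m" / "metaclip2_5b" are the tags registered in pretrained.py above;
    # the patch makes them available for ViT-B-32, ViT-B-32-quickgelu, ViT-B-16 and ViT-L-14.
    model, _, preprocess = open_clip.create_model_and_transforms(
        "ViT-B-32-quickgelu", pretrained="metaclip400m"
    )
    model.eval()

    # Encode a few captions; an image prepared with `preprocess` can be scored the same way,
    # as in the test added in patch 2/2.
    text = tokenizer.tokenize(["a diagram", "a dog", "a cat"])
    with torch.no_grad():
        text_features = model.encode_text(text)
        text_features /= text_features.norm(dim=-1, keepdim=True)

Because the tags resolve through the same lookup as the existing openai/laion entries, the checkpoint should be fetched by download_pretrained() and cached under the default root shown in the first patch (~/.cache/clip) on first use.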