In [9]:
import torch 
from PIL import Image

import cn_clip.clip as clip
from cn_clip.clip import load_from_name, available_models
print("Available models:", available_models())
# Available models: ['ViT-B-16', 'ViT-L-14', 'ViT-L-14-336', 'ViT-H-14', 'RN50']

# Zero-shot classification: score one image against a list of Chinese captions.
IMAGE_PATH = "2.jpg"
LABELS = ["杰尼龟", "妙蛙种子", "小火龙", "皮卡丘", "狗屁"]

device = "cuda" if torch.cuda.is_available() else "cpu"
# download_root='./' points at the notebook's current working directory.
model, preprocess = load_from_name('ViT-H-14', device=device, download_root='./')
model.eval()

# Open the image inside a context manager so the underlying file is closed
# as soon as preprocessing has produced the input tensor.
with Image.open(IMAGE_PATH) as img:
    image = preprocess(img).unsqueeze(0).to(device)
text = clip.tokenize(LABELS).to(device)

with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)
    # Normalize the features; use the normalized image/text features for downstream tasks.
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # NOTE: get_similarity is called with the raw image/text inputs, not with the
    # features computed above; those are kept only for downstream use.
    logits_per_image, logits_per_text = model.get_similarity(image, text)
    probs = logits_per_image.softmax(dim=-1).cpu().numpy()

# One row for the single input image, one probability per entry in LABELS.
print("Label probs:", probs)

Available models: ['ViT-B-16', 'ViT-L-14', 'ViT-L-14-336', 'ViT-H-14', 'RN50']


100%|█████████████████████████████████████| 3.57G/3.57G [34:59<00:00, 1.82MiB/s]


Loading vision model config from d:\code\image\chinese-clip\cn_clip\clip\model_configs\ViT-H-14.json
Loading text model config from d:\code\image\chinese-clip\cn_clip\clip\model_configs\RoBERTa-wwm-ext-large-chinese.json
Model info {'embed_dim': 1024, 'image_resolution': 224, 'vision_layers': 32, 'vision_width': 1280, 'vision_head_width': 80, 'vision_patch_size': 14, 'vocab_size': 21128, 'text_attention_probs_dropout_prob': 0.1, 'text_hidden_act': 'gelu', 'text_hidden_dropout_prob': 0.1, 'text_hidden_size': 1024, 'text_initializer_range': 0.02, 'text_intermediate_size': 4096, 'text_max_position_embeddings': 512, 'text_num_attention_heads': 16, 'text_num_hidden_layers': 24, 'text_type_vocab_size': 2}
Label probs: [[0.359   0.01816 0.3762  0.1321  0.11475]]


In [8]:
# Show the raw (pre-softmax) image-to-text logits returned by model.get_similarity.
logits_per_image

tensor([[29.8750, 30.2656, 31.0000, 32.5625, 33.5625]], device='cuda:0',
       dtype=torch.float16)

In [11]:
import torch
from diffusers import StableDiffusionPipeline
from yelib.utils.network_util import ProxyContext

# Text-to-image generation with a Chinese Stable Diffusion checkpoint plus
# LoRA attention weights loaded into its UNet.

# Pick the device the same way the CLIP cell does instead of assuming a GPU,
# so the cell does not crash on a CPU-only machine.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Only the model/LoRA loading runs inside the proxy context; moving the
# pipeline to the device and generating happen after it exits.
with ProxyContext():
    model_id = "alibaba-pai/pai-diffusion-artist-large-zh"
    pipe = StableDiffusionPipeline.from_pretrained(model_id)
    pipe.unet.load_attn_procs("alibaba-pai/pai-diffusion-artist-large-zh-lora-25D")
pipe = pipe.to(device)

prompt = "蓝天，白云，大草原"
# NOTE(review): "scale" is presumably the LoRA blending strength — confirm
# against the diffusers version in use.
image = pipe(prompt, cross_attention_kwargs={"scale": 0.4}).images[0]
image.save("result.png")

entering proxy context http://192.168.1.45:10809


Downloading (…)ain/model_index.json:   0%|          | 0.00/582 [00:00<?, ?B/s]

text_encoder\model.safetensors not found


Fetching 14 files:   0%|          | 0/14 [00:00<?, ?it/s]

Downloading (…)_encoder/config.json:   0%|          | 0.00/623 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/419 [00:00<?, ?B/s]

Downloading (…)_checker/config.json:   0%|          | 0.00/4.86k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Downloading (…)rocessor_config.json:   0%|          | 0.00/518 [00:00<?, ?B/s]

Downloading (…)cheduler_config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

Downloading (…)/tokenizer/vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

Downloading (…)7d4/unet/config.json:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

Downloading (…)67d4/vae/config.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)on_pytorch_model.bin:   0%|          | 0.00/335M [00:00<?, ?B/s]

Downloading (…)on_pytorch_model.bin:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

exit proxy context http://192.168.1.45:10809


ConnectionError: HTTPSConnectionPool(host='cdn-lfs.huggingface.co', port=443): Read timed out.

In [12]:
import torch
from diffusers import StableDiffusionPipeline
from yelib.utils.network_util import ProxyContext  # imported here so the cell does not rely on an earlier cell

# Text-to-image generation with a Chinese fine-tuned Stable Diffusion checkpoint.

# Pick the device the same way the CLIP cell does instead of assuming a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

with ProxyContext():
    # The NSFW safety checker is deliberately disabled here. Passing
    # safety_checker=None is the supported way to do that; replacing the
    # attribute with a lambda that returns a bare `False` relies on pipeline
    # internals. diffusers will log a warning about the missing checker —
    # re-enable it before exposing generated images publicly.
    pipeline = StableDiffusionPipeline.from_pretrained(
        "svjack/Stable-Diffusion-FineTuned-zh-v1",
        safety_checker=None,
    )
pipeline = pipeline.to(device)

prompt = '女孩们打开了另一世界的大门'
image = pipeline(prompt, guidance_scale=7.5).images[0]
# Display the generated image as the cell's output.
image

entering proxy context http://192.168.1.45:10809


Downloading (…)ain/model_index.json:   0%|          | 0.00/544 [00:00<?, ?B/s]

text_encoder\model.safetensors not found


Fetching 14 files:   0%|          | 0/14 [00:00<?, ?it/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Downloading (…)cheduler_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

Downloading (…)_checker/config.json:   0%|          | 0.00/4.84k [00:00<?, ?B/s]

Downloading (…)rocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

Downloading (…)_encoder/config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

Downloading (…)8c6/unet/config.json:   0%|          | 0.00/915 [00:00<?, ?B/s]

Downloading (…)/tokenizer/vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

Downloading (…)38c6/vae/config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Downloading (…)on_pytorch_model.bin:   0%|          | 0.00/335M [00:00<?, ?B/s]

Downloading (…)on_pytorch_model.bin:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

exit proxy context http://192.168.1.45:10809


ConnectionError: HTTPSConnectionPool(host='cdn-lfs.huggingface.co', port=443): Read timed out.