In [1]:
from typing import cast, List, Dict, Union
from transformers import AutoModel, AutoTokenizer
import torch
import numpy as np

tokenizer = AutoTokenizer.from_pretrained('qihoo360/360Zhinao-search')
model = AutoModel.from_pretrained('qihoo360/360Zhinao-search')
sentences = ['年假有多少天', '年休假有多少天']
inputs = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt', max_length=512)

if __name__ == "__main__":

    with torch.no_grad():
        last_hidden_state = model(**inputs, return_dict=True).last_hidden_state
        embeddings = last_hidden_state[:, 0]
        embeddings = torch.nn.functional.normalize(embeddings, dim=-1)
        embeddings = embeddings.cpu().numpy()

    print("embeddings:")
    print(embeddings)

    cos_sim = np.dot(embeddings[0], embeddings[1])
    print("cos_sim:", cos_sim)
    print(embeddings[0] @ embeddings[1])



embeddings:
[[-0.02434619  0.01246711  0.00044789 ... -0.0161618   0.02780671
  -0.03590978]
 [-0.02255568  0.02330586  0.01216126 ... -0.0140173   0.02685748
  -0.03161586]]
cos_sim: 0.93176043
0.93176043


In [1]:
from sentence_transformers import SentenceTransformer
sentences_1 = ["年假有多少天"]
sentences_2 = ["年休假有多少天", ]
model = SentenceTransformer('qihoo360/360Zhinao-search')
embeddings_1 = model.encode(sentences_1, normalize_embeddings=True)
embeddings_2 = model.encode(sentences_2, normalize_embeddings=True)
similarity = embeddings_1 @ embeddings_2.T
print(similarity)

  from tqdm.autonotebook import tqdm, trange
No sentence-transformers model found with name qihoo360/360Zhinao-search. Creating a new one with mean pooling.


[[0.93577266]]


In [1]:
from sentence_transformers import SentenceTransformer
sentences_1 = ["年假有多少天"]
sentences_2 = ["年休假有多少天", ]
model = SentenceTransformer('thenlper/gte-large-zh')
embeddings_1 = model.encode(sentences_1, normalize_embeddings=True)
embeddings_2 = model.encode(sentences_2, normalize_embeddings=True)
similarity = embeddings_1 @ embeddings_2.T
print(similarity)

  from tqdm.autonotebook import tqdm, trange


modules.json:   0%|          | 0.00/386 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/32.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/810 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/651M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/394 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/439k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/192 [00:00<?, ?B/s]

[[0.95923924]]


In [2]:
from sentence_transformers import SentenceTransformer
sentences_1 = ["年假有多少天"]
sentences_2 = ["年休假有多少天", ]
model = SentenceTransformer('TownsWu/PEG')
embeddings_1 = model.encode(sentences_1, normalize_embeddings=True)
embeddings_2 = model.encode(sentences_2, normalize_embeddings=True)
similarity = embeddings_1 @ embeddings_2.T
print(similarity)

No sentence-transformers model found with name TownsWu/PEG. Creating a new one with mean pooling.


config.json:   0%|          | 0.00/945 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/651M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/394 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/110k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/439k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

[[0.97681266]]
