In [2]:
# FireWorks
# https://python.langchain.com/v0.1/docs/integrations/text_embedding/fireworks/
# https://readme.fireworks.ai/docs/querying-embeddings-models#list-of-available-models 
from langchain_fireworks import FireworksEmbeddings
from dotenv import load_dotenv
import numpy as np

load_dotenv()
embedding_model = FireworksEmbeddings(model="nomic-ai/nomic-embed-text-v1.5")

text_li = ['How is the weather today?', '今天天气怎么样?']
doc_result = embedding_model.embed_documents(text_li)
print(f"doc_result[0]的模: {np.linalg.norm(doc_result[0])}")
print(f"doc_result[1]的模: {np.linalg.norm(doc_result[1])}")
cosine_similarity_normalized = np.dot(doc_result[0], doc_result[1])
print(f"doc_result[0]与doc_result[1]之间的相似度: {cosine_similarity_normalized}")

doc_result[0]的模: 0.9997713049891235
doc_result[1]的模: 0.99977603553449
doc_result[0]与doc_result[1]之间的相似度: 0.42591513161269745


In [7]:
# BAAI
# https://python.langchain.com/v0.1/docs/integrations/text_embedding/bge_huggingface/
# https://huggingface.co/BAAI/bge-large-zh-v1.5
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

model_name = "BAAI/bge-large-zh-v1.5"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embedding_model = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

text_li = ['How is the weather today?', '今天天气怎么样?']
doc_result = embedding_model.embed_documents(text_li)
print(f"doc_result[0]的模: {np.linalg.norm(doc_result[0])}")
print(f"doc_result[1]的模: {np.linalg.norm(doc_result[1])}")
cosine_similarity_normalized = np.dot(doc_result[0], doc_result[1])
print(f"doc_result[0]与doc_result[1]之间的相似度: {cosine_similarity_normalized}")

doc_result[0]的模: 0.9999999668938295
doc_result[1]的模: 1.0000000349748062
doc_result[0]与doc_result[1]之间的相似度: 0.6709808574437868


In [8]:
# BAAI
# https://python.langchain.com/v0.1/docs/integrations/text_embedding/bge_huggingface/
# https://huggingface.co/BAAI/bge-m3
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

model_name = "BAAI/bge-m3"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embedding_model = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

text_li = ['How is the weather today?', '今天天气怎么样?']
doc_result = embedding_model.embed_documents(text_li)
print(f"doc_result[0]的模: {np.linalg.norm(doc_result[0])}")
print(f"doc_result[1]的模: {np.linalg.norm(doc_result[1])}")
cosine_similarity_normalized = np.dot(doc_result[0], doc_result[1])
print(f"doc_result[0]与doc_result[1]之间的相似度: {cosine_similarity_normalized}")

doc_result[0]的模: 0.999999959239315
doc_result[1]的模: 1.0000000772689546
doc_result[0]与doc_result[1]之间的相似度: 0.9607147514729826
