# 调用文心一言Embedding API

## 1. 原生接口调用

In [4]:
from dotenv import find_dotenv, load_dotenv
import os

# Load environment variables from the project's local .env file.

# find_dotenv() searches for and locates the path of a .env file.
# load_dotenv() reads that .env file and loads its variables into the current runtime environment.
# If the variables are already set globally, this line has no effect.
# _ = load_dotenv(find_dotenv())
_ = load_dotenv('./个人知识库问答助手项目/.env')

# Read the Wenxin API key and secret key from the environment
# (os.environ[...] raises KeyError if either variable is missing).
wenxin_api_key = os.environ["wenxin_api_key"]
wenxin_secret_key = os.environ["wenxin_secret_key"]


In [6]:
import requests
import json

def get_access_token(timeout=10):
    """
    Exchange the API Key / Secret Key pair for an access token.

    Reads the module-level ``wenxin_api_key`` and ``wenxin_secret_key`` values and
    POSTs them to the aip.baidubce.com OAuth 2.0 token endpoint using the
    ``client_credentials`` grant type.

    Args:
        timeout: Seconds to wait for the token endpoint before giving up
            (passed straight through to ``requests``).

    Returns:
        The ``access_token`` string taken from the JSON response.

    Raises:
        RuntimeError: If the JSON response carries no ``access_token``
            (for example because the credentials were rejected).
        requests.RequestException: On network failure or timeout.
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    # Let requests build the query string so the credentials are URL-encoded
    # instead of being pasted into the URL verbatim.
    params = {
        "grant_type": "client_credentials",
        "client_id": wenxin_api_key,
        "client_secret": wenxin_secret_key,
    }
    payload = json.dumps("")
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    response = requests.post(url, params=params, headers=headers, data=payload, timeout=timeout)
    body = response.json()
    access_token = body.get("access_token")
    if not access_token:
        # Fail here with the service's own error fields rather than returning None
        # and letting the caller crash later on string concatenation.
        raise RuntimeError(
            "Failed to obtain access_token: "
            f"{body.get('error')} - {body.get('error_description')}"
        )
    return access_token


# Call the embedding-v1 endpoint directly; the access token travels in the query string.
url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/embedding-v1?access_token=" + get_access_token()
# Both sample sentences are sent in a single request body.
payload = json.dumps({"input": ["推荐一些美食","给我讲个故事"]})
headers = {'Content-Type': 'application/json'}
# Bound the wait: without an explicit timeout, requests does not time out on a stalled connection.
response = requests.post(url, headers=headers, data=payload, timeout=30)
print(response.text)

{"id":"as-2gmnf3bthg","object":"embedding_list","created":1709006164,"data":[{"object":"embedding","embedding":[0.018314670771360397,0.00942440889775753,-0.009610665962100029,0.11749528348445892,-0.04390019550919533,-0.01204194501042366,-0.049173809587955475,0.07313172519207001,-0.04394596442580223,-0.0031423401087522507,-0.0002104247105307877,-0.03840280696749687,0.03257625922560692,-0.06292766332626343,-0.046975813806056976,-0.10594844073057175,-0.02635003626346588,0.04616350680589676,0.0492473766207695,0.01683962531387806,0.0586620457470417,0.054733432829380035,-0.054658859968185425,0.03388411924242973,-0.051863472908735275,0.08070266246795654,-0.014839539304375648,-0.0027158712036907673,0.09159722924232483,0.11055705696344376,0.01364678330719471,0.04435339197516441,0.029239347204566002,-0.06402997672557831,-0.11075606942176819,0.07460511475801468,-0.012659254483878613,0.024171341210603714,-0.06496117264032364,0.08964287489652634,0.013678735122084618,0.0590558797121048,-0.0036101271

In [3]:
# Length of the first embedding vector in the response's "data" list
len(response.json()["data"][0]["embedding"])

384

In [4]:
# Print the `prompt_tokens` figure from the response's `usage` section.
prompt_token_count = response.json()["usage"]["prompt_tokens"]
print(f'模型输入的 tokens 数量为: {prompt_token_count}')

模型输入的 tokens 数量为: 7


In [5]:
# Print the `total_tokens` figure from the response's `usage` section.
total_token_count = response.json()["usage"]["total_tokens"]
print(f'模型总的 tokens 数量为: {total_token_count}')

模型总的 tokens 数量为: 7


## 2. LangChain调用

In [6]:
from langchain_community.embeddings import QianfanEmbeddingsEndpoint
# Build the LangChain Qianfan embeddings client from the Wenxin key pair,
# then embed two sample sentences in one call.
qianfan_credentials = {"qianfan_ak": wenxin_api_key, "qianfan_sk": wenxin_secret_key}
embed = QianfanEmbeddingsEndpoint(**qianfan_credentials)
res = embed.embed_documents(["推荐一些美食", "给我讲个故事"])

In [7]:
# Length of the first vector returned by embed.embed_documents
len(res[0])

384

In [11]:
# Display the full value returned by embed.embed_documents
res

[[0.018314670771360397,
  0.00942440889775753,
  -0.009610665962100029,
  0.11749528348445892,
  -0.04390019550919533,
  -0.01204194501042366,
  -0.049173809587955475,
  0.07313172519207001,
  -0.04394596442580223,
  -0.0031423401087522507,
  -0.0002104247105307877,
  -0.03840280696749687,
  0.03257625922560692,
  -0.06292766332626343,
  -0.046975813806056976,
  -0.10594844073057175,
  -0.02635003626346588,
  0.04616350680589676,
  0.0492473766207695,
  0.01683962531387806,
  0.0586620457470417,
  0.054733432829380035,
  -0.054658859968185425,
  0.03388411924242973,
  -0.051863472908735275,
  0.08070266246795654,
  -0.014839539304375648,
  -0.0027158712036907673,
  0.09159722924232483,
  0.11055705696344376,
  0.01364678330719471,
  0.04435339197516441,
  0.029239347204566002,
  -0.06402997672557831,
  -0.11075606942176819,
  0.07460511475801468,
  -0.012659254483878613,
  0.024171341210603714,
  -0.06496117264032364,
  0.08964287489652634,
  0.013678735122084618,
  0.0590558797121048,