# ChromaDB Client

## 1. 向量数据库环境配置

## 1.1 获取数据库登录信息

In [11]:
import os
import time
import chromadb

from chromadb.config import Settings
from dotenv import load_dotenv

# Load the key/value pairs of the local .env file into the process environment
# so that the lookups below can read them.
load_dotenv()

# Host and port that are presumably those of the OpenAI-compatible model
# server (inferred from the names - confirm against the .env file).
ServerIP = os.environ.get('ServerIP')
OpenAiPort = os.environ.get('OpenAiPort')

# Host, port and basic-auth credentials used to reach the ChromaDB server.
ChromadbIP = os.environ.get('ChromadbIP')
ChromadbPort = os.environ.get('ChromadbPort')
ChromadbUser = os.environ.get('ChromadbUser')
ChromadbPassword = os.environ.get('ChromadbPassword')

# Debugging aid, left disabled: uncomment to check which values were loaded.
# Do not leave the password line enabled in a notebook whose outputs are shared.
# print(f"ServerIP: {ServerIP}")
# print(f"OpenAiPort: {OpenAiPort}")
# print(f"ChromadbIP: {ChromadbIP}")
# print(f"ChromadbPort: {ChromadbPort}")
# print(f"ChromadbUser: {ChromadbUser}")
# print(f"ChromadbPassword: {ChromadbPassword}")

## 1.2 连接向量数据库

In [12]:
# Create the HTTP client for the remote ChromaDB server. Authentication uses the
# basic-auth client provider with "user:password" credentials taken from the
# values read from the environment.
dbClient = chromadb.HttpClient(
    host=ChromadbIP,
    port=ChromadbPort,
    settings=Settings(
        chroma_client_auth_provider="chromadb.auth.basic.BasicAuthClientProvider",
        chroma_client_auth_credentials=f"{ChromadbUser}:{ChromadbPassword}",
    ),
)

# Connectivity check: print whatever the server's heartbeat call returns.
result = dbClient.heartbeat()
print(result)

# get_version() should succeed with or without authentication, because it is a
# public endpoint.
version = dbClient.get_version()
print(version)

1715594084940018972
0.4.24


## 1.3 获取数据库的数据集列表

In [3]:
# Ask the server for every collection it holds and print the returned list.
result = dbClient.list_collections()
print(result)

[Collection(name=question), Collection(name=mycollection), Collection(name=eschool)]


## 1.4 获取数据集的数据量

In [5]:
# Time how long it takes to open the "eschool" collection and count its records.
# perf_counter() is used for the measurement because it is monotonic and
# high-resolution, so the elapsed value cannot be distorted by adjustments of
# the system clock (which can affect time.time()).
start_time = time.perf_counter()
# NOTE: get_or_create_collection creates the collection when the name does not
# exist yet, so a misspelled name yields a new, empty collection (count 0).
collection = dbClient.get_or_create_collection(name="eschool")
count = collection.count()
end_time = time.perf_counter()
# Report the elapsed seconds (two decimals) followed by the record count.
print("耗时: {:.2f}秒: 数据集数量{} ".format(end_time - start_time, count))

耗时: 0.28秒: 数据集数量59 


In [46]:
# Fetch a single stored record and print the length of its first embedding
# vector, i.e. the dimensionality of the vectors in the current collection.
result = collection.peek(limit=1)
print(len(result["embeddings"][0]))

1024


## 2. embedding 接口声明

In [17]:
from openai import OpenAI

# Endpoint of the OpenAI-compatible embedding service. Host and port are taken
# from the ServerIP / OpenAiPort entries that load_dotenv() placed in the
# environment, so the address is configured in one place (.env) instead of
# being fixed in the notebook. 192.168.2.199:9001 is used only as a fallback
# when an entry is missing or empty.
base_url = "http://{}:{}/v1/".format(
    os.getenv("ServerIP") or "192.168.2.199",
    os.getenv("OpenAiPort") or "9001",
)
# "EMPTY" is a placeholder key; presumably the self-hosted server does not
# validate API keys - confirm before pointing this at another backend.
client = OpenAI(api_key="EMPTY", base_url=base_url)

In [15]:
def getEmbedding(string: str, model: str = "bge-large-zh-1.5"):
    """Return the embedding vector of ``string`` from the embedding service.

    Args:
        string: Text to embed. It is sent to the service as a one-element batch.
        model: Name of the embedding model to request. Defaults to
            "bge-large-zh-1.5".

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    # Embed the argument itself. Reading the notebook-level ``content`` variable
    # here would make the result depend on whatever cell ran last and would
    # raise NameError on a fresh kernel.
    response = client.embeddings.create(model=model, input=[string])
    return response.data[0].embedding

In [43]:
# Try the embedding helper on a short sample sentence.
content = "世界你好"
em = getEmbedding(content)
# Print the number of components in the returned vector, then the vector itself.
print("嵌入完成，维度：", len(em))
print(em)

嵌入完成，维度： 1024
[0.043814774602651596, -0.018825968727469444, -0.07392045110464096, 0.01022292859852314, -0.017410971224308014, 0.016272881999611855, -0.04854489490389824, 0.026275962591171265, 0.0010080956853926182, -0.00666902307420969, 0.017251577228307724, 0.017069479450583458, -0.008823474869132042, -0.0007065976969897747, -0.046927399933338165, 0.02287238836288452, 0.06644967198371887, -0.0021281661465764046, 0.014751054346561432, -0.03527429327368736, -0.018061408773064613, -0.017215082421898842, -0.07356918603181839, -0.009781808592379093, -0.006189774721860886, 0.024524886161088943, 0.015177643857896328, 0.012991683557629585, 0.041843388229608536, -0.05700630694627762, -0.018223686143755913, -0.006083004642277956, 0.0026466234121471643, -0.00701072346419096, 0.018056344240903854, 0.029076533392071724, 0.0003970491816289723, 0.0245522353798151, -0.0286956075578928, 0.012856295332312584, -0.008557532913982868, -0.01696266233921051, 0.007117427419871092, -0.033039629459381104, -0.0

## 3. 向量数据库查询

In [13]:
# Make "question" the working collection and print one stored record so its
# fields can be inspected.
collection = dbClient.get_or_create_collection(name="question")
data = collection.peek(limit=1)
print(data)

{'ids': ['10000'], 'embeddings': [[-0.015681352466344833, 0.013566659763455391, -0.017010794952511787, 0.025677746161818504, 0.04219861328601837, 0.0019819382578134537, -0.03514780476689339, 0.00787694938480854, 0.036484573036432266, 0.015246105380356312, -0.010075022466480732, -0.018772216513752937, 0.008516710251569748, -0.00021792607731185853, 0.021786754950881004, -0.03090413473546505, 0.05249127745628357, 0.021901456639170647, 0.009633486159145832, -0.010095941834151745, 0.015810653567314148, -0.0275114756077528, -0.012604552321135998, -0.0004756384005304426, 0.034340858459472656, 0.015485054813325405, -0.06859289109706879, 0.08173288404941559, 0.04922777786850929, 0.017172403633594513, -0.0008863963303156197, -0.005920928902924061, -0.01124277338385582, 0.03967246785759926, 0.01100573968142271, -0.012774976901710033, 0.01039618905633688, 0.0017434097826480865, -0.04818843677639961, 0.03453316539525986, 0.013006024062633514, 0.002802661620080471, -0.04643644392490387, -0.015536676

In [20]:
# Embed the question text and ask the current collection for the two entries
# closest to that embedding.
content = "电器五方的概念"
embed = getEmbedding(content)
result = collection.query(query_embeddings=[embed], n_results=2)

# Print the full response first, then only the distance of the first hit
# returned for the first (and only) query embedding.
print(result)
print(result["distances"][0][0])

{'ids': [['1645125', '1653161']], 'distances': [[0.942644476890564, 0.942644476890564]], 'embeddings': None, 'metadatas': [[{'answer': 'A.盘面的制作。应按设计要求制作盘面，盘面板四周与箱边应有适当缝隙，以便在配电箱内将其固定安装 B.电器排列。电器安装前，将盘面放平，把全部电器摆放在盘面板上，按照相关的要求试排列 C.钻孔刷漆。按照电器排列的实际位置，标出每个电器安装孔和进出线孔位置，然后在盘面钻孔和刷漆 D.固定电器。等油漆干固，先在进出线孔套上瓷管头或橡皮护套以保护导线，然后将全部电器按预设位置就位，并用木螺钉或螺栓将其固定 E.盘后配线。配线要横平竖直，排列整齐，绑扎成束，用卡钉固定牢固 F.接零母线做法。接零系统的零母线，一般应由零线端子板引止各支路或设备', 'type': '多选试题'}, {'answer': 'A.盘面的制作。应按设计要求制作盘面，盘面板四周与箱边应有适当缝隙，以便在配电箱内将其固定安装 B.电器排列。电器安装前，将盘面放平，把全部电器摆放在盘面板上，按照相关的要求试排列 C.钻孔刷漆。按照电器排列的实际位置，标出每个电器安装孔和进出线孔位置，然后在盘面钻孔和刷漆 D.固定电器。等油漆干固，先在进出线孔套上瓷管头或橡皮护套以保护导线，然后将全部电器按预设位置就位，并用木螺钉或螺栓将其固定 E.盘后配线。配线要横平竖直，排列整齐，绑扎成束，用卡钉固定牢固 F.接零母线做法。接零系统的零母线，一般应由零线端子板引止各支路或设备', 'type': '多选试题'}]], 'documents': [['25.自制非标准配电箱盘面的组装和配线的步骤和要求有（\u3000）。\nA.盘面的制作。应按设计要求制作盘面，盘面板四周与箱边应有适当缝隙，以便在配电箱内将其固定安装\tB.电器排列。电器安装前，将盘面放平，把全部电器摆放在盘面板上，按照相关的要求试排列\tC.钻孔刷漆。按照电器排列的实际位置，标出每个电器安装孔和进出线孔位置，然后在盘面钻孔和刷漆\tD.固定电器。等油漆干固，先在进出线孔套上瓷管头或橡皮护套以保护导线，然后将全部电器按预设位置就位，并用木螺钉或螺栓将其固定\tE.盘后配线。配线要横平竖直

## 4. 向量数据库重置

清空所有数据集的内容，并删除所有数据集（该操作不可恢复，请谨慎执行）。

In [10]:
# DESTRUCTIVE: according to the description above this cell, reset() clears the
# contents of every collection and deletes all collections.
# Presumably left commented out so that "Run All" cannot wipe the database by
# accident; uncomment deliberately and run this cell on its own.
# dbClient.reset()

True