# 朴素 RAG 相关模块

## 向量模型

In [1]:
from illufly.types import Document
from illufly.embeddings import DashScopeEmbeddings

embeddings = DashScopeEmbeddings()
doc = [Document("这是一个测试文本")]
embeddings(doc)
print(embeddings.last_output[0].metadata['embeddings'][:3])

[-0.10436907410621643, 0.043999478220939636, -0.07047602534294128]


In [2]:
from illufly.embeddings import ZhipuEmbeddings, DashScopeEmbeddings
from illufly.io import log
import os
# os.environ["ILLUFLY_CACHE_EMBEDDINGS"] = "./__CACHE_EMBEDDINGS__"

e = DashScopeEmbeddings()
e(
    docs=[
        "illufly？ 这是一个专注于写作和对话场景的多智能体框架",
        "普鸿是做消防业务的专业公司",
        "幻蝶AI的智能体框架非常领先",
    ],
    verbose=True
)

[Document(text='illufly？ 这是一个专注于写作和对话场景的多智能体框架', metadata='{"source": "unknown", "embeddings": [-0.0847005844116211, 0.04371895268559456, 0.000247611606027931,...'),
 Document(text='普鸿是做消防业务的专业公司', metadata='{"source": "unknown", "embeddings": [-0.0868479385972023, 0.0248136967420578, -0.027287213131785393,...'),
 Document(text='幻蝶AI的智能体框架非常领先', metadata='{"source": "unknown", "embeddings": [-0.05167926475405693, 0.00564523134380579, -0.00589442625641822...')]

In [1]:
from illufly.embeddings import DashScopeEmbeddings
import os

e = DashScopeEmbeddings()
e(
    [
        "illufly？ 这是一个专注于写作和对话场景的多智能体框架",
        "普鸿是做消防业务的专业公司",
        "幻蝶AI的智能体框架非常领先",
        '模型：这里列出了被评估的模型名称，都是基于"text-embedding-v3"',
        '模型维度：这是指向量模型的维度，即每个向量在嵌入空间中的特征数量。在这个例子中，模型的维度分别为1024、768和512。',
        'MTEB：这可能是模型在某个基准测试（可能是多任务情感分析基准，Multi-Task Emotion Benchmark）上的总体准确率或得分。数值越高，表示模型在该基准测试上的表现越好。',
        'MTEB（Retrieval task）：这是模型在MTEB基准的检索任务上的得分。检索任务通常涉及到从数据集中检索与给定查询最相关的项。',
        'CMTEB：这可能是模型在某种上下文修改的MTEB基准测试上的得分。这里的“C”可能代表“Contextual”或其他某种修改版。',
        'CMTEB (Retrieval task)：这是模型在CMTEB基准的检索任务上的得分。'
    ],
    verbose=True
)

In [2]:
e.last_output

[Document(text='illufly？ 这是一个专注于写作和对话场景的多智能体框架', metadata='{"source": "unknown", "embeddings": [-0.0847005844116211, 0.04371895268559456, 0.000247611606027931,...'),
 Document(text='普鸿是做消防业务的专业公司', metadata='{"source": "unknown", "embeddings": [-0.0868479385972023, 0.0248136967420578, -0.027287213131785393,...'),
 Document(text='幻蝶AI的智能体框架非常领先', metadata='{"source": "unknown", "embeddings": [-0.05167926475405693, 0.00564523134380579, -0.00589442625641822...'),
 Document(text='模型：这里列出了被评估的模型名称，都是基于"text-embedding-v3"', metadata='{"source": "unknown", "embeddings": [-0.062373656779527664, -0.028722155839204788, -0.02404510229825...'),
 Document(text='模型维度：这是指向量模型的维度，即每个向量在嵌入空间中的特征数量。在这个例子中，模型的维度分别为1024、768和512。', metadata='{"source": "unknown", "embeddings": [-0.04857911914587021, -0.028379620984196663, -0.053788159042596...'),
 Document(text='MTEB：这可能是模型在某个基准测试（可能是多任务情感分析基准，Multi-Task Emotion Benchmark）上的总体准确率或得分。数值越高，表示模型在该基准测试上的表现越好。', metadata='{"source": "unknown", "embeddings": [-0

## FaissDB

In [4]:
import time
start = time.time()
from illufly.vectordb import FaissDB
v = FaissDB(e, train=False)
v("普鸿干嘛的？", verbose=True)
# v("幻蝶是啥？", verbose=True)
end = time.time()
print("耗时{}s".format(end-start))

  0s [INFO] [34munknown: 普鸿是做消防业务的专业公司[0m
  0s [INFO] [34munknown: 幻蝶AI的智能体框架非常领先[0m
  0s [INFO] [34munknown: MTEB（Retrieval task）：这是模型在MTEB基准的检索任务上的得分。检索任务通常涉及到从数据集中检索与给定查询最相关的项。[0m
  0s [INFO] [34munknown: MTEB：这可能是模型在某个基准测试（可能是多任务情感分析基准，Multi-Task Emotion Benchmark）上的总体准确率或得分。数值越高，表示模型在该基准测试上的表现越好。[0m
耗时0.3841080665588379s


In [5]:
v.last_output

[Document(text='普鸿是做消防业务的专业公司', metadata='{"source": "unknown", "embeddings": [-0.0868479385972023, 0.0248136967420578, -0.027287213131785393,...'),
 Document(text='幻蝶AI的智能体框架非常领先', metadata='{"source": "unknown", "embeddings": [-0.05167926475405693, 0.00564523134380579, -0.00589442625641822...'),
 Document(text='MTEB（Retrieval task）：这是模型在MTEB基准的检索任务上的得分。检索任务通常涉及到从数据集中检索与给定查询最相关的项。', metadata='{"source": "unknown", "embeddings": [-0.022207587957382202, -0.05412628874182701, -0.058304328471422...'),
 Document(text='MTEB：这可能是模型在某个基准测试（可能是多任务情感分析基准，Multi-Task Emotion Benchmark）上的总体准确率或得分。数值越高，表示模型在该基准测试上的表现越好。', metadata='{"source": "unknown", "embeddings": [-0.028462227433919907, -0.06827855855226517, -0.073512986302375...')]

In [6]:
start = time.time()
v = FaissDB(e, train=True)
v.query("幻蝶是啥？")
end = time.time()
print("耗时{}s".format(end-start))

耗时0.002691984176635742s


In [7]:
start = time.time()
v.query("幻蝶是啥？")
end = time.time()
print("耗时{}s".format(end-start))

耗时7.390975952148438e-05s


In [13]:
v._last_output

In [8]:
from illufly.embeddings import ZhipuEmbeddings, DashScopeEmbeddings
from illufly.io import log
import os

e = DashScopeEmbeddings()
e(
    docs=[
        "illufly？ 这是一个专注于写作和对话场景的多智能体框架",
        "幻蝶AI的智能体框架非常领先",
    ],
    batch_mode=False,
    verbose=True
)

[Document(text='illufly？ 这是一个专注于写作和对话场景的多智能体框架', metadata='{"source": "unknown", "embeddings": [-0.0847005844116211, 0.04371895268559456, 0.000247611606027931,...'),
 Document(text='幻蝶AI的智能体框架非常领先', metadata='{"source": "unknown", "embeddings": [-0.05167926475405693, 0.00564523134380579, -0.00589442625641822...')]

In [5]:
from illufly.embeddings import ZhipuEmbeddings, DashScopeEmbeddings
from illufly.io import log

e = DashScopeEmbeddings()
e.query("illufly 是一个专注于写作和对话场景的多智能体框架")

[-0.08643536269664764,
 0.04660885035991669,
 0.015427844598889351,
 -0.04164039343595505,
 -0.07397478818893433,
 -0.011376187205314636,
 0.006304221693426371,
 0.04455837607383728,
 -0.0052001201547682285,
 -0.007955444976687431,
 -0.001227326923981309,
 -0.0009322016849182546,
 0.006673898547887802,
 0.0005347991245798767,
 0.018828870728611946,
 -0.018947167322039604,
 0.005155758932232857,
 -0.017705054953694344,
 0.029948750510811806,
 -0.05772873014211655,
 -0.03272872045636177,
 -0.032590705901384354,
 -0.034858059138059616,
 -0.00233635725453496,
 0.03036278858780861,
 0.12153001874685287,
 -0.0472397655248642,
 -0.040615156292915344,
 0.008739159442484379,
 -0.02233833633363247,
 0.05272084102034569,
 -0.012460572645068169,
 0.02352130226790905,
 -0.0020578675903379917,
 -0.007363962009549141,
 -0.03598187491297722,
 0.033063892275094986,
 -0.003583400510251522,
 -0.09463725984096527,
 0.016265779733657837,
 0.002715892158448696,
 0.0346214659512043,
 0.015851741656661034,
 -

## 智谱AI

In [1]:
from illufly.embeddings import ZhipuEmbeddings, DashScopeEmbeddings
from illufly.io import log

e = ZhipuEmbeddings()
resp = e.embed_documents(["illufly 是一个专注于写作和对话场景的多智能体框架"])
print(resp)

[[-0.01739502, 0.003376007, 0.022491455, 0.018997192, -0.003326416, -0.00019943714, 0.003458023, 0.012626648, -0.0057525635, 0.01701355, 0.0044784546, 0.0028190613, 0.0152282715, -0.0029945374, 0.0064926147, -0.012382507, -0.0024909973, 0.007167816, -0.01828003, -0.022109985, 0.017425537, 0.042755127, -0.012611389, 0.032226562, 0.019241333, -0.010650635, 0.022888184, 0.005039215, 0.032989502, -0.013153076, 0.0029277802, 0.07342529, -0.004558563, -0.020187378, 0.02268982, 0.038848877, 0.005027771, -0.029510498, 0.009223938, 0.01448822, 0.029556274, 0.023269653, 0.020568848, -0.016220093, 0.03161621, 0.006664276, 0.019348145, -0.019561768, 0.024658203, 0.00010293722, -0.020980835, -0.0022563934, 0.0046424866, 0.010643005, -0.030395508, -0.0056419373, -0.009895325, 0.015853882, -0.011543274, 0.0025806427, -0.024429321, 0.00031161308, 0.031066895, -0.026550293, -0.00025081635, -0.011131287, 0.020431519, -0.025985718, -0.022827148, 0.0033092499, 0.013137817, 0.05480957, -0.010688782, 0.0031

In [2]:
from illufly.embeddings import ZhipuEmbeddings, DashScopeEmbeddings
from illufly.io import log

e = ZhipuEmbeddings()
resp = e.query("illufly 是一个专注于写作和对话场景的多智能体框架")
print(resp)

[-0.01739502, 0.003376007, 0.022491455, 0.018997192, -0.003326416, -0.00019943714, 0.003458023, 0.012626648, -0.0057525635, 0.01701355, 0.0044784546, 0.0028190613, 0.0152282715, -0.0029945374, 0.0064926147, -0.012382507, -0.0024909973, 0.007167816, -0.01828003, -0.022109985, 0.017425537, 0.042755127, -0.012611389, 0.032226562, 0.019241333, -0.010650635, 0.022888184, 0.005039215, 0.032989502, -0.013153076, 0.0029277802, 0.07342529, -0.004558563, -0.020187378, 0.02268982, 0.038848877, 0.005027771, -0.029510498, 0.009223938, 0.01448822, 0.029556274, 0.023269653, 0.020568848, -0.016220093, 0.03161621, 0.006664276, 0.019348145, -0.019561768, 0.024658203, 0.00010293722, -0.020980835, -0.0022563934, 0.0046424866, 0.010643005, -0.030395508, -0.0056419373, -0.009895325, 0.015853882, -0.011543274, 0.0025806427, -0.024429321, 0.00031161308, 0.031066895, -0.026550293, -0.00025081635, -0.011131287, 0.020431519, -0.025985718, -0.022827148, 0.0033092499, 0.013137817, 0.05480957, -0.010688782, 0.00317