In [4]:
from langchain.vectorstores import Chroma
# 基本配置
from langchain_openai import ChatOpenAI
import os
from dotenv import load_dotenv
from langchain_community.embeddings.cloudflare_workersai import CloudflareWorkersAIEmbeddings

load_dotenv(override=True)

qw_llm_openai = ChatOpenAI(
    openai_api_base=os.getenv('DASHSCOPE_API_BASE'),
    openai_api_key=os.getenv('DASHSCOPE_API_KEY'),
    model_name="qwen2-1.5b-instruct",
    temperature=0,
)
embedding = CloudflareWorkersAIEmbeddings(
    account_id=os.getenv('CF_ACCOUNT_ID'),
    api_token=os.getenv('CF_API_TOKEN'),
    model_name="@cf/baai/bge-small-en-v1.5",
)

collection_name = 'json-rag-index'
vectordb = Chroma(persist_directory="./.chroma-data", collection_name=collection_name, embedding_function=embedding)

In [2]:
import json

# 读取文件内容
with open('../../file/yxk-list-2.json', 'r', encoding='utf-8') as f:
    content = f.read()

# 将解码后的内容转换为JSON对象
json_data = json.loads(content)

In [3]:
from langchain_text_splitters import RecursiveJsonSplitter

splitter = RecursiveJsonSplitter(max_chunk_size=300)

# The splitter can also output documents
docs = splitter.create_documents(texts=[json_data], ensure_ascii=False)

In [5]:
vectordb.add_documents(docs)

['9576c3c2-c454-44cb-a73b-d9ec2c97cbf5']

In [6]:
retriever = vectordb.as_retriever()

In [8]:
retriever.invoke("婺源景德镇4日 从古徽州到瓷都的发现之旅 的好评率是多少")

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


[Document(page_content='{"product_list": [{"title": "【登峰造极·云中岭丨青青草原 高山草甸 强度可选】", "introduce": "高山牧场，牛羊成群，中转车上山，徒步2km即可到达牧场，4-10公里AB线（A线休闲，B线户外）", "theme": "户外游-徒步登山", "positive_comment_rate": "好评率：99.1%", "tags": "线路，收费，周边，旅游产品，跟团游-一日游，1天", "duration": "1天", "departure_city": "成都 四川省", "product_features": "熊猫之乡;云中牧场;高山草甸;遥望雪山", "price": "100元起", "the_best_travel_date": "7月份"}, {"title": "【婺源景德镇4日 从古徽州到瓷都的发现之旅】", "introduce": "婺源篁岭 | 梯田古村晒秋，瑶里古镇 | 探寻千年秘密，新平瓷宫 | 致敬瓷楼奶奶，陶溪川夜市 | 趣味淘宝，御窑博物馆 | 打卡宝藏机位", "theme": "深度人文-文化历史；休闲度假-懒人慢游；美景探索-游山玩水", "positive_comment_rate": "好评率：20.1%", "tags": "线路，收费，国内,旅游产品,跟团游-多日游,4天3晚", "duration": "4天3晚", "departure_city": "杭州", "product_features": "小众深度体验；陶瓷文化之旅；文艺惊艳之旅；精选舒适酒店", "price": "2180元起", "the_best_travel_date": "10月份"}, {"title": "【桂林阳朔4日 从山水画卷到民族风情的奇妙之旅】", "introduce": "漓江 | 领略如画风光，西街 | 感受热闹夜生活，龙脊梯田 | 观赏壮美稻田，象山公园 | 打卡标志性景点", "theme": "自然风光-山水如画；民俗风情-特色体验；休闲放松-轻松出行", "positive_comment_rate": "好评率：18.5%", "tags": "线路，收费，国内,旅游产品,跟团游-多日游,4天3晚", "durati