In [None]:
# test_rag.py
import os
import shutil
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma

# --- 1. Create the sample text file ---
# Three short sentences, one per line, form the tiny corpus used by the rest
# of this script. Opening in "w" mode rewrites the file on every run.
test_file_path = "data.txt"
sample_lines = [
    "LangChain 是 LLM 應用開發的強大框架。\n",
    "Chroma DB 是一個輕量級的向量資料庫，用於儲存和檢索文件嵌入向量。\n",
    "我們使用 CharacterTextSplitter 來分割長篇文件。\n",
]
with open(test_file_path, "w", encoding="utf-8") as sample_file:
    sample_file.writelines(sample_lines)

print("--- 1. 檔案準備完成 ---")

# --- 2. Load the document (TextLoader) ---
# Read the sample file back through LangChain's loader, using the same
# UTF-8 encoding it was written with.
loader = TextLoader(
    file_path=test_file_path,
    encoding="utf-8",
)
documents = loader.load()
print(f"--- 2. 文件載入成功，共 {len(documents)} 份 ---")

# --- 3. Split the documents (TextSplitter) ---
# Split on newlines with a target chunk size of 50 characters and no overlap
# between chunks. The resulting chunks are what Chroma.from_documents
# receives in step 5.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=50,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(documents)
print(f"--- 3. 文件分割成功，共 {len(docs)} 個區塊 ---")

# --- 4. Create the embedding model (HuggingFaceEmbeddings) ---
# No model name is passed, so the library's default model is used.
# NOTE(review): the sample text and the query are Chinese — confirm the
# default model handles them well enough, or pass an explicit model_name.
embeddings = HuggingFaceEmbeddings()
print("--- 4. HuggingFace 嵌入模型準備就緒 ---")

# --- 5. Build the vector store (Chroma) ---
persist_directory = "./test_chroma_db"
# Remove any database left by a previous run so each run rebuilds the
# store from scratch.
if os.path.exists(persist_directory):
    shutil.rmtree(persist_directory)

# Embed every chunk and persist the resulting collection on disk.
vectorstore = Chroma.from_documents(
    docs,
    embedding=embeddings,
    persist_directory=persist_directory,
)
print(f"--- 5. Chroma 向量資料庫建立成功，路徑: {persist_directory} ---")

# --- 6. Run retrieval (similarity search) ---
# Ask the vector store for the single chunk (k=1) closest to the query.
query = "什麼是 LangChain？"
retrieved_docs = vectorstore.similarity_search(query, k=1)

print("\n--- 6. 檢索結果 ---")
print(f"查詢: {query}")
if retrieved_docs:
    print(f"最相關的文件內容: {retrieved_docs[0].page_content}")
else:
    # The search can come back empty (e.g. nothing was indexed); report
    # that instead of failing with IndexError on retrieved_docs[0].
    print("最相關的文件內容: (無檢索結果)")

ResponseError: model is required (status code: 400)