# **Build RAG with LangChain**

In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from dotenv import load_dotenv

# Call python-dotenv's loader once up front; later cells read API keys from the
# environment (presumably populated from a local .env file — confirm).
load_dotenv()

True

In [2]:
# ========== INDEXING ==========
# Load the source text, split it into overlapping chunks, embed the chunks and
# store them in a Chroma collection.
SOURCE_PATH = "cn1.md"

loader = TextLoader(SOURCE_PATH, encoding="utf-8")
docs = loader.load()

# Separators go from coarse to fine: Markdown headings, blank lines, single
# newlines, sentence-ending punctuation, spaces, then individual characters.
# Chinese prose normally uses full-width punctuation, so the full-width forms
# are listed alongside the ASCII ones; with only the ASCII "!", "?" and ";"
# those sentence-level rules would not match typical Chinese text.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=150,
    separators=[
        "\n## ",
        "\n# ",
        "\n\n",
        "\n",
        "。",
        "\uff01",  # full-width exclamation mark
        "\uff1f",  # full-width question mark
        "\uff1b",  # full-width semicolon
        "!",
        "?",
        ";",
        " ",
        "",
    ],
)
splits = text_splitter.split_documents(docs)
print(f"Split into {len(splits)} chunks")

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# Stable, position-based ids make this cell safe to re-run: without explicit
# ids each run gets fresh random ids, so running it again against an existing
# collection would append a second copy of every chunk and the retriever could
# return the same passage several times.
# NOTE(review): if a later run yields fewer chunks, ids left over from the
# longer run stay in the collection — confirm whether it should be dropped first.
chunk_ids = [f"{SOURCE_PATH}-{i}" for i in range(len(splits))]

vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    ids=chunk_ids,
    collection_name="chinese_fiction_langchain",
)

Split into 15 chunks


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]



config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


tokenizer_config.json:   0%|          | 0.00/526 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
import os
from dotenv import load_dotenv

# Call load_dotenv() again, then report for each credential variable whether it
# is set, without printing its value.
load_dotenv()
for key_name in ("GEMINI_API_KEY", "GOOGLE_API_KEY"):
    status = "✓ exists" if os.getenv(key_name) else "✗ missing"
    print(f"{key_name}:", status)

GEMINI_API_KEY: ✓ exists
GOOGLE_API_KEY: ✓ exists


In [8]:
# ========== RETRIEVAL (no reranking for now) ==========
# The retriever asks the vector store for TOP_K results per query.
TOP_K = 3
retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})

# ========== GENERATION ==========
MODEL_NAME = "gemini-2.5-flash"
llm = ChatGoogleGenerativeAI(model=MODEL_NAME, temperature=0)

# The template has two placeholders: {context} receives the formatted
# passages and {question} receives the user's query.
PROMPT_TEMPLATE = """你是一个专业的小说问答助手。请基于以下提供的小说片段来回答用户的问题。

小说片段:
{context}

用户问题: {question}

请根据上述片段提供准确、详细的回答。如果片段中没有足够信息回答问题,请说明。"""

prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

def format_docs(docs):
    """Render documents as numbered "[片段 N]" sections separated by blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        A single string in which each document's text sits under a 1-based
        "[片段 N]" header; an empty string when ``docs`` is empty.
    """
    sections = []
    for number, document in enumerate(docs, start=1):
        sections.append(f"[片段 {number}]\n{document.page_content}")
    return "\n\n".join(sections)

# The incoming question feeds both prompt variables: "context" is produced by
# retrieving documents and formatting them, "question" is passed through as-is.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# ========== QUERY ==========
query = "哈利的守护神兽是什么样子的?"
answer = rag_chain.invoke(query)

# Echo the question and the model's answer, each preceded by a blank line.
print(f"\n问题: {query}")
print(f"\n答案: {answer}")

Both GOOGLE_API_KEY and GEMINI_API_KEY are set. Using GOOGLE_API_KEY.



问题: 哈利的守护神兽是什么样子的?

答案: 根据小说片段，哈利的守护神兽是**牡鹿状**的。

片段2中提到：“伴随一道银光，一头牡鹿状的守护神腾空而起，踏月而行，朝城堡方向飞奔而去。”
