In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA

# Nếu dùng Ollama
from langchain_community.llms import Ollama

# Nếu dùng vLLM
# from langchain_openai import ChatOpenAI

# ======== 1️⃣ Load the PDF file =========
pdf_path = "a.pdf"  # 👉 replace with the name of your own PDF file
loader = PyPDFLoader(pdf_path)
docs = loader.load()

# ======== 2️⃣ Split the text into smaller chunks =========
# chunk_size / chunk_overlap are the splitter's size limits; the overlap lets
# neighbouring chunks share some text (units presumably characters — confirm
# against the splitter's length function).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=700,
    chunk_overlap=100,
)
texts = splitter.split_documents(docs)

# Fail fast, before the embedding model is loaded: if the splitter produced no
# chunks (e.g. a scanned PDF with no extractable text), building the FAISS
# index from an empty list fails with an unhelpful low-level error.
if not texts:
    raise ValueError(
        "No text chunks were extracted from the PDF; cannot build the FAISS index."
    )

# ======== 3️⃣ Create the embeddings =========
# NOTE(review): the RuntimeError recorded below this cell (transformers failing
# to import a name from huggingface_hub.errors) looks like a version mismatch
# between the installed `transformers` and `huggingface_hub` packages rather
# than a problem in this code — presumably fixed by upgrading both together;
# confirm in the target environment.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# ======== 4️⃣ Build the FAISS vector DB =========
db = FAISS.from_documents(texts, embeddings)
# "k": 4 is forwarded as a search argument to the retriever (the number of
# chunks fetched per query, per the LangChain retriever docs).
retriever = db.as_retriever(search_kwargs={"k": 4})

# ======== 5️⃣ Choose the model =========
# 👉 Ollama:
model_tag = "qwen2.5:1.5b"
llm = Ollama(model=model_tag)

# 👉 Or vLLM (uncomment, together with the ChatOpenAI import at the top):
# NOTE(review): "qwen2.5:1.5b" is an Ollama-style tag; a vLLM server presumably
# expects the model name it was launched with — confirm before enabling.
# llm = ChatOpenAI(
#     base_url="http://localhost:8000/v1",
#     api_key="not-needed",
#     model="qwen2.5:1.5b"
# )

# ======== 6️⃣ Build the RAG chain =========
# return_source_documents=True makes the result carry the retrieved documents
# under "source_documents" next to the answer under "result".
qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)

# ======== 7️⃣ Try a question =========
query = "Tài liệu này nói về chủ đề gì?"
result = qa.invoke(query)

# Pull the two fields out of the chain result before printing them.
answer = result["result"]
source_documents = result["source_documents"]

print("💬 Trả lời:", answer)
print("\n📚 Nguồn trích dẫn:")
# One line per retrieved document, showing the "source" entry of its metadata.
for doc in source_documents:
    source = doc.metadata["source"]
    print("-", source)


RuntimeError: Failed to import transformers.trainer because of the following error (look up to see its traceback):
cannot import name 'LocalEntryNotFoundError' from 'huggingface_hub.errors' (/home/duy/miniconda3/envs/duy/lib/python3.11/site-packages/huggingface_hub/errors.py)