In [1]:
import os
import getpass
from langchain_community.embeddings import DashScopeEmbeddings
from langchain_community.chat_models import ChatTongyi

# Ensure a DashScope (Alibaba Cloud) API key is present in the environment.
# The interactive prompt only appears when the variable is unset or empty.
if os.environ.get("DASHSCOPE_API_KEY", "") == "":
    os.environ["DASHSCOPE_API_KEY"] = getpass.getpass(
        prompt="Enter API key for Alibaba Cloud DashScope: "
    )

# Embedding model used later to vectorise document chunks.
# NOTE(review): presumably the client reads DASHSCOPE_API_KEY from the
# environment set above -- confirm against the DashScopeEmbeddings docs.
embeddings = DashScopeEmbeddings(model="text-embedding-v2")

# Qwen chat model that generates the final answers.
llm = ChatTongyi(model="qwen-turbo")

Enter API key for Alibaba Cloud DashScope:  ········


In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS

# 1. Load documents from a PDF file (the path below is handed to PyPDFLoader)


file_path = "my_simple/nke-10k-2023.pdf"
loader = PyPDFLoader(file_path)

docs = loader.load()

# Sanity check: print how many documents the loader returned
print(len(docs))

# 2. Split documents into chunks
# No arguments are passed, so the splitter's library-default chunk size and
# overlap apply; pass chunk_size= / chunk_overlap= here to tune retrieval.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)

# Fail early with a clear message when nothing could be chunked (for example a
# PDF with no extractable text). Without this guard the vector-store build
# below fails on the empty list with an unrelated-looking error.
if not documents:
    raise ValueError(
        f"No text chunks were produced from {file_path!r}; "
        "check that the PDF contains extractable text."
    )

# 3. Create a vector store by embedding every chunk with the DashScope
#    embedding model configured earlier in the notebook
vector = FAISS.from_documents(documents, embeddings)

# 4. Create a retriever from the vector store (default retriever settings)
retriever = vector.as_retriever()

# 5. Define the prompt template
# The system message carries the instructions followed by the {context}
# placeholder; the human message carries the {input} placeholder.
system_prompt = "".join(
    [
        "You are an assistant for question-answering tasks. ",
        "Use the following pieces of retrieved context to answer the question. ",
        "If you don't know the answer, just say that you don't know. ",
        "Use three sentences maximum and keep the answer concise.\n\n",
        "{context}",
    ]
)
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# 6. Create the chains
# First the document-combining chain built from the LLM and the prompt, then
# the retrieval chain that pairs it with the retriever.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

# 7. Invoke the chain with a question and print the "answer" entry of the result
response = rag_chain.invoke(
    {"input": "Your question here based on your document."}
)
print(response["answer"])