In [1]:
!pip install langchain langchain_community langchain_chroma



In [2]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Parse only the elements whose CSS class is post-title, post-header or post-content.
bs4_strainer = bs4.SoupStrainer(
    class_=("post-title", "post-header", "post-content")
)

# Load the blog post, handing the strainer to BeautifulSoup through bs_kwargs.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",),
    bs_kwargs=dict(parse_only=bs4_strainer),
)
docs = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
# Sanity-check the loaded page: its total character count, then its first 100 characters.
first_doc = docs[0]
print(len(first_doc.page_content))
print(first_doc.page_content[:100])

# Split the documents into chunks of up to 1000 characters with a 200-character overlap.
# add_start_index=True records each chunk's offset in its metadata as `start_index`.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits = text_splitter.split_documents(docs)

# Report how many chunks were produced, then show the first chunk's text and metadata.
print(len(all_splits))
first_split = all_splits[0]
print(first_split.page_content)
print(first_split.metadata)

29295


      Prompt Engineering
    
Date: March 15, 2023  |  Estimated Reading Time: 21 min  |  Author: 
43
Prompt Engineering
    
Date: March 15, 2023  |  Estimated Reading Time: 21 min  |  Author: Lilian Weng
{'source': 'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/', 'start_index': 8}


In [None]:
import os
import getpass
def _set_if_undefined(var: str):
    """Prompt for environment variable ``var`` unless it already has a non-empty value.

    The value is read with ``getpass.getpass`` so it is not echoed, and is then
    stored in ``os.environ[var]``. An unset variable and an empty-string value
    are both treated as missing.

    Args:
        var: Name of the environment variable to populate.
    """
    if os.environ.get(var):
        # Already configured: leave the existing value untouched.
        return
    os.environ[var] = getpass.getpass(f"请输入您的 {var}")
        
        
# Ask for the OpenAI API key only when it is not already present in the environment.
_set_if_undefined("OPENAI_API_KEY")

# Embed every chunk with OpenAIEmbeddings and store the result in a Chroma vector store.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=all_splits, embedding=embedding_model)
type(vectorstore)

In [7]:
# Similarity-search retriever over the vector store, configured to return k=6 chunks.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
retrieved_docs = retriever.invoke("how to do Prompt Engineering?")
# Check how many chunks were retrieved.
print(len(retrieved_docs))

6


In [8]:
# Show the text of the first retrieved chunk.
top_hit = retrieved_docs[0]
print(top_hit.page_content)

Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights. It is an empirical science and the effect of prompt engineering methods can vary a lot among models, thus requiring heavy experimentation and heuristics.
This post only focuses on prompt engineering for autoregressive language models, so nothing with Cloze tests, image generation or multimodality models. At its core, the goal of prompt engineering is about alignment and model steerability. Check my previous post on controllable text generation.


In [None]:
from langchain import hub
from langchain.chains import RetrievalQA
from langchain_ollama import ChatOllama
# Pull the `rlm/rag-prompt-llama` prompt from the LangChain Hub.
prompt = hub.pull("rlm/rag-prompt-llama")

# Chat model served by the Ollama instance at base_url. To use OpenAI instead,
# swap in ChatOpenAI(model="gpt-4o-mini").
# temperature is passed as a float; the original passed the string "0.5".
llm = ChatOllama(model="llama3.2", temperature=0.5, base_url="http://192.168.22.6:11434")

# Print the prompt template's messages.
print(prompt.messages)

# Question-answering chain: retrieves chunks from the vector store and passes them,
# together with the question, to the LLM using the prompt pulled above.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)
question = "how to do Prompt Engineering?"
# Calling the chain object directly (Chain.__call__) is deprecated; invoke() is the
# supported entry point and takes the same input dict.
result = qa_chain.invoke({"query": question})
result["result"]