In [1]:
import getpass
import os
from langchain.document_loaders import DirectoryLoader
from langchain_openai import OpenAIEmbeddings  # 导入新的 OpenAI Embeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter  # 文本分割器

# 设置 API 密钥
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")

In [2]:
# 载入 Markdown 文档
loader = DirectoryLoader("docs", glob="**/*.md")
documents = loader.load()

# 使用 RecursiveCharacterTextSplitter 将文档分块
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_documents = text_splitter.split_documents(documents)

# 显示分割后的文档数量
print(f"Total split documents: {len(split_documents)}")


libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider installing libmagic for better results.
libmagic is unavailable but assists in filetype detection. Please consider insta

Total split documents: 1321


In [3]:
# 使用 OpenAI Embedding 模型为每个分块文档生成向量
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_documents(split_documents, embeddings)

# 保存 FAISS 数据库（可选）
vector_store.save_local("faiss_index")
print("FAISS 向量数据库已创建")


FAISS 向量数据库已创建


In [8]:
# 获取问题输入
question = input("请输入你的问题：")
question_embedding = embeddings.embed_query(question)

# 检索与问题最相关的文档
similar_docs = vector_store.similarity_search_by_vector(question_embedding, k=5)  # k=3 表示检索最相关的 3 个文档

# 拼接相关文档的内容
docs_text = "\n".join([doc.page_content for doc in similar_docs])

# 显示检索到的相关文档
docs_text_size = len(docs_text.encode('utf-8'))
print(f"检索到的相关文档内容（{docs_text_size} bytes）：")
print(docs_text)

检索到的相关文档内容（6013 bytes）：
执行费用（GAS） 操作码（OpCode）指令名称 0.00032768 CALLT 0.00008192 APPEND,REVERSEITEMS,SETITEM,VALUES 0.00004096 PUSHDATA4 0.00002048 CAT,CONVERT,LEFT,MEMCPY,RIGHT,SUBSTR 0.00000512 CALL,CALL_L,CALLA,NEWARRAY,NEWARRAY_T,NEWSTRUCT,PACK,PUSHDATA2,THROW,UNPACK 0.00000256 NEWBUFFER 0.00000064 HASKEY,INITSLOT,PICKITEM 0.00000032 EQUAL,NOTEQUAL 0.00000016 CLEAR,CLEARITEMS,INITSSLOT,KEYS,NEWARRAY0,NEWSTRUCT0,POPITEM,REMOVE,REVERSEN,ROLL,XDROP 0.00000008 ADD,AND,BOOLAND,BOOLOR,DIV,GE,GT,LE,LT,MAX,MIN,MOD,MUL,NEWMAP,NUMEQUAL,NUMNOTEQUAL,OR,PUSHDATA1,SHL,SHR,SUB,WITHIN,XOR 0.00000004 ABS,DEC,ENDFINALLY,ENDTRY,ENDTRY_L,INC,INVERT,NEGATE,NOT,NZ,PUSHA,PUSHINT128,PUSHINT256,SIGN,SIZE,TRY,TRY_L 0.00000002
NEWMAP

指令 NEWMAP 字节码 0xC8 系统费 0.00000008 GAS 功能 向计算栈栈顶压入一个空的Map。

SIZE

指令 SIZE 字节码 0xCA 系统费 0.00000004 GAS 功能 获取计算栈栈顶元素的大小。

HASKEY

指令 HASKEY 字节码 0xCB 系统费 0.00000064 GAS 功能 从计算栈栈顶获取索引n（或键）和数组（Map，Buffer,ByteString）。若n在数组（Map，Buffer,ByteString）的长度范围内，则向栈顶压入True，否则压入False。

KEYS

指令 KEYS

In [None]:
# 初始化 OpenAI 模型
model = ChatOpenAI(model="gpt-4o-mini")

# 设置提示模板
prompt = PromptTemplate(template="Given the following information: {docs}\nAnswer this question: {question}", input_variables=["docs", "question"])

# 创建 LLMChain
chain = LLMChain(llm=model, prompt=prompt)

# 获取模型响应
response = chain.invoke({"docs": docs_text, "question": question})

# 输出响应
print("ChatGPT 的回答：")
print(response["text"])


  chain = LLMChain(llm=model, prompt=prompt)


ChatGPT 的回答：
{'docs': '执行费用（GAS） 操作码（OpCode）指令名称 0.00032768 CALLT 0.00008192 APPEND,REVERSEITEMS,SETITEM,VALUES 0.00004096 PUSHDATA4 0.00002048 CAT,CONVERT,LEFT,MEMCPY,RIGHT,SUBSTR 0.00000512 CALL,CALL_L,CALLA,NEWARRAY,NEWARRAY_T,NEWSTRUCT,PACK,PUSHDATA2,THROW,UNPACK 0.00000256 NEWBUFFER 0.00000064 HASKEY,INITSLOT,PICKITEM 0.00000032 EQUAL,NOTEQUAL 0.00000016 CLEAR,CLEARITEMS,INITSSLOT,KEYS,NEWARRAY0,NEWSTRUCT0,POPITEM,REMOVE,REVERSEN,ROLL,XDROP 0.00000008 ADD,AND,BOOLAND,BOOLOR,DIV,GE,GT,LE,LT,MAX,MIN,MOD,MUL,NEWMAP,NUMEQUAL,NUMNOTEQUAL,OR,PUSHDATA1,SHL,SHR,SUB,WITHIN,XOR 0.00000004 ABS,DEC,ENDFINALLY,ENDTRY,ENDTRY_L,INC,INVERT,NEGATE,NOT,NZ,PUSHA,PUSHINT128,PUSHINT256,SIGN,SIZE,TRY,TRY_L 0.00000002\nNEWMAP\n\n指令 NEWMAP 字节码 0xC8 系统费 0.00000008 GAS 功能 向计算栈栈顶压入一个空的Map。\n\nSIZE\n\n指令 SIZE 字节码 0xCA 系统费 0.00000004 GAS 功能 获取计算栈栈顶元素的大小。\n\nHASKEY\n\n指令 HASKEY 字节码 0xCB 系统费 0.00000064 GAS 功能 从计算栈栈顶获取索引n（或键）和数组（Map，Buffer,ByteString）。若n在数组（Map，Buffer,ByteString）的长度范围内，则向栈顶压入True，否则压入False。\n\nK