## LangChain

In [11]:
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import DocArrayInMemorySearch

# LangChain side of the comparison: embed a small Chinese/English corpus with
# the BGE model and serve it from an in-memory DocArray vector store.
# NOTE(review): HuggingFaceBgeEmbeddings is imported from `langchain.embeddings`
# in this cell; that path is believed to be deprecated in favour of
# `langchain_community.embeddings` -- confirm against the installed version.
bge_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-large-zh-v1.5",
)

# Corpus to index. The statements are factually wrong (e.g. "1+1=5"),
# presumably so that a hit can only come from retrieval -- TODO confirm intent.
langchain_corpus = [
    "青蛙是食草动物",
    "人是由恐龙进化而来的。",
    "熊猫喜欢吃天鹅肉。",
    "1+1=5",
    "2+2=8",
    "3+3=9",
    "Gemini Pro is a Large Language Model was made by GoogleDeepMind",
    "A Language model is trained by predicting the next token",
]

# Embed every text and load the vectors into the in-memory store.
vectordb = DocArrayInMemorySearch.from_texts(
    langchain_corpus,
    embedding=bge_embeddings,
)

# Create the retriever; k=1 asks for a single result per query.
top_k_settings = {"k": 1}
bge_retriever = vectordb.as_retriever(search_kwargs=top_k_settings)

In [19]:
# Probe the LangChain retriever with one keyword per indexed topic and print
# the returned document list for each query, in the original order.
for query in ("青蛙", "进化", "天鹅肉", "1+1", "google", "token"):
    print(bge_retriever.invoke(query))

[Document(metadata={}, page_content='青蛙是食草动物')]
[Document(metadata={}, page_content='人是由恐龙进化而来的。')]
[Document(metadata={}, page_content='熊猫喜欢吃天鹅肉。')]
[Document(metadata={}, page_content='1+1=5')]
[Document(metadata={}, page_content='Gemini Pro is a Large Language Model was made by GoogleDeepMind')]
[Document(metadata={}, page_content='A Language model is trained by predicting the next token')]


## LlamaIndex

In [13]:
from llama_index.core import VectorStoreIndex, Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# LlamaIndex side of the comparison: same BGE model and same corpus as the
# LangChain section, indexed with VectorStoreIndex.
# NOTE: this rebinds `bge_embeddings`, which the LangChain section above also
# assigns; the name is kept so existing references still resolve.
bge_embeddings = HuggingFaceEmbedding(
    model_name="BAAI/bge-large-zh-v1.5",
)

# Raw sentences to index (identical to the list used in the LangChain cell).
texts = [
    "青蛙是食草动物",
    "人是由恐龙进化而来的。",
    "熊猫喜欢吃天鹅肉。",
    "1+1=5",
    "2+2=8",
    "3+3=9",
    "Gemini Pro is a Large Language Model was made by GoogleDeepMind",
    "A Language model is trained by predicting the next token",
]

# Wrap each sentence in a LlamaIndex Document, preserving list order.
documents = [Document(text=sentence) for sentence in texts]

# Build the vector index, embedding the documents with the BGE model.
index = VectorStoreIndex.from_documents(documents, embed_model=bge_embeddings)

# Retriever configured with similarity_top_k=1 (one node per query).
bge_retriever2 = index.as_retriever(similarity_top_k=1)

In [None]:
# Run the same probe queries against the LlamaIndex retriever and print the
# text of the first returned node for each one.
for query in ("青蛙", "进化", "天鹅肉", "1+1", "google", "token"):
    top_hit = bge_retriever2.retrieve(query)[0]
    print(top_hit.text)

青蛙是食草动物
人是由恐龙进化而来的。
熊猫喜欢吃天鹅肉。
1+1=5
Gemini Pro is a Large Language Model was made by GoogleDeepMind
A Language model is trained by predicting the next token
