In [1]:
import openai, os
import faiss
# from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTFaissIndex, ServiceContext
from llama_index import SimpleDirectoryReader, LangchainEmbedding, ServiceContext
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import SpacyTextSplitter, CharacterTextSplitter
from langchain.chat_models import ChatOpenAI

# Take the OpenAI key from the environment instead of hardcoding it in the
# notebook. Falls back to an empty string (the previous hardcoded value) when
# OPENAI_API_KEY is not set, so behaviour without the variable is unchanged.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

In [2]:

from llama_index.node_parser import SimpleNodeParser

# Split the FAQ documents on blank lines into small, slightly overlapping
# chunks and turn each chunk into an index node.
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=100,
    chunk_overlap=20,
)
parser = SimpleNodeParser(text_splitter=text_splitter)
documents = SimpleDirectoryReader('./data/faq/').load_data()
nodes = parser.get_nodes_from_documents(documents)

# Wrap a sentence-transformers model (loaded through LangChain) so that
# llama_index can use it as its embedding model via the service context.
hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
)
embed_model = LangchainEmbedding(hf_embeddings)
service_context = ServiceContext.from_defaults(embed_model=embed_model)



INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/paraphrase-multilingual-mpnet-base-v2
INFO:sentence_transformers.SentenceTransformer:Use pytorch device: cpu


In [3]:
#dimension = 768
#faiss_index = faiss.IndexFlatIP(dimension)
#index = GPTFaissIndex(nodes=nodes,faiss_index=faiss_index, service_context=service_context)



from llama_index import GPTVectorStoreIndex

# Build the index with the same service_context that is used at query time,
# so the documents are embedded with the HuggingFace model configured above.
# Without it the index is constructed with the library's default service
# context, and the custom embed_model is not applied to the documents
# (NOTE(review): default embedding model depends on the installed
# llama_index version - confirm).
new_index = GPTVectorStoreIndex(nodes, service_context=service_context)

# query with embed_model specified
query_engine = new_index.as_query_engine(
    retriever_mode="embedding",
    verbose=True,
    service_context=service_context,
)



INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 3198 tokens


In [4]:
#from llama_index import QueryMode

#openai.api_key = os.environ.get("OPENAI_API_KEY")

#response = index.query(
#    "请问你们海南能发货吗？", 
#    mode=QueryMode.EMBEDDING,
#    verbose=True, 
#)

# Ask whether orders can be shipped to Hainan. The legacy
# `mode=QueryMode.EMBEDDING` / `verbose=True` arguments are intentionally not
# passed here; the equivalent options are set when the query engine is built.
question = "请问你们海南能发货吗？"
response = query_engine.query(question)

print(response)

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 24 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 550 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens



是的，我们支持全国大部分省份的配送，其中包括海南省。


In [5]:
# Ask which courier companies are used for delivery.
question = "你们用哪些快递公司送货？"
response = query_engine.query(question)

print(response)

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 27 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 442 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens



我们与顺丰速运、圆通速递、申通快递、韵达快递、中通快递、百世快递等多家知名快递公司合作，用他们的服务送货。


In [6]:
#response = index.query(
#    "你们的退货政策是怎么样的？", 
#    mode=QueryMode.EMBEDDING,
#    verbose=True, 
#)

# Ask what the return policy is.
question = "你们的退货政策是怎么样的？"
response = query_engine.query(question)

print(response)

INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens
INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 27 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 438 tokens
INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens



自收到商品之日起7天内，如产品未使用、包装完好，您可以申请退货。某些特殊商品可能不支持退货，请在购买前查看商品详情页面的退货政策。
