In [4]:
import os
import json
import faiss
import pickle
from langchain import OpenAI
from langchain.chains import VectorDBQAWithSourcesChain
from langchain.prompts.prompt import PromptTemplate
from langchain.embeddings import LlamaCppEmbeddings

# Bare expression: evaluates to the embed_documents function object without
# calling it, so the notebook shows its repr/signature as this cell's output.
LlamaCppEmbeddings.embed_documents


<function langchain.embeddings.llamacpp.LlamaCppEmbeddings.embed_documents(self, texts: List[str]) -> List[List[float]]>

In [5]:
# Point both the HTTP and HTTPS proxy variables at the same local proxy
# endpoint for the rest of this kernel session.
os.environ.update(
    {
        "http_proxy": "http://127.0.0.1:7890",
        "https_proxy": "http://127.0.0.1:7890",
    }
)

In [6]:
# Restore the pickled vector store object.
# NOTE(review): pickle.load can execute arbitrary code from the file, so
# faiss_store.pkl must come from a trusted source.
with open("faiss_store.pkl", "rb") as store_file:
    store = pickle.load(store_file)

# Attach the FAISS index read from docs.index to the restored store.
store.index = faiss.read_index("docs.index")

# Chinese-language QA prompt; {context} and {question} are the placeholders
# declared as the template's input variables below.
prompt_template = """使用上下文来回答最后的问题。如果你不知道答案，就说你不知道，不要试图编造答案。

{context}

问题: {question}
中文答案:"""

print("initialize chain")

# Build the pieces one at a time, then wire them into the QA-with-sources chain.
llm = OpenAI(temperature=0)
question_prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)
chain = VectorDBQAWithSourcesChain.from_llm(
    llm=llm,
    vectorstore=store,
    question_prompt=question_prompt,
)


def search(query: str):
    """Run one question through the module-level ``chain`` and print the result.

    Args:
        query: Question text, passed to the chain under the "question" key.

    Prints three lines to stdout (question, answer, sources) and returns
    nothing. The chain's response is expected to expose "answer" and
    "sources" keys.
    """
    # Echo the question before calling the chain so it is visible even if
    # the call below fails.
    print(f"问题: {query}")

    chain_inputs = {"question": query}
    response = chain(chain_inputs, return_only_outputs=True)

    answer = response["answer"]
    print(f"答案: {answer}")

    sources = response["sources"]
    print(f"来源: {sources}")

initialize chain




In [7]:

# Questions to run through the QA chain, in order.
query_list = [
    # Disabled queries, kept for reference:
    # "详细介绍下Transformer architecture",
    # "什么是pipeline parallel?",
    # "详细介绍下Data parallel",
    "Wenet模型网络结构",
    "Wenet解码方式有哪些？",
    "介绍下megatron框架",
]

# Ask each question in turn; the extra print visually separates the results.
for question in query_list:
    search(question)
    print("\n")

问题: Wenet模型网络结构


Token indices sequence length is longer than the specified maximum sequence length for this model (2297 > 1024). Running this sequence through the model will result in indexing errors


答案:  Wenet model network structure is a hybrid Connectionist Time Classification (CTC)/Attention architecture, where Transformers or Conformers are used as encoders and Attention decoders are used to re-score the CTC hypotheses. To achieve a unified model for streaming and non-streaming, we use a dynamic block-based attention policy that allows self-attention to focus on the correct context with random.

来源: /data/datasets/papers/Wenet-Production-Oriented-Streaming-and-Non-streaming-End-to-End-Speech-Recognition-Toolkit.pdf


问题: Wenet解码方式有哪些？


Token indices sequence length is longer than the specified maximum sequence length for this model (2070 > 1024). Running this sequence through the model will result in indexing errors


答案:  Wenet supports n-gram, WFST, CTC prefix beam search, CTC WFST search, and attention re-scoring decoding methods.

来源: /data/datasets/papers/Wenet-Production-Oriented-Streaming-and-Non-streaming-End-to-End-Speech-Recognition-Toolkit.pdf, /data/datasets/papers/Wenet2.0-More-Productive-End-to-End-Speech-Recognition-Toolkit.pdf


问题: 介绍下megatron框架


Token indices sequence length is longer than the specified maximum sequence length for this model (2576 > 1024). Running this sequence through the model will result in indexing errors


答案:  Megatron is a large-scale transformer language model training framework developed jointly by Microsoft and NVIDIA. It can use DeepSpeed and Megatron to train super large-scale transformer language models and can adapt to downstream tasks through zero-shot, few-shot, and fine-tuning techniques. It is a distributed multi-processing framework based on NVIDIA that can help users train large models more efficiently. It supports various distributed training techniques, including data parallelism (DP), model parallelism (MP), and parameter parallelism (PP). Megatron-LM is a model parallelism technique based on Megatron that can help users train large models more efficiently. It combines tensor parallelism, pipeline parallelism, and data parallelism to scale up to thousands of GPUs. It also proposes a new interleaved pipeline scheduling that can improve throughput by more than 10% with memory usage comparable to existing methods.

来源: /data/datasets/papers/MP-Using-DeepSpeed-and-Megatron-