In [4]:
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())  # 导入环境

In [2]:
from zhipu_llm import ChatZhipuAI
llm = ChatZhipuAI(
    temperature=0.1,
    model_name="glm-4",
)
llm.invoke("langsmith如何帮助测试?").content

'Langsmith是一个语言建模工具，可以帮助开发者测试和评估语言模型的性能。它可以通过提供一系列的任务和测试来评估模型的准确性、流畅度和语法正确性。使用Langsmith可以帮助开发者发现和修复模型中的错误和问题，从而提高模型的质量和性能。具体来说，Langsmith可以帮助测试以下方面：\n\n1. 准确性：通过提供一系列的标注数据和未标注数据，评估模型在特定任务上的准确率。\n\n2. 流畅度：通过评估模型生成的文本的自然度和流畅度，来衡量模型的语言表达能力和创造力。\n\n3. 语法正确性：通过评估模型生成的文本的语法结构和句法正确性，来衡量模型的语言理解和生成能力。\n\n4. 泛化能力：通过在不同的数据集和任务上测试模型的性能，来评估模型的泛化能力和适应性。\n\n5. 鲁棒性：通过在不同的输入和条件下测试模型的性能，来评估模型对异常和噪声的鲁棒性。\n\n使用Langsmith可以帮助开发者更好地理解和评估语言模型的性能，发现和修复模型中的问题，从而提高模型的质量和效果。'

In [3]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are world class technical documentation writer."),
    ("user", "{input}")
])

chain = prompt | llm   #  combine these into a simple LLM chain
#  it should respond in a more proper tone for a technical writer!
chain.invoke({"input": "how can langsmith help with testing?"})  

AIMessage(content="Langsmith is an AI tool designed to help developers create, maintain, and improve natural language processing (NLP) models. While Langsmith itself is not specifically focused on testing, it can indirectly support testing efforts in several ways:\n\n1. **Model Training and Evaluation**: Langsmith can be used to train and evaluate NLP models on test datasets. By doing so, developers can assess the performance of their models and identify areas where they may be failing, which is a critical part of the testing process.\n\n2. **Data Annotation**: Langsmith can assist in data annotation, which is the process of labeling data with relevant tags or categories. High-quality annotated data is essential for testing NLP models, as it provides the ground truth against which model outputs can be compared.\n\n3. **Model Debugging**: If an NLP model is not performing as expected, Langsmith can help in debugging by providing insights into the model's decision-making process. This ca

In [4]:
from langchain_core.output_parsers import StrOutputParser

output_parser = StrOutputParser()
chain = prompt | llm | output_parser  #  add this to the previous chain
#  The answer will now be a string (rather than a ChatMessage).
chain.invoke({"input": "how can langsmith help with testing?"})

"Langsmith is an AI tool designed to help developers create, maintain, and improve natural language processing (NLP) models. While Langsmith itself is not specifically focused on testing, it can indirectly support testing efforts in several ways:\n\n1. **Model Training and Evaluation**: Langsmith can be used to train and evaluate NLP models on test datasets. By doing so, developers can assess the performance of their models and identify areas where they may be failing, which is a critical part of the testing process.\n\n2. **Data Annotation**: Langsmith can assist in data annotation, which is the process of labeling data with relevant tags or categories. High-quality annotated data is essential for testing NLP models, as it provides the ground truth against which model outputs can be compared.\n\n3. **Model Debugging**: If an NLP model is not performing as expected, Langsmith can help in debugging by providing insights into the model's decision-making process. This can help developers 

In [5]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS  # 矢量数据库
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain


loader = WebBaseLoader("https://docs.smith.langchain.com/overview")
docs = loader.load()

embeddings = OpenAIEmbeddings()
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
vector = FAISS.from_documents(documents, embeddings)  # 嵌入索引

prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")

document_chain = create_stuff_documents_chain(llm, prompt)  # prompt+llm
retriever = vector.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)  # prompt + llm + retriever
response = retrieval_chain.invoke({"input": "how can langsmith help with testing?"})
print(response["answer"])

APITimeoutError: Request timed out.

In [6]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import bs4
from langchain_ai21 import AI21Embeddings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
persist_directory = "VectorStore/"

bs_strainer = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(  # 使用  WebBaseLoader  来将  HTML  页面中的所有文本加载到文档格式中，以便我们可以使用下游。
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": bs_strainer},  # 指定了在解析HTML或XML文档时应该仅考虑包含特定CSS类的标签。
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
splits = text_splitter.split_documents(docs)

embeding = HuggingFaceEmbedding(
        model_name="aiops24-RAG-demo-glm/demo/BAAI/bge-small-zh-v1.5",
        cache_folder="./",
        embed_batch_size=128,
    )
# 
# embeddings = AI21Embeddings()
vectordb = Chroma.from_documents(documents=splits, embedding=embeddings, persist_directory=persist_directory)
vectordb.persist()

AttributeError: 'HuggingFaceEmbedding' object has no attribute 'embed_documents'

In [None]:
vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)  # 加载向量数据库
model_name= "glm-4"
# temperature = 0.5
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")  # RAG提示模板

In [4]:
from typing import List
import qdrant_client
from llama_index.legacy.llms import OpenAILike as OpenAI
from dotenv import dotenv_values
from llama_index.core.llms.llm import LLM
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core.postprocessor.types import BaseNodePostprocessor
from llama_index.core.vector_stores import VectorStoreQuery
from llama_index.core import (
    QueryBundle,
    PromptTemplate,
    StorageContext,
    VectorStoreIndex,
)
from llama_index.core.embeddings import BaseEmbedding
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore
from llama_index.core.base.llms.types import CompletionResponse
from llama_index.postprocessor.rankgpt_rerank import RankGPTRerank

class QdrantRetriever(BaseRetriever):
    def __init__(
            self,
            vector_store: QdrantVectorStore,
            embed_model: BaseEmbedding,
            similarity_top_k: int = 2,
    ) -> None:
        self._vector_store = vector_store
        self._embed_model = embed_model
        self._similarity_top_k = similarity_top_k
        super().__init__()

    async def _aretrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        query_embedding = self._embed_model.get_query_embedding(query_bundle.query_str)
        vector_store_query = VectorStoreQuery(
            query_embedding, similarity_top_k=self._similarity_top_k
        )
        query_result = await self._vector_store.aquery(vector_store_query)

        node_with_scores = []
        for node, similarity in zip(query_result.nodes, query_result.similarities):
            node_with_scores.append(NodeWithScore(node=node, score=similarity))
        return node_with_scores

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        query_embedding = self._embed_model.get_query_embedding(query_bundle.query_str)
        vector_store_query = VectorStoreQuery(
            query_embedding, similarity_top_k=self._similarity_top_k
        )
        query_result = self._vector_store.query(vector_store_query)

        node_with_scores = []
        for node, similarity in zip(query_result.nodes, query_result.similarities):
            node_with_scores.append(NodeWithScore(node=node, score=similarity))
        return node_with_scores

In [1]:
import asyncio
import time

from dotenv import dotenv_values
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.legacy.llms import OpenAILike as OpenAI
from qdrant_client import models
from tqdm.asyncio import tqdm

from pipeline.ingestion import build_pipeline, build_vector_store, read_data
from pipeline.qa import read_jsonl, save_answers
from pipeline.rag import QdrantRetriever, generation_with_knowledge_retrieval

ModuleNotFoundError: No module named 'pipeline'

In [7]:
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core import QueryBundle
from llama_index.postprocessor.rankgpt_rerank import RankGPTRerank
from llama_index.legacy.llms import OpenAILike as OpenAI
import pandas as pd
from IPython.display import display, HTML
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from dotenv import dotenv_values
import asyncio
import time

from dotenv import dotenv_values
from pipeline.ingestion import build_pipeline, build_vector_store, read_data
from pipeline.qa import read_jsonl, save_answers
from pipeline.rag import QdrantRetriever, generation_with_knowledge_retrieval

# load documents
documents = SimpleDirectoryReader("./data_wiki/").load_data()

def get_retrieved_nodes(
    query_str, vector_top_k=10, reranker_top_n=3, with_reranker=False
):
    embeding = HuggingFaceEmbedding(
        model_name="BAAI/bge-large-zh-v1.5",
        cache_folder="./",
        embed_batch_size=128,
    )
    Settings.embed_model = embeding
    query_bundle = QueryBundle(query_str)
    config = dotenv_values(".env")
    client, vector_store = build_vector_store(config, reindex=False)
    # configure retriever
    retriever = QdrantRetriever(vector_store, embeding, similarity_top_k=vector_top_k)
    retrieved_nodes = retriever.retrieve(query_bundle)
    config = dotenv_values(".env")
    if with_reranker:
        # configure reranker
        reranker = RankGPTRerank(
            llm = OpenAI(
            api_key=config["GLM_KEY"],
            model="glm-4",
            api_base="https://open.bigmodel.cn/api/paas/v4/",
            is_chat_model=True,
        ),
            top_n=reranker_top_n,
            verbose=True,
        )
        retrieved_nodes = reranker.postprocess_nodes(
            retrieved_nodes, query_bundle
        )

    return retrieved_nodes


def pretty_print(df):
    return display(HTML(df.to_html().replace("\\n", "<br>")))


def visualize_retrieved_nodes(nodes) -> None:
    result_dicts = []
    for node in nodes:
        result_dict = {"Score": node.score, "Text": node.node.get_text()}
        result_dicts.append(result_dict)

    pretty_print(pd.DataFrame(result_dicts))

In [9]:
new_nodes = get_retrieved_nodes(
    "Which date did Paul Gauguin arrive in Arles ?",
    vector_top_k=10,
    reranker_top_n=3,
    with_reranker=True,
)

pytorch_model.bin:   0%|          | 0.00/1.30G [00:00<?, ?B/s]

KeyboardInterrupt: 