# 以京东云官网文档为基础的RAG实验

## 安装依赖包

In [None]:
import os
os.environ["http_proxy"] = "http://127.0.0.1:1083"
os.environ["https_proxy"] = "http://127.0.0.1:1083"

%pip install langchain_community
%pip install langchain
%pip install bitsandbytes
%pip install accelerate
%pip install vllm

## 数据处理
### 为文档添加.txt后缀，便于后期文档处理
参考工具：https://github.com/jiashiwen/datatoolkits

## 文档加载与切分

In [None]:
import os

from langchain.text_splitter import CharacterTextSplitter
# Both loaders now come from langchain_community; the original pulled
# TextLoader from the deprecated langchain.document_loaders path.
from langchain_community.document_loaders import DirectoryLoader, TextLoader

# Route HTTP and HTTPS traffic through the local proxy.
os.environ["http_proxy"] = "http://127.0.0.1:1083"
os.environ["https_proxy"] = "http://127.0.0.1:1083"

# Load the .txt files under ./jdcloud-docs (recursive glob) with TextLoader.
loader = DirectoryLoader(
    "./jdcloud-docs",
    glob="**/*.txt",
    loader_cls=TextLoader,
)
docs = loader.load()

# Split the loaded documents using chunk_size=512 and chunk_overlap=50.
# chunked_docs is what the indexing cells write into ClickHouse.
splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=50)
chunked_docs = splitter.split_documents(docs)

## 将向量存入 ClickHouse

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
import langchain_community.vectorstores.clickhouse as clickhouse

# Embedding model for this index.
# NOTE(review): the model name says "en" while the queries in this notebook
# are Chinese; confirm this model is the intended one.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5")

# Connection settings and target table for the vector store.
settings = clickhouse.ClickhouseSettings(
    host="10.0.16.88",
    username="default",
    password="root",
    table="jd_docs",
)

# Build the ClickHouse vector store from chunked_docs and the embeddings.
docsearch = clickhouse.Clickhouse.from_documents(
    chunked_docs,
    embeddings,
    config=settings,
)

In [None]:
# Use the m3e model for vector retrieval.
from langchain.embeddings import HuggingFaceEmbeddings
import langchain_community.vectorstores.clickhouse as clickhouse

# Ask the embedding model to run on the GPU.
model_kwargs = {"device": "cuda"}
embeddings = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    model_name="moka-ai/m3e-large",
)

# Same server as the bge index, but a separate table for the m3e vectors.
settings = clickhouse.ClickhouseSettings(
    host="10.0.16.88",
    username="default",
    password="root",
    table="jd_docs_m3e",
)

# Build the ClickHouse vector store from chunked_docs and the embeddings.
docsearch = clickhouse.Clickhouse.from_documents(
    chunked_docs,
    embeddings,
    config=settings,
)

## 从 ClickHouse 创建文档检索 retriever

In [None]:
import os

from langchain.embeddings import HuggingFaceEmbeddings
import langchain_community.vectorstores.clickhouse as clickhouse

# Use the m3e-large embedding model from the local model directory, on the GPU.
model_kwargs = {"device": "cuda"}
embeddings = HuggingFaceEmbeddings(
    model_name="/root/models/moka-ai-m3e-large", model_kwargs=model_kwargs)

# The database password is read from the environment instead of being
# committed in the notebook. Export CLICKHOUSE_PASSWORD before running this
# cell; a missing variable raises KeyError here rather than failing later.
# NOTE(review): the password previously hard-coded here should be rotated.
settings = clickhouse.ClickhouseSettings(
    table="jd_docs_m3e",
    username="default",
    password=os.environ["CLICKHOUSE_PASSWORD"],
    host="10.0.1.94",
)

# Attach to the existing table; nothing is re-indexed in this cell.
ck_db = clickhouse.Clickhouse(embeddings, config=settings)

# Return only the single best match per query.
# NOTE(review): LangChain documents score_threshold for
# search_type="similarity_score_threshold"; with "similarity" it may have no
# effect. Confirm against the installed version.
retriever = ck_db.as_retriever(
    search_type="similarity", search_kwargs={"k": 1, 'score_threshold': 0.8})

# Smoke-test the retriever. Another query to try: "how to create a load
# balancer".
r = retriever.invoke("京东云有和dataworks对标的产品吗")
r

## 加载量化模型

In [None]:
# TODO: pick a model that handles Chinese well.

import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_name = "HuggingFaceH4/zephyr-7b-beta"

# 4-bit NF4 quantization with double quantization, computing in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# The Hugging Face access token is read from the HF_TOKEN environment
# variable instead of being committed in the notebook. When the variable is
# unset, None is passed and the library's own credential lookup applies.
# NOTE(review): the token previously hard-coded here should be revoked.
hf_token = os.environ.get("HF_TOKEN")

model = AutoModelForCausalLM.from_pretrained(
    model_name, token=hf_token, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Try the Chinese Llama 3 model through the LangChain vLLM wrapper.
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
import os
from langchain_community.llms import VLLM


# Local model directory, so no Hugging Face access token is needed here.
# The commented-out from_pretrained(...) variants that embedded a token
# literal were removed.
# NOTE(review): that token should be revoked.
model_name = "/root/models/Llama3-Chinese-8B-Instruct"

# Kept so `tokenizer` stays available to other cells; it is not passed to the
# vLLM wrapper below.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Only keywords that are fields of the LangChain VLLM wrapper are passed.
# The original call also passed tokenizer, task, do_sample,
# repetition_penalty and return_full_text. Those are keywords of the
# transformers text-generation pipeline (see the commented-out pipeline(...)
# call in the chain-setup cell) and are not declared by the wrapper per its
# API reference.
# NOTE(review): if a repetition penalty is still wanted, it presumably has to
# be supplied per call. Confirm against the installed langchain_community.
llm = VLLM(
    model=model_name,
    temperature=0.2,
    max_new_tokens=400,
)

## 尝试 Qwen

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from langchain_community.llms import VLLM

# Local model directory, so no Hugging Face access token is needed here.
# The commented-out from_pretrained(...) variants that embedded a token
# literal were removed.
# NOTE(review): that token should be revoked.
model_name = "/root/models/Qwen1.5-1.8B-Chat"

# 4-bit NF4 quantization settings. Nothing in this cell consumes them; they
# are kept so `bnb_config` stays defined for a transformers-based load.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Kept so `tokenizer` stays available to other cells; it is not passed to the
# vLLM wrapper below.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Only keywords that are fields of the LangChain VLLM wrapper are passed.
# The wrapper's data-type field is `dtype`, not `torch_dtype`. The
# transformers-pipeline keywords tokenizer, task, do_sample,
# repetition_penalty and return_full_text are not declared by the wrapper
# per its API reference, so they were dropped.
# NOTE(review): if a repetition penalty is still wanted, it presumably has to
# be supplied per call. Confirm against the installed langchain_community.
llm = VLLM(
    model=model_name,
    dtype="auto",
    temperature=0.2,
    max_new_tokens=400,
)

## 设置 LLM 链

In [None]:
# TODO: learn the LangChain composition flow and rework this cell.

from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field


# Alternative backend: wrap the transformers `model` and `tokenizer` in a
# text-generation pipeline instead of using the vLLM-backed `llm`.
# text_generation_pipeline = pipeline(
#     model=model,
#     tokenizer=tokenizer,
#     task="text-generation",
#     temperature=0.2,
#     do_sample=True,
#     repetition_penalty=1.1,
#     return_full_text=False,
#     max_new_tokens=400,
# )

# llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

# Prompt with two placeholders, {question} and {context}.
prompt_template = """
你是一个云技术专家
使用以下检索到的Context回答问题。
如果不知道答案，就说不知道。
用中文回答问题。
Question: {question}
Context: {context}
Answer: 
"""


# Created but not wired into the prompt or the chain yet.
output_parser = JsonOutputParser()
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Compose prompt and model. The RAG cells below use `llm_chain`, which was
# previously only present as a commented-out line, so they failed with
# NameError. `llm` must already be defined by one of the model cells.
llm_chain = prompt | llm

# Render the prompt once with placeholder values to inspect the final text.
# The bare `p` stays last so the notebook displays it.
p = prompt.format(context="huaidan", question="你是谁")
p

## 调用 RAG

In [None]:

from langchain_core.runnables import RunnablePassthrough


def _format_docs(docs):
    """Join the page_content of the retrieved documents into one string.

    Without this step the prompt's {context} slot receives the Python repr of
    the list the retriever returns rather than the document text itself.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Retriever with default search settings; this rebinds the `retriever`
# configured in the earlier cell.
retriever = ck_db.as_retriever()

# The incoming question goes to the retriever, whose output is flattened to
# text for "context", and is passed through unchanged as "question". Both
# then feed llm_chain.
rag_chain = {"context": retriever | _format_docs,
             "question": RunnablePassthrough()} | llm_chain

In [None]:
# Run retrieval and generation as two explicit steps for the same question.
context = retriever.invoke("如何挂载弹性公网ip")
# Bare expression; it is not the last statement of the cell.
context
# Pass the retriever's result to the prompt unchanged as "context".
llm_chain.invoke({"context": context, "question": "如何挂载弹性公网ip"})

In [None]:

# Question to send through the full RAG chain.
question = "如何创建云主机"

# Disabled variant: call llm_chain with an empty context for the same question.
# llm_chain.invoke({"context": "", "question": question})
r = rag_chain.invoke(question)
print(r)