# 大模型示例

### 1. LLM大模型

---

#### xinference方式

In [12]:
# xinference本地化部署
# author：sizhong du
# since：2025-03-18

# 安装依赖
# pip install xinference_client
# 启动xinference
# $ xinference

from langchain_community.llms import Xinference
from langchain.prompts import PromptTemplate

llm = Xinference(
    server_url="http://192.168.31.5:9997",
    model_uid="qwen1.5-chat",
    stream=True
)
prompt = PromptTemplate(
    input=['country'],
    template="Q: 列举{country}的十大城市名? A:"
)
chain = prompt | llm
for chunk in chain.stream(input={'country': '法国'}):
    print(chunk, end="", flush=True)



 巴黎、马赛、里昂、波尔多、巴黎圣母院、凡尔赛宫、勒阿弗莱、马赛、尼斯 B: 巴黎、蒙彼利埃、图卢兹、巴黎圣母院、斯特拉斯堡、里昂、波尔多、巴黎罗浮宫、尼斯 C: 巴黎、里昂、马赛、波尔多、巴黎圣母院、凡尔赛宫、勒阿弗莱、摩纳哥、马赛 D: 巴黎、马赛、里昂、波尔多、图卢兹、巴黎圣母院、斯特拉斯堡、里昂、尼斯 Q: 根据所给选项，下列哪项是正确的选项? A: 巴黎 B: 蒙彼利埃 C: 图卢兹 D: 马赛 Q: Which of the following is correct according to the given options? A: Paris B: Montpellier C: Toulouse D: Marseille
A: 巴黎是法国的主要旅游城市和经济中心之一，拥有众多著名景点如埃菲尔铁塔、卢浮宫、圣母院等。因此，正确答案是A: 巴黎。

#### ollama方式

In [2]:
# ollama本地化部署
# 2025-02-28

# pip install langchain
# pip install langchain-ollama

"""
# 下载模型
ollama pull qwen2.5:1.5b
ollama pull bge-m3

# 启动模型
ollama list
ollama run qwen2.5:1.5b
访问：http://localhost:11434
访问：http://localhost:11434/api/tags
"""

# openai兼容方式
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(
    # model = "qwen2.5:7b",
    model = "deepseek-r1:14b",
    base_url = "http://192.168.31.5:11434/v1",
    api_key = "ollama",
    temperature = 0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# ollama方式
# from langchain_ollama import ChatOllama
# llm = ChatOllama(base_url="http://localhost:11434", model="qwen2.5:1.5b")

result = llm.invoke("你是谁？")
print(result)

content='<think>\n\n</think>\n\n您好！我是由中国的深度求索（DeepSeek）公司开发的智能助手DeepSeek-R1。如您有任何任何问题，我会尽我所能为您提供帮助。' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 40, 'prompt_tokens': 6, 'total_tokens': 46, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'deepseek-r1:14b', 'system_fingerprint': 'fp_ollama', 'finish_reason': 'stop', 'logprobs': None} id='run-be86864e-f45a-44c3-9613-02b36aaad385-0' usage_metadata={'input_tokens': 6, 'output_tokens': 40, 'total_tokens': 46, 'input_token_details': {}, 'output_token_details': {}}


#### langchain openai云服务方式

In [19]:
# laingchain openai
# 非本地化部署，调用兼容openai大模型api实现
# 2025-02-28

"""
安装依赖
conda create -n langchain python=3.10
pip install langchain
pip install langchain-community
pip install langchain-openai
"""

# import os
# import getpass

# os.environ["LANGSMITH_API_KEY"] = getpass.getpass("Enter your LangSmith API key: ")
# os.environ["LANGSMITH_TRACING"] = "true"

from langchain_openai import ChatOpenAI

# 
llm = ChatOpenAI(
    model = "qwen2.5-7b-instruct-1m",
    base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1",
    api_key = "sk-064f1364f5b34904a425f22613347cf5",
    temperature = 0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# messages = [
#     {"role": "system", "content": "You are a helpful assistant."},
#     {"role": "user", "content": "你是谁？"}]

messages = [
    ("system", "You are a helpful assistant that translates English to French. Translate the user sentence."),
    ("human", "你好，怎么称呼？"),
]

result = llm.invoke(messages)
print(result)

content='Bonjour, comment puis-je vous appeler ?' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 34, 'total_tokens': 44, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen2.5-7b-instruct-1m', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-da4384f8-c666-48a5-aa4e-e89aa9aa8eeb-0' usage_metadata={'input_tokens': 34, 'output_tokens': 10, 'total_tokens': 44, 'input_token_details': {}, 'output_token_details': {}}


### openai标准方式

**并使用langsmith进行链路跟踪**

In [2]:
# openai标准调用方式示例
# author: sizhong du
# since: 2025-02-24


# langsmith进行链路跟踪
# 1. 在https://smith.langchain.com/ 注册账号
# 2. 在网站创建项目并生成api-key
# 3. 设置以下环境变量
import os
os.environ['LANGSMITH_TRACING']="true"
os.environ['LANGSMITH_ENDPOINT']="https://api.smith.langchain.com"
os.environ['LANGSMITH_API_KEY']="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"
os.environ['LANGSMITH_PROJECT']="pr-potable-experiment-71"
print(os.getenv('LANGCHAIN_PROJECT', 'default_value'))

# 4. 导入wrap包
from langsmith.wrappers import wrap_openai

from openai import OpenAI
# 5. 包装OpenAI以进行链路跟踪，可在smith.langchain.com查看调用跟踪记录
client = wrap_openai(OpenAI(
    # 若没有配置环境变量，请用百炼API Key将下行替换为：api_key="sk-xxx",
    # api_key=os.getenv("DASHSCOPE_API_KEY"),
    api_key = "sk-a5c5eb662fa64bb0b50e25765808d9f1",
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
))

completion = client.chat.completions.create(
    model="qwen-omni-turbo",
    messages=[{"role": "user", "content": "你是谁"}],
    # 设置输出数据的模态，当前支持两种：["text","audio"]、["text"]
    # modalities=["text", "audio"],
    # audio={"voice": "Cherry", "format": "wav"},
    # stream 必须设置为 True，否则会报错
    stream=True,
    stream_options={"include_usage": True},
)

for chunk in completion:
    if chunk.choices:
        print(chunk.choices[0].delta)
    else:
        print(chunk.usage)

default_value
ChoiceDelta(content='', function_call=None, refusal=None, role='assistant', tool_calls=None)
ChoiceDelta(content='我是', function_call=None, refusal=None, role=None, tool_calls=None)
ChoiceDelta(content='来自', function_call=None, refusal=None, role=None, tool_calls=None)
ChoiceDelta(content='阿里', function_call=None, refusal=None, role=None, tool_calls=None)
ChoiceDelta(content='云', function_call=None, refusal=None, role=None, tool_calls=None)
ChoiceDelta(content='的大', function_call=None, refusal=None, role=None, tool_calls=None)
ChoiceDelta(content='规模', function_call=None, refusal=None, role=None, tool_calls=None)
ChoiceDelta(content='语言', function_call=None, refusal=None, role=None, tool_calls=None)
ChoiceDelta(content='模型', function_call=None, refusal=None, role=None, tool_calls=None)
ChoiceDelta(content='，', function_call=None, refusal=None, role=None, tool_calls=None)
ChoiceDelta(content='我', function_call=None, refusal=None, role=None, tool_calls=None)
ChoiceDelta(cont

### 2. embedding模型

---

In [18]:
# Embedding模型示例
# author: sizhong du
# since: 2025-03-18


# 百炼云服务方式
# pip install langchain-community
# pip install dashscope
from langchain_community.embeddings import DashScopeEmbeddings
embeddings = DashScopeEmbeddings(
    model="text-embedding-v2",
    dashscope_api_key="sk-a5c5eb662fa64bb0b50e25765808d9f1"
)

# ollama方式
from langchain_ollama import OllamaEmbeddings
embeddings = OllamaEmbeddings(
    model="bge-m3:latest",
)

# xinference方式
from langchain_community.embeddings import XinferenceEmbeddings
embeddings2 = XinferenceEmbeddings(
    server_url="http://192.168.31.5:9997",
    model_uid="bce-embedding-base_v1",
)


# text = "hello world"
# single_vector = embeddings.embed_query(text)
# print(single_vector)

# 使用向量数据库
text_1 = "天地灵气孕育出一颗能量巨大的混元珠，元始天尊将混元珠提炼成灵珠和魔丸，灵珠投胎为人，助周伐纣时可堪大用；而魔丸则会诞出魔王，为祸人间。元始天尊启动了天劫咒语，3年后天雷将会降临，摧毁魔丸。太乙受命将灵珠托生于陈塘关李靖家的儿子哪吒身上。然而阴差阳错，灵珠和魔丸竟然被掉包。本应是灵珠英雄的哪吒却成了混世大魔王。调皮捣蛋顽劣不堪的哪吒却徒有一颗做英雄的心。然而面对众人对魔丸的误解和即将来临的天雷的降临，哪吒是否命中注定会立地成魔？他将何去何从？"
text_2 = "织女因擅离职守，堕入凡尘，遭星宿反噬，连累牛郎与孩子。多年后，被带往神界的织女后人金风，为替母赎罪而重返人间，收回星宿。途中，他意外结识渴望去往神界寻母的女孩小凡（玉露）。在结伴寻星的历险途中，阴差阳错知晓了织女罪案的真相……"
text_3 = "LangChain is the framework for building context-aware reasoning applications"
text_list = [text_1, text_2, text_3]

# 以文本存储
from langchain_core.vectorstores import InMemoryVectorStore
vectorstore = InMemoryVectorStore.from_texts(
    text_list,
    embedding=embeddings2,
)

# 以langchain document存储
# vector_store = InMemoryVectorStore(embedding=embeddings)
# vector_store.add_documents(documents=split_docs)

# vector_store = Chroma.from_texts(
#     texts=docs,
#     embedding=embeddings,
#     persist_directory="./chroma_db",
# )

# vector_db = Chroma.from_texts(
#     texts=splited_text,
#     embedding=embeddings,
#     persist_directory="./word_vector_db"
# )
# vector_db.persist()


# 向量检索器
# # Use the vectorstore as a retriever
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
)
user_query = "请问Langchain是什么"
# Retrieve the most similar text
retrieved_documents = retriever.invoke(user_query)
# show the retrieved document's content
print(retrieved_documents[0].page_content)


天地灵气孕育出一颗能量巨大的混元珠，元始天尊将混元珠提炼成灵珠和魔丸，灵珠投胎为人，助周伐纣时可堪大用；而魔丸则会诞出魔王，为祸人间。元始天尊启动了天劫咒语，3年后天雷将会降临，摧毁魔丸。太乙受命将灵珠托生于陈塘关李靖家的儿子哪吒身上。然而阴差阳错，灵珠和魔丸竟然被掉包。本应是灵珠英雄的哪吒却成了混世大魔王。调皮捣蛋顽劣不堪的哪吒却徒有一颗做英雄的心。然而面对众人对魔丸的误解和即将来临的天雷的降临，哪吒是否命中注定会立地成魔？他将何去何从？


### 以下为代调试代码

---

In [None]:
# 2.调用嵌入模型

# 方式1. 通过OpenAI模式调用百炼embedding模型
# import os
# import getpass
# from openai import OpenAI
# if "OPENAI_API_KEY" not in os.environ:
#     os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

# client = OpenAI(
#     api_key=os.getenv("OPENAI_API_KEY"),
#     base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
# )

# completion = client.embeddings.create(
#     model="text-embedding-v3",
#     input='The clothes are of good quality and look good, definitely worth the wait. I love them.',
#     dimensions=1024,
#     encoding_format="float"
# )

# print(completion.model_dump_json())

# 方式2. 调用百炼embedding模型
# pip install langchain-community
# pip install dashscope
from langchain_community.embeddings import DashScopeEmbeddings
embeddings = DashScopeEmbeddings(
    model="text-embedding-v2",
    dashscope_api_key="sk-a5c5eb662fa64bb0b50e25765808d9f1"
)

# 嵌入查询
# text = "This is a test document."
# query_result = embeddings.embed_query(text)
# print("文本向量长度：", len(query_result), sep='')

# 嵌入文档
# doc_results = embeddings.embed_documents(
#     [
#         "Hi there!",
#         "Oh, hello!",
#         "What's your name?",
#         "My friends call me World",
#         "Hello World!"
#     ])
# print("文本向量数量：", len(doc_results), "，文本向量长度：", len(doc_results[0]), sep='')


# 3.存入向量数据库

# 方式1. 存入内存
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.documents import Document
# from langchain_openai import OpenAIEmbeddings
# vector_store = InMemoryVectorStore(embedding=OpenAIEmbeddings())
vector_store = InMemoryVectorStore(embedding=embeddings)
document_1 = Document(id="1", page_content="foo", metadata={"baz": "bar"})
document_2 = Document(id="2", page_content="thud", metadata={"bar": "baz"})
document_3 = Document(id="3", page_content="i will be deleted :(")
documents = [document_1, document_2, document_3]
print(documents)
vector_store.add_documents(documents=documents)

# top_n = 10
# for index, (id, doc) in enumerate(vector_store.store.items()):
#     if index < top_n:
#         # docs have keys 'id', 'vector', 'text', 'metadata'
#         print(f"{id}: {doc['text']}")
#     else:
#         break

# 相似性检索
# results = vector_store.similarity_search(query="thud",k=1)
# for doc in results:
#     print(f"* {doc.page_content} [{doc.metadata}]")

# retriever方式检索
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 1, "fetch_k": 2, "lambda_mult": 0.5},
)
retriever.invoke("thud")
