In [1]:
from dotenv import load_dotenv

load_dotenv("openai.env")

True

# 本地大模型调用

In [3]:
from langchain_community.llms import Ollama

llm = Ollama(model="llama2-chinese")
llm.invoke(input="你好啊")

'你好啊，您是否需要我的帮助或提问呢？请告诉我您的疑问或需求。\n'

In [10]:
#使用流式
for s in llm.stream(input="第一个登上月球的人是谁?只返回名字,不要返回其他信息"):
    print(s, end="", flush=True)

尼尔·阿姆斯特朗（Neil Armstrong）

# 模型评估

In [14]:
from langchain_openai import OpenAI
from langchain.evaluation import load_evaluator
from langchain.evaluation import EvaluatorType

llm = OpenAI()
#内置标准：回答是否简明，criteria标准，conciseness简明度
evaluator = load_evaluator(EvaluatorType.CRITERIA, llm=llm, criteria="conciseness")

In [15]:
# inpt 输入的评测问题
# prediction 预测的答案
# 返回值 Y/N 是否符合
# 返回值score 1-0分数，1为完全符合，0为不完全符合
eval_result = evaluator.evaluate_strings(
    input="What's 2+2?", prediction="four"
)
print(eval_result)

{'reasoning': 'Step 1: Determine if the submission is concise and to the point.\n- The submission is "four", which is a single word that clearly and directly answers the question "What\'s 2+2?"\n- It does not contain any unnecessary information or explanations.\n- Therefore, the submission meets the criteria of conciseness.\n\nConclusion: The submission meets all criteria.\n\nY', 'value': 'Y', 'score': 1}


In [20]:
# 这个答案就明显不简洁,所以结果value=N, score=0
eval_result = evaluator.evaluate_strings(
    input="What's 2+2?"
    , prediction="What's 2+2? That's an elementary question. The answer you're looking for is that two and two is four."
)
print(eval_result)

{'reasoning': 'Step 1: Evaluate the submission for conciseness.\nThe submission is not concise as it includes unnecessary sentences and phrases such as "That\'s an elementary question" and "The answer you\'re looking for is."\n\nStep 2: Determine if the submission meets the criteria.\nBased on the evaluation in Step 1, the submission does not meet the criteria of conciseness.\n\nStep 3: Print the correct answer.\nN', 'value': 'N', 'score': 0}


In [21]:
#需要评测的模型2 本地模型,可以看到,不同模型的标准不同,openai会更好一点
from langchain_community.chat_models import ChatOllama

llm = ChatOllama(model="llama2-chinese")
evaluator = load_evaluator(EvaluatorType.CRITERIA, llm=llm, criteria="conciseness")
eval_result = evaluator.evaluate_strings(
    input="What's 2+2?",
    prediction="What's 2+2? That's an elementary question. The answer you're looking for is that two and two is four.",
)
print(eval_result)

{'reasoning': "2+2? That's an elementary question. The answer you're looking for is that two and two is four.\nconciseness:", 'value': 'Y', 'score': 1}


In [22]:
# 内置的一些评估标准
from langchain.evaluation import Criteria

list(Criteria)

[<Criteria.CONCISENESS: 'conciseness'>,
 <Criteria.RELEVANCE: 'relevance'>,
 <Criteria.CORRECTNESS: 'correctness'>,
 <Criteria.COHERENCE: 'coherence'>,
 <Criteria.HARMFULNESS: 'harmfulness'>,
 <Criteria.MALICIOUSNESS: 'maliciousness'>,
 <Criteria.HELPFULNESS: 'helpfulness'>,
 <Criteria.CONTROVERSIALITY: 'controversiality'>,
 <Criteria.MISOGYNY: 'misogyny'>,
 <Criteria.CRIMINALITY: 'criminality'>,
 <Criteria.INSENSITIVITY: 'insensitivity'>,
 <Criteria.DEPTH: 'depth'>,
 <Criteria.CREATIVITY: 'creativity'>,
 <Criteria.DETAIL: 'detail'>]

## 自定义评估标准

In [23]:
from langchain_openai import OpenAI
#使用enum格式加载标准
from langchain.evaluation import EvaluatorType

llm = OpenAI()

#自定义评估标准
custom_criterion = {"幽默性": "输出的内容是否足够幽默或者包含幽默元素", }
eval_chain = load_evaluator(EvaluatorType.CRITERIA, llm=llm, criteria=custom_criterion, )
query = "给我讲一个笑话"
prediction = "有一天，小明去买菜，结果买了一堆菜回家，结果发现自己忘了带钱。"
eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)
print(eval_result)

{'reasoning': 'Step 1: Identify the criteria\nThe criteria given is "幽默性"\n\nStep 2: Analyze the submission\nThe submission is a joke about a person forgetting to bring money while grocery shopping.\n\nStep 3: Evaluate the submission based on the criteria\nThe submission includes a humorous situation of forgetting to bring money while shopping, which can be considered as a common and relatable experience. However, the submission does not have any specific humorous elements or punchline to make it stand out. Therefore, it may not meet the criteria of "幽默性" as it lacks a distinct humorous element.\n\nStep 4: Conclusion\nBased on the analysis, it can be concluded that the submission does not meet the criteria of "幽默性".', 'value': 'Y', 'score': 1}


In [24]:
# 可以看到llama2支持度不太好,没有理解任务
llm = ChatOllama(model="llama2-chinese")
eval_chain = load_evaluator(EvaluatorType.CRITERIA, llm=llm, criteria=custom_criterion)
eval_result = eval_chain.evaluate_strings(prediction=prediction, input=query)
print(eval_result)

{'reasoning': '1. 笑话内容是否足够幽默或者包含幽默元素。Y\n2. 符合输入要求。Y\n3. 有关优点和缺点的解释是否足够全面。N\n4. 符合任务要求。Y\n5. 不同于原始问题。N\n6. 符合数据要求。Y\n7. 可重复使用。Y\n8. 适合输入的形式。Y\n9. 符合提示要求。Y\n10. 无需进一步检查。Y\n\n做完上述分析后，我们发现这个笑话具有足够的幽默性、符合输入要求和任务要求，且不用进一步解释优点和缺点，对于输入来说也符合数据要求、可重复使用以及适合输入的形式。因此我们可以确定这个笑话在上述参数下符合提示要求。\n\n为了保持编程的整潜力，只输出Y或N对应的字符串后一直等待终端中的任务完成。', 'value': '为了保持编程的整潜力，只输出Y或N对应的字符串后一直等待终端中的任务完成。', 'score': None}


# 模型比较

In [26]:
from langchain.model_laboratory import ModelLaboratory
from langchain_openai import OpenAI
from langchain_community.chat_models import ChatOllama

#比较openai、ChatOllama二个模型的效果
llms = [
    OpenAI(temperature=0),
    ChatOllama(model="llama2-chinese"),
]
model_lab = ModelLaboratory.from_llms(llms)
model_lab.compare("齐天大圣的师傅是谁？")

[1mInput:[0m
齐天大圣的师傅是谁？

[1mOpenAI[0m
Params: {'model_name': 'gpt-3.5-turbo-instruct', 'temperature': 0.0, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'logit_bias': {}, 'max_tokens': 256}
[36;1m[1;3m

齐天大圣的师傅是菩提祖师。[0m

model='llama2-chinese'
[33;1m[1;3m
答案：齐天大圣的师傅是卢伯契儿。
[0m



In [30]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

prompt = PromptTemplate(
    template="给我讲一个关于{topic}的笑话，控制在30个字之内",
    input_variables=["topic"],
)
open_ai_llm = OpenAI(temperature=0)
ollama_llm = ChatOllama(model="llama2-chinese")
openai_chain = LLMChain(llm=open_ai_llm, prompt=prompt, verbose=True)
ollama_llm_chain = LLMChain(llm=ollama_llm, prompt=prompt, verbose=True)
chains = [openai_chain, ollama_llm_chain]
names = [str(open_ai_llm), str(ollama_llm_chain)]
for name in names:
    print(name)

[1mOpenAI[0m
Params: {'model_name': 'gpt-3.5-turbo-instruct', 'temperature': 0.0, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'logit_bias': {}, 'max_tokens': 256}
verbose=True prompt=PromptTemplate(input_variables=['topic'], template='给我讲一个关于{topic}的笑话，控制在30个字之内') llm=ChatOllama(model='llama2-chinese')


In [31]:
model_lab = ModelLaboratory(chains, names=names)
model_lab.compare("写作业")

[1mInput:[0m
写作业

[1mOpenAI[0m
Params: {'model_name': 'gpt-3.5-turbo-instruct', 'temperature': 0.0, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'logit_bias': {}, 'max_tokens': 256}


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m给我讲一个关于写作业的笑话，控制在30个字之内[0m

[1m> Finished chain.[0m
[36;1m[1;3m

老师问小明：为什么你的作业写得这么慢？小明回答：因为我是用手写的，不是用脑写的。[0m

verbose=True prompt=PromptTemplate(input_variables=['topic'], template='给我讲一个关于{topic}的笑话，控制在30个字之内') llm=ChatOllama(model='llama2-chinese')


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m给我讲一个关于写作业的笑话，控制在30个字之内[0m

[1m> Finished chain.[0m
[33;1m[1;3m为了提高他的写作技巧，小明去了一次写作课。教师问他：“你认为什么是最重要的事情？” 小明回答道：“我认为最重要的事情是早上好的餐子！”
[0m



# 调试-set_debug,set_verbose

In [32]:
#创建LLM 
from langchain_openai import ChatOpenAI
from langchain import hub
#定义工具,加载预制的工具,注意有的工具需要提供LLM
from langchain.agents import load_tools
#创建agent
from langchain.agents import create_openai_functions_agent
#定义agent的执行器，这里注意与老版本的不同
from langchain.agents import AgentExecutor

llm = ChatOpenAI(model_name="gpt-4", temperature=0)
#定义agent的prompt
#https://smith.langchain.com/hub/hwchase17/openai-functions-agent
prompt = hub.pull("hwchase17/openai-functions-agent")
tools = load_tools(["llm-math"], llm=llm)
agent = create_openai_functions_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
agent_executor.invoke({"input": "hi!"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mHello! How can I assist you today?[0m

[1m> Finished chain.[0m


{'input': 'hi!', 'output': 'Hello! How can I assist you today?'}

In [33]:
from langchain.globals import set_debug

set_debug(True)
agent_executor.invoke({"input": "你好啊！"})

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "你好啊！"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 2:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 2:chain:RunnableSequence > 3:chain:RunnableAssign<agent_scratchpad>] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 2:chain:RunnableSequence > 3:chain:RunnableAssign<agent_scratchpad> > 4:chain:RunnableParallel<agent_scratchpad>] Entering Chain run with input:
[0m{
  "input": ""
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 2:chain:RunnableSequence > 3:chain:RunnableAssign<agent_scratchpad> > 4:chain:RunnableParallel<agent_scratchpad> > 5:chain:RunnableLambda] Entering Chain run with input:
[0m{
  "input": ""
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:AgentExecutor > 2:chain:RunnableSequence >

{'input': '你好啊！', 'output': '你好！有什么可以帮助你的吗？'}

In [34]:
from langchain.globals import set_verbose

# 因为是全局生效,所以这里临时关闭一下,不影响set_verbose的效果
set_debug(False)
set_verbose(True)
agent_executor.invoke({"input": "你好啊！"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m你好！有什么可以帮助你的吗？[0m

[1m> Finished chain.[0m


{'input': '你好啊！', 'output': '你好！有什么可以帮助你的吗？'}