In [2]:
import asyncio
from typing import Any, Dict, List

from langchain.callbacks.base import AsyncCallbackHandler, BaseCallbackHandler
from langchain_core.messages import HumanMessage
from langchain_core.outputs import LLMResult
from langchain_openai import ChatOpenAI

# Placeholder API key passed to the client below; presumably the local
# server does not validate it — TODO confirm.
openai_api_key = "EMPTY"
# Base URL of an OpenAI-compatible endpoint on localhost (port 11434 looks
# like an Ollama default — confirm against the actual deployment).
openai_api_base = "http://localhost:11434/v1"
class MyCustomSyncHandler(BaseCallbackHandler):
    """Synchronous callback handler that echoes each streamed token to stdout."""

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """Print the newly received token.

        Args:
            token: The token text delivered by the streaming LLM.
            **kwargs: Additional callback arguments; ignored.
        """
        message = f"Sync handler being called in a `thread_pool_executor`: token: {token}"
        print(message)


class MyCustomAsyncHandler(AsyncCallbackHandler):
    """Async callback handler that announces the start and end of an LLM run."""

    async def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """Run when the LLM starts running.

        Args:
            serialized: Serialized description of the LLM; not used here.
            prompts: Prompts being sent to the LLM; not used here.
            **kwargs: Additional callback arguments; ignored.
        """
        print("zzzz....")
        # Simulated asynchronous work before reporting the start.
        await asyncio.sleep(0.3)
        # NOTE: the former `serialized["name"]` lookup was dropped: its result
        # was never used and it could raise when the key is absent.
        print("LLM正在启动")

    async def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """Run when the LLM finishes running.

        Args:
            response: The result produced by the LLM; not used here.
            **kwargs: Additional callback arguments; ignored.
        """
        print("zzzz....")
        # Simulated asynchronous work before reporting the end.
        await asyncio.sleep(0.3)
        print("LLM结束")


# To enable streaming, we pass in `streaming=True` to the ChatModel constructor
# Additionally, we pass in a list with our custom handler
 
chat = ChatOpenAI(
    openai_api_key = openai_api_key, 
    openai_api_base = openai_api_base,
    temperature=0, 
    streaming=True,
    callbacks=[MyCustomSyncHandler(), MyCustomAsyncHandler()],)


await chat.agenerate([[HumanMessage(content="告诉我北京的特产")]])

zzzz....
LLM正在启动
Sync handler being called in a `thread_pool_executor`: token: <think>
Sync handler being called in a `thread_pool_executor`: token: 

Sync handler being called in a `thread_pool_executor`: token: 嗯
Sync handler being called in a `thread_pool_executor`: token: ，
Sync handler being called in a `thread_pool_executor`: token: 用户
Sync handler being called in a `thread_pool_executor`: token: 问
Sync handler being called in a `thread_pool_executor`: token: 的是
Sync handler being called in a `thread_pool_executor`: token: 北京
Sync handler being called in a `thread_pool_executor`: token: 的
Sync handler being called in a `thread_pool_executor`: token: 特产
Sync handler being called in a `thread_pool_executor`: token: ，
Sync handler being called in a `thread_pool_executor`: token: 我
Sync handler being called in a `thread_pool_executor`: token: 需要
Sync handler being called in a `thread_pool_executor`: token: 先
Sync handler being called in a `thread_pool_executor`: token: 回忆
Sync handle

LLMResult(generations=[[ChatGeneration(text='<think>\n嗯，用户问的是北京的特产，我需要先回忆一下北京有哪些著名的特产。首先，北京作为首都，有很多地方特产，比如京味小吃，比如炸酱面、豆汁、卤煮这些。还有可能提到的北京烤鸭，虽然可能不是特产，但很出名。另外，像故宫周边的特产，比如故宫文创产品，但可能用户更想知道传统的特产。\n\n然后，我需要确认这些特产的正确性。比如，炸酱面确实是北京的特色，豆汁也是，但要注意是否属于特产。还有像北京的果脯，比如大果脯，可能也是特产。另外，北京的茶叶，比如龙井茶，但龙井茶是杭州的，所以可能不是北京的特产。不过北京也有自己的茶叶，比如祁门红茶，但可能用户更想知道北京本地的。\n\n还有可能提到的北京的糕点，比如豆饼、京糕，或者像北京的腊八粥，虽然可能不是特产，但很受欢迎。另外，像北京的烤鸭，虽然属于北京的名菜，但可能用户会认为是特产，不过需要确认是否属于特产。\n\n另外，可能需要区分特产和名菜，比如烤鸭是名菜，但可能不是特产。所以需要明确回答，哪些是特产，哪些是名菜。比如，炸酱面、豆汁、卤煮、京糕、大果脯、北京烤鸭、故宫文创产品等。\n\n还要注意用户可能的深层需求，比如他们可能想了解北京的特色美食，或者旅游时的推荐。所以回答时要涵盖这些方面，同时给出简要的介绍，帮助用户了解。\n\n最后，检查是否有遗漏的重要特产，比如北京的腊八粥，或者像北京的果脯，可能还有其他如北京的糖葫芦，但可能不太常见。需要确保信息准确，避免错误。\n</think>\n\n北京作为中国的首都，拥有丰富的特产和美食文化，以下是一些最具代表性的北京特产：\n\n---\n\n### **一、传统小吃与美食**\n1. **炸酱面**  \n   - 京味经典，以鸡蛋、肉末、葱花等为配料，酱汁浓郁，是北京人日常饮食的代表。\n\n2. **豆汁**  \n   - 用黄豆发酵制成，味道独特，带有豆腥味，是北京人早餐的“传统”选择，常搭配炸酱面。\n\n3. **卤煮**  \n   - 以猪肉为主料，用卤水腌制，口感鲜香，是北京街头小吃的代表。\n\n4. **京糕（京饼）**  \n   - 传统糕点，以面粉、豆沙、枣泥等为馅，口感软糯，是北京传统节日的必备食品。\n\n5. **大果脯**  \n   - 以