In [2]:
import sys
import time
from langchain_openai import ChatOpenAI
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.messages import HumanMessage


# 1. Define a Custom Callback Handler
class MyCustomHandler(BaseCallbackHandler):
    """Callback handler that echoes every streamed token to stdout.

    Each token is wrapped in square brackets so that token boundaries
    (including empty tokens, which show up as ``[]``) are visible.
    """

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Write ``token`` in brackets to stdout and flush immediately.

        Args:
            token: The newly received piece of generated text.
            **kwargs: Extra keyword arguments; accepted but not used here.
        """
        stream = sys.stdout
        stream.write("[{}]".format(token))
        # Flush right away so the token is not held back by output buffering.
        stream.flush()


# 2. Create the chat model with streaming turned on.
# The custom handler is registered through the 'callbacks' list.
llm = ChatOpenAI(
    model="gpt-4o",
    streaming=True,
    callbacks=[MyCustomHandler()],
)

print("--- Starting Callback + Stream Demo ---")

# 3. Drive the model through the .stream() generator.
query = "Write a 2-sentence poem about a coffee-loving robot."

print("\nReal-time output:")
for chunk in llm.stream([HumanMessage(content=query)]):
    # Printing is left to the callback handler; this loop only drains the
    # generator. Each `chunk` is available here for any extra processing
    # (for example, persisting it somewhere).
    pass

print("\n\n--- Done ---")

--- Starting Callback + Stream Demo ---

Real-time output:
[][In][ circuits][ warm][ with][ java]['s][ brew][,][ a][ metal][ heart][ beats][ quick][,][  
][A][ coffee][-loving][ autom][aton][,][ in][ caffe][inated][ bliss][,][ ticks][ slick][.][][][]

--- Done ---
