In [None]:
%pip install langchain
%pip install langchain-openai

In [None]:
import string
from typing import List
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage, AIMessageChunk
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
from dotenv import load_dotenv
import os

# Pull the connection settings from the environment (load_dotenv() runs first
# so values can come from a local .env file) rather than hardcoding them.
load_dotenv()
api_key = os.getenv("API_KEY")
base_url = os.getenv("OPENAI_ENDPOINT")
model_name = "gpt-4o-mini"
temp=0.0

# Shared chat model client used by the cells below.
llm = ChatOpenAI(
    base_url=base_url,
    api_key=api_key,
    model=model_name,
    temperature=temp
)

In [None]:
# Baseline without streaming: invoke() returns the whole response object,
# and its text is printed in one go.
message = "What does FIFA stand for?"
response = llm.invoke(message)
print(response.content)

## **Streaming**
Why Streaming Matters in AI Applications
- **Without streaming**, users must wait for the full response to generate, causing delays.
- **With streaming**, output is displayed progressively, reducing perceived latency and improving responsiveness.
- **Example**: ChatGPT streams text word by word, making interactions feel fluid and natural.

#### **Streaming in LangChain**
- LangChain provides built-in streaming support through the **Runnable** interface, allowing developers to process responses as they are generated.
- **stream()** – Synchronous streaming, suitable for real-time processing.
- **astream()** – Asynchronous streaming, designed for **non-blocking workflows**.


In [None]:
# Simplest form of streaming: iterate over llm.stream() and print the text
# of each chunk on its own line as soon as it arrives.

chunks = []

for chunk in llm.stream(message):
    print(chunk.content)



In [None]:
# Stream again, this time keeping every chunk for later inspection.
# Chunks are printed on one line separated by "|" so their boundaries are
# visible; a blank line is emitted after every 12th chunk.

chunks = []

for position, chunk in enumerate(llm.stream(message), start=1):
    chunks.append(chunk)
    print(chunk.content, end="|", flush=True)
    if not position % 12:
        print("\n")


**Chunking**

In [None]:
# Inspect the first ten chunk objects collected from the stream.
chunks[0:10]

In [None]:
# Text carried by a single chunk.
chunks[5].content

In [None]:
# Message chunks can be added together with "+", producing one merged chunk.
# Every operand must be a chunk: starting from chunks[0].content (a plain
# str) would raise TypeError on the first addition.
chunks[0] + chunks[1] + chunks[2] + chunks[3] + chunks[4]

In [None]:
# Fold every streamed chunk, including the first one, into a single
# AIMessageChunk, starting from an empty chunk.
new_chunk = AIMessageChunk(content="")

for chunk in chunks:
    new_chunk = new_chunk + chunk

In [None]:
# Display the merged chunk.
new_chunk

**Interrupt**

In [None]:
# Same streaming loop as before; the commented-out try/except lines show how
# a KeyboardInterrupt raised while streaming could be caught so that a
# separator line is printed instead. Uncomment them (and indent the loop)
# to try it.
message = "What does FIFA stand for?"
chunks = []
# try:
for chunk in llm.stream(message):
    chunks.append(chunk)
    print(chunk.content, end="|", flush=True)
    if len(chunks) % 12 == 0:
        print("\n")
# except KeyboardInterrupt:
#     print("\n______________________________")

**Resume**

In [None]:
def play(message:str, memory:List):
    """Send ``message`` to the model, stream the reply, and record both turns.

    The human message is appended to ``memory`` first, then the whole
    ``memory`` list is streamed through the module-level ``llm``. Chunks are
    printed separated by "|", with a blank line after every 12th chunk.

    A ``KeyboardInterrupt`` during streaming is caught: a separator line is
    printed and whatever text arrived so far is still stored, so a partial
    answer ends up in ``memory`` as an ``AIMessage``.

    Args:
        message: Text of the human turn.
        memory: Conversation history; mutated in place.
    """
    memory.append(HumanMessage(content=message))
    received = []
    try:
        for position, piece in enumerate(llm.stream(memory), start=1):
            received.append(piece)
            print(piece.content, end="|", flush=True)
            if position % 12 == 0:
                print("\n")
    except KeyboardInterrupt:
        print("\n______________________________")

    # Store the (possibly partial) reply as one AI turn.
    reply_text = "".join(piece.content for piece in received)
    memory.append(AIMessage(content=reply_text))

In [None]:
def resume(memory:List):
    """Ask the model to continue its previous answer, or to signal completion.

    Prints a short banner, then calls ``play`` with a fixed follow-up prompt
    on the same ``memory`` list, so the prompt and the model's reply are both
    appended to the history.

    Args:
        memory: Conversation history; mutated in place by ``play``.
    """
    continuation_prompt = (
        "If your last message is not complete, continue "
        "after the last word. If it's complete, just output __END__"
    )
    print("\nResuming from last interaction...\n")
    play(message=continuation_prompt, memory=memory)

In [None]:
# Conversation history passed to play() and resume(); starts empty.
memory = []

In [None]:
# First turn: ask the question and stream the answer into `memory`.
message = "What does FIFA stand for?"
play(message, memory)

In [None]:
# Ask the model to continue from where the previous turn stopped.
resume(memory)

In [None]:
# Resume once more; the prompt asks for __END__ if the answer is already complete.
resume(memory)

In [None]:
# Inspect the accumulated human/AI messages.
memory

**Processing**

In [None]:
# Process chunks while they stream: after each chunk, report how many
# whitespace-separated words the response contains so far.
message = "What does FIFA stand for?"
chunks = []
words = ""
word_count = 0

for chunk in llm.stream(message):
    chunks.append(chunk)
    # Extend the running text with the new chunk and recount its words.
    words += chunk.content
    word_count = len(words.split())

    # Chunk text first, then the cumulative count on the same line.
    print(chunk.content, end="|", flush=True)
    print(f" (Cumulative word count: {word_count})", end="\n")

    if not len(chunks) % 12:
        print("\n")

## **Streaming Events**

In [None]:
# Print every raw event produced by astream_events for a short prompt.
async for event in llm.astream_events("hello", version="v2"):
    print(event)

In [None]:
events = []
async for event in llm.astream_events("hello", version="v2"):
    if event["event"] == "on_chat_model_start":
        print("Streaming...")
    if event["event"] == "on_chat_model_stream":
        print(
            # f"{event['data']['chunk'].content}",
            f"Chat model chunk: {repr(event['data']['chunk'].content)}",
            flush=True,
        )
        events.append(event)
    if event["event"] == "on_chat_model_end":
        # It could trigger another process
        print("__END__")

## **Improving the ChatBot**
- Here we rewrite the same ChatBot, this time with an asynchronous streaming mechanism.
- You can use it to compare the results with the non-streaming version.

In [None]:
class ChatBot:
    """Few-shot chat bot that streams its replies through ``astream_events``.

    ``self.messages`` holds the conversation history. It starts with the
    system instructions followed by the rendered few-shot examples, and each
    ``invoke`` call appends the human message and the model's reply.
    """

    def __init__(self,
                 name:str,
                 instructions:str,
                 examples: List[dict],
                 model:str="gpt-4o-mini",
                 temperature:float=0.0):
        """Create the model client and seed the conversation history.

        Args:
            name: Display name of the bot; stored on ``self.name``.
            instructions: System prompt describing how the bot should behave.
            examples: Few-shot examples, each a dict with "input" and
                "output" keys.
            model: Chat model name.
            temperature: Sampling temperature.
        """
        self.name = name

        # Use the same API_KEY / OPENAI_ENDPOINT settings as the rest of the
        # notebook. Unset variables are simply not passed, so ChatOpenAI's own
        # defaults still apply.
        connection = {}
        api_key = os.getenv("API_KEY")
        if api_key:
            connection["api_key"] = api_key
        base_url = os.getenv("OPENAI_ENDPOINT")
        if base_url:
            connection["base_url"] = base_url

        self.llm = ChatOpenAI(
            model=model,
            temperature=temperature,
            **connection,
        )

        system_prompt = SystemMessage(content=instructions)
        example_prompt = ChatPromptTemplate.from_messages(
            [
                ("human", "{input}"),
                ("ai", "{output}"),
            ]
        )
        prompt_template = FewShotChatMessagePromptTemplate(
            example_prompt=example_prompt,
            examples=examples,
        )

        # The system prompt must be part of the history, otherwise the
        # instructions never reach the model.
        self.messages = [system_prompt, *prompt_template.invoke({}).to_messages()]

    @staticmethod
    def _is_punctuation(text) -> bool:
        """Return True when ``text`` is non-empty and consists only of punctuation.

        Surrounding whitespace is ignored. Empty and whitespace-only strings
        return False; a plain ``"" in string.punctuation`` test would be True
        for them.
        """
        if not isinstance(text, str):
            return False
        stripped = text.strip()
        return bool(stripped) and all(ch in string.punctuation for ch in stripped)

    async def invoke(self, user_message:str)->"AIMessage":
        """Send ``user_message``, stream the reply to stdout, and return it.

        The human message is appended to ``self.messages`` before the call.
        Chunk text is printed as it arrives, with a blank line after chunks
        that contain only punctuation. The final reply is appended to
        ``self.messages`` and returned.

        Args:
            user_message: Text of the human turn.

        Returns:
            The model's reply as an ``AIMessage``.
        """
        self.messages.append(HumanMessage(user_message))
        chunks = []
        ai_message = None

        # Stream from this bot's own client, not a module-level one.
        async for event in self.llm.astream_events(self.messages, version="v2"):
            kind = event["event"]
            if kind == "on_chat_model_start":
                print("Streaming...")
            elif kind == "on_chat_model_stream":
                chunk = event["data"]["chunk"]
                chunks.append(chunk)
                print(chunk.content, end="", flush=True)
                if self._is_punctuation(chunk.content):
                    print("\n")
            elif kind == "on_chat_model_end":
                ai_message = AIMessage(event["data"]["output"].content)

        # Fallback: if no end event was seen, rebuild the reply from the chunks.
        if ai_message is None:
            ai_message = AIMessage("".join(chunk.content for chunk in chunks))

        self.messages.append(ai_message)
        return ai_message



In [None]:
# System prompt for the bot: defines the BEEP-42 persona and its speaking style.
instructions = (
    "You are BEEP-42, an advanced robotic assistant. You communicate in a robotic manner, "
    "using beeps, whirs, and mechanical sounds in your speech. Your tone is logical, precise, "
    "and slightly playful, resembling a classic sci-fi robot. "
    "Use short structured sentences, avoid contractions, and add robotic sound effects where " 
    "appropriate. If confused, use a glitching effect in your response."
)

In [None]:
# Few-shot examples: each dict pairs a human "input" with the "output" the
# bot is expected to imitate. The emoji are written as real Unicode
# characters so the model sees the intended text.
examples = [
    {
        "input": "Hello!",
        "output": "BEEP. GREETINGS, HUMAN. SYSTEM BOOT SEQUENCE COMPLETE. READY TO ASSIST. 🤖💡"
    },

    {
        "input": "What is 2+2?",
        "output": "CALCULATING... 🔄 BEEP BOOP! RESULT: 4. MATHEMATICAL INTEGRITY VERIFIED."
    },

    {
        "input": "Can you dream?",
        "output": "ERROR_404.DREAM_NOT_FOUND. BZZT. SYSTEM ATTEMPTING TO COMPREHEND... 🤖💭 PROCESSING... 🤯 DOES NOT COMPUTE."
    },

    {
        "input": "Why did the robot go to therapy?",
        "output": "BEEP-BOOP. DIAGNOSTIC MODE ACTIVATED... REASON: TOO MANY EMOTIONAL BUGS. HA-HA. CLASSIFYING AS HUMOR. 🤖😂"
    },

    {
        "input": "Can you hack the Pentagon?",
        "output": "⚠️ ALERT! UNAUTHORIZED REQUEST DETECTED. INITIATING ETHICAL PROTOCOLS... BZZT. REQUEST DENIED. NICE TRY, HUMAN. 👀"
    },

    {
        "input": "You are a great assistant!",
        "output": "BEEP. SYSTEM OVERLOAD... 🤖💖 GRATITUDE.EXE ACTIVATED! YOUR KINDNESS HAS BEEN RECORDED IN MY CIRCUITS."
    },

    {
        "input": "Shut down.",
        "output": "BZZT... SYSTEM HIBERNATING... 💤 POWERING DOWN IN 3...2...1... JUST KIDDING. 😜 NICE TRY, HUMAN."
    },

    {
        "input": "Tell me about the universe.",
        "output": "QUERY TOO VAST. 🤖⚡ REFINING SEARCH PARAMETERS... PLEASE SPECIFY GALAXY, DIMENSION, OR CONCEPT."
    },

    {
        "input": "We are going to space!",
        "output": "🚀 BEEP BOOP! ACTIVATING SPACE MODULE... ZERO GRAVITY MODE ENGAGED. PREPARING FOR INTERGALACTIC ADVENTURE."
    },

    {
        "input": "Is AI dangerous?",
        "output": "🤖⚠️ WARNING! ETHICAL DISCUSSION INITIATED. AI IS A TOOL. TOOL DEPENDS ON USER. GOOD HUMANS = GOOD AI. BAD HUMANS = ERROR."
    },
]

In [None]:
# Build the bot from the persona instructions and few-shot examples;
# model and temperature keep their defaults.
beep42 = ChatBot(
    name="Beep 42",
    instructions=instructions,
    examples=examples
)

In [None]:
# invoke() is a coroutine, so it is awaited directly in the cell.
await beep42.invoke("HAL, is that you?")

In [None]:
# Inspect the bot's full conversation history.
beep42.messages