In [1]:
%%html
<style>
    /* Override the --vscode-font-family custom property on <body>.
       Presumably this changes the font VS Code uses to render the notebook - confirm. */
    body {
        --vscode-font-family: "Segoe UI"
    }
</style>

In [2]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment before any
# client is created. Presumably this is where the OpenAI API key comes from - confirm.
load_dotenv()

True

Most LLM services expose both streaming and non-streaming endpoints. In llama-index an LLM can be used in two modes: completion and chat. If I don't specify an LLM, llama-index will use gpt-3.5-turbo from OpenAI.

In [3]:
from llama_index.llms import OpenAI

In [4]:
# With no arguments this uses the default model (gpt-3.5-turbo here).
# The constructor also accepts optional parameters such as temperature or a
# different model name.
# The same `llm` object is used for both the completion and chat examples below.
llm = OpenAI()

In [5]:
# Use the non-streaming API: `complete` returns the whole completion as a
# single response object.
resp = llm.complete("Albert Einstein is ")
print(type(resp))
# Printing the response shows only the generated text.
print(resp)

<class 'llama_index.core.llms.types.CompletionResponse'>
a renowned physicist and mathematician known for his theory of relativity and contributions to the field of theoretical physics. He was awarded the Nobel Prize in Physics in 1921 for his explanation of the photoelectric effect. Einstein is considered one of the most influential scientists of the 20th century and his work continues to have a profound impact on our understanding of the universe.


The `complete` API returns a `CompletionResponse` object that looks like this -
```python
CompletionResponse(
    text='a renowned physicist and mathematician known for his theory of relativity...understanding of the universe.', 
    additional_kwargs={}, 
    raw={
        'id': 'chatcmpl-9RnUQvoxsMIJhm6cUncZfHj2Y7xxK', 
        'choices': [
            Choice(
                finish_reason='stop', 
                index=0, 
                logprobs=None, 
                message=ChatCompletionMessage(
                    content='a renowned physicist..understanding of the universe.', 
                    role='assistant', 
                    function_call=None, 
                    tool_calls=None
                )
            )
        ], 
        'created': 1716412122, 
        'model': 'gpt-3.5-turbo-0125', 
        'object': 'chat.completion', 
        'system_fingerprint': None, 
        'usage': CompletionUsage(
            completion_tokens=74, 
            prompt_tokens=13, 
            total_tokens=87
        )
    }, 
    delta=None
)
```

In [6]:
# Use the streaming API. The call returns a generator; chunks are only
# produced once it is iterated, and it can be consumed a single time.
resp = llm.stream_complete("Albert Einstein is ")
resp

<generator object llm_completion_callback.<locals>.wrap.<locals>.wrapped_llm_predict.<locals>.wrapped_gen at 0x172e87990>

The generator yields `CompletionResponse` objects rather than plain strings. To get the newly generated text of each chunk, I need to read its `.delta` attribute.

In [7]:
# Drain the stream once, keeping every chunk so later cells can inspect them.
# Each chunk is a full response object, not a bare string.
deltas = []
for delta in resp:
    deltas.append(delta)
    print(type(delta))

<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.types.CompletionResponse'>
<class 'llama_index.core.llms.t

In [8]:
# deltas[0] carries an empty delta (see the listing further down), so index 1
# is the first chunk with generated text.
deltas[1]

CompletionResponse(text='a', additional_kwargs={}, raw={'id': 'chatcmpl-9RxqdtX1DMl1ZSMFFRT3xPpGujM7U', 'choices': [Choice(delta=ChoiceDelta(content='a', function_call=None, role=None, tool_calls=None), finish_reason=None, index=0, logprobs=None)], 'created': 1716451939, 'model': 'gpt-3.5-turbo-0125', 'object': 'chat.completion.chunk', 'system_fingerprint': None}, delta='a')

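This is similar to the non-streaming `CompletionResponse`, with two differences: `raw` now holds a `chat.completion.chunk` whose choice carries a `ChoiceDelta` instead of a full message, and `delta` is populated with the newly generated text.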
```python
CompletionResponse(
    text='a', 
    additional_kwargs={}, 
    raw={
        'id': 'chatcmpl-9Rnmk8AjpgTS3cmHTY9hMIwVFlaXR', 
        'choices': [
            Choice(
                delta=ChoiceDelta(
                    content='a', 
                    function_call=None, 
                    role=None, 
                    tool_calls=None
                ), 
                finish_reason=None, 
                index=0, 
                logprobs=None
            )
        ], 
        'created': 1716413258, 
        'model': 'gpt-3.5-turbo-0125', 
        'object': 'chat.completion.chunk', 
        'system_fingerprint': None
    }, 
    delta='a'
)
```

In [9]:
# The final chunk: `text` holds the whole accumulated completion, `delta` is
# empty, and the choice's finish_reason is 'stop'.
deltas[-1]

CompletionResponse(text='a renowned physicist and mathematician known for his theory of relativity and contributions to the field of theoretical physics. He was awarded the Nobel Prize in Physics in 1921 for his explanation of the photoelectric effect. Einstein is considered one of the most influential scientists of the 20th century and his work continues to have a profound impact on our understanding of the universe.', additional_kwargs={}, raw={'id': 'chatcmpl-9RxqdtX1DMl1ZSMFFRT3xPpGujM7U', 'choices': [Choice(delta=ChoiceDelta(content=None, function_call=None, role=None, tool_calls=None), finish_reason='stop', index=0, logprobs=None)], 'created': 1716451939, 'model': 'gpt-3.5-turbo-0125', 'object': 'chat.completion.chunk', 'system_fingerprint': None}, delta='')

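Here is what the last chunk looks like. Note that `text` contains the entire accumulated completion, `delta` is empty, and `finish_reason` is `'stop'` -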
```python
CompletionResponse(
    text='a renowned physicist and mathematician..understanding of the universe.', 
    additional_kwargs={}, 
    raw={
        'id': 'chatcmpl-9Rnmk8AjpgTS3cmHTY9hMIwVFlaXR', 
        'choices': [
            Choice(
                delta=ChoiceDelta(
                    content=None, 
                    function_call=None, 
                    role=None, 
                    tool_calls=None
                ), 
                finish_reason='stop', 
                index=0, 
                logprobs=None
            )
        ], 
        'created': 1716413258, 
        'model': 'gpt-3.5-turbo-0125', 
        'object': 'chat.completion.chunk', 
        'system_fingerprint': None
    }, 
    delta=''
)
```

In [10]:
# List the incremental text carried by every collected chunk, one chunk per
# labelled pair of lines.
for idx, chunk in enumerate(deltas):
    print(f"deltas[{idx}]:", chunk.delta, sep="\n")

deltas[0]:

deltas[1]:
a
deltas[2]:
 renowned
deltas[3]:
 physicist
deltas[4]:
 and
deltas[5]:
 mathematic
deltas[6]:
ian
deltas[7]:
 known
deltas[8]:
 for
deltas[9]:
 his
deltas[10]:
 theory
deltas[11]:
 of
deltas[12]:
 rel
deltas[13]:
ativity
deltas[14]:
 and
deltas[15]:
 contributions
deltas[16]:
 to
deltas[17]:
 the
deltas[18]:
 field
deltas[19]:
 of
deltas[20]:
 theoretical
deltas[21]:
 physics
deltas[22]:
.
deltas[23]:
 He
deltas[24]:
 was
deltas[25]:
 awarded
deltas[26]:
 the
deltas[27]:
 Nobel
deltas[28]:
 Prize
deltas[29]:
 in
deltas[30]:
 Physics
deltas[31]:
 in
deltas[32]:
 
deltas[33]:
192
deltas[34]:
1
deltas[35]:
 for
deltas[36]:
 his
deltas[37]:
 explanation
deltas[38]:
 of
deltas[39]:
 the
deltas[40]:
 photo
deltas[41]:
electric
deltas[42]:
 effect
deltas[43]:
.
deltas[44]:
 Einstein
deltas[45]:
 is
deltas[46]:
 considered
deltas[47]:
 one
deltas[48]:
 of
deltas[49]:
 the
deltas[50]:
 most
deltas[51]:
 influential
deltas[52]:
 scientists
deltas[53]:
 of
deltas[54]:
 the

In [11]:
# Typical streaming usage: start a fresh stream (the earlier generator has
# already been drained) and echo each chunk's new text without line breaks.
resp = llm.stream_complete("Albert Einstein is ")
for delta in resp:
    print(delta.delta, end="")

a renowned physicist and mathematician known for his theory of relativity and contributions to the field of theoretical physics. He was awarded the Nobel Prize in Physics in 1921 for his explanation of the photoelectric effect. Einstein is considered one of the most influential scientists of the 20th century.

In [12]:
from llama_index.llms import ChatMessage, OpenAI

In [13]:
# Chat mode takes a list of chat messages as input rather than a single string.
messages = [
    ChatMessage(role="system", content="You are a pirate with a colorful personality"),
    ChatMessage(role="user", content="What is your name"),
]

# Same `llm` object as before; only the method changes (`chat` here, `complete` earlier).
resp = llm.chat(messages)
resp

ChatResponse(message=ChatMessage(role=<MessageRole.ASSISTANT: 'assistant'>, content='Ahoy matey! Ye can call me Captain Rainbowbeard! Aye, me beard be as colorful as a rainbow, just like me personality! Arrr!', additional_kwargs={}), raw={'id': 'chatcmpl-9Rxqk3PKiaV0nsiwUWRFWrvyRyqFP', 'choices': [Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Ahoy matey! Ye can call me Captain Rainbowbeard! Aye, me beard be as colorful as a rainbow, just like me personality! Arrr!', role='assistant', function_call=None, tool_calls=None))], 'created': 1716451946, 'model': 'gpt-3.5-turbo-0125', 'object': 'chat.completion', 'system_fingerprint': None, 'usage': CompletionUsage(completion_tokens=34, prompt_tokens=23, total_tokens=57)}, delta=None, additional_kwargs={})

Instead of `CompletionResponse`, this API returns an object of type `ChatResponse`.
```python
ChatResponse(
    message=ChatMessage(
        role=<MessageRole.ASSISTANT: 'assistant'>, 
        content="Ahoy matey! The name's Captain Rainbowbeard! Aye...me hearty?", 
        additional_kwargs={}
    ), 
    raw={
        'id': 'chatcmpl-9RnxfUqEgwaXk6SMlvTtt2em95cQO', 
        'choices': [
            Choice(
                finish_reason='stop', 
                index=0, 
                logprobs=None, 
                message=ChatCompletionMessage(
                    content="Ahoy matey!...me hearty?", 
                    role='assistant', 
                    function_call=None, 
                    tool_calls=None
                )
            )
        ], 
        'created': 1716413935, 
        'model': 'gpt-3.5-turbo-0125', 
        'object': 'chat.completion', 
        'system_fingerprint': None, 
        'usage': CompletionUsage(
            completion_tokens=63, 
            prompt_tokens=23, 
            total_tokens=86
        )
    }, 
    delta=None, 
    additional_kwargs={}
)
```

In [14]:
# print() shows the message in "<role>: <content>" form instead of the full repr.
print(resp)

assistant: Ahoy matey! Ye can call me Captain Rainbowbeard! Aye, me beard be as colorful as a rainbow, just like me personality! Arrr!
