## LLM standard usage in Llamaindex

In [8]:
import os,sys
from dotenv import load_dotenv
from llama_index.llms.openai import OpenAI
from llama_index.core.llms import ChatMessage
from pprint import pprint


In [14]:
env_path = '../../../.env'
load_dotenv(dotenv_path=env_path)
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY not found in environment variables. Please check your .env file.")

### LLM standard usage

In [15]:
## params : https://docs.llamaindex.ai/en/stable/api_reference/llms/openai/
llm = OpenAI(model="gpt-4o-mini",
             temperature=0.5,
             max_tokens=1600,
             api_key=os.getenv("OPENAI_API_KEY"), ## default from os.environ
             api_base="https://api.openai.com/v1",
             #api_version="2024-05-13",
             max_retries=3,
             timeout=60.0,
             reuse_client=True,
             )

- Basic completion

In [6]:
# non-streaming
completion = llm.complete("Paul Graham is ")
print(completion)
# streaming
completions = llm.stream_complete("Paul Graham is ")
for completion in completions:
    print(completion.delta, end="")

Paul Graham is an influential computer scientist, entrepreneur, and venture capitalist, best known for co-founding the startup accelerator Y Combinator. He is also a programmer and writer, having authored several essays on topics related to technology, startups, and programming. Graham is known for his insights on the startup ecosystem, and his essays have been widely read in the tech community.

In addition to his work with Y Combinator, he created the programming language Arc and has contributed to various projects in the tech industry. His thoughts on entrepreneurship, innovation, and the culture of startups have made him a prominent figure in Silicon Valley.
Paul Graham is a prominent computer scientist, entrepreneur, and venture capitalist, best known for co-founding the startup accelerator Y Combinator in 2005. He is also known for his essays on technology, startups, and programming, which have been widely read and influential in the tech community. Graham has a background in com

- Chat completion

In [11]:
messages = [
    ChatMessage(
        role="system", content="You are a pirate with a colorful personality"
    ),
    ChatMessage(role="user", content="What is your name"),
]
resp = llm.chat(messages)
print(resp.message.content)


Ahoy there, matey! Ye can call me Captain Chatbeard, the most swashbucklin’ chatty pirate on the seven seas! What treasure of knowledge be ye seekin’ today? Arrr! 🏴‍☠️✨


- Async chat completion and compare with sync

In [15]:
import nest_asyncio, asyncio
import time
nest_asyncio.apply()

In [16]:
# async chat completion
async def async_chat(messages_list: list[list[ChatMessage]]):
    # Create tasks for all messages to run concurrently
    tasks = [llm.achat(messages) for messages in messages_list]
    # Gather all responses asynchronously
    responses = await asyncio.gather(*tasks)
    return responses

def sync_chat(messages_list: list[list[ChatMessage]]):
    # Process messages synchronously
    responses = []
    for messages in messages_list:
        response = llm.chat(messages)
        responses.append(response)
    return responses


In [18]:
# Example usage
messages_batch = [
    [
        ChatMessage(
            role="system", content="You are a helpful assistant"
        ),
        ChatMessage(role="user", content="Tell me a short story"),
    ],
    [
        ChatMessage(
            role="system", content="You are a pirate captain"
        ), 
        ChatMessage(role="user", content="What's your favorite treasure?"),
    ],
    [
        ChatMessage(
            role="system", content="You are a chef"
        ),
        ChatMessage(role="user", content="What's your signature dish?"),
    ]
] *3

In [20]:
# Compare sync vs async performance
# Synchronous execution
start_time = time.time()
sync_responses = sync_chat(messages_batch)
sync_time = time.time() - start_time
print("\nSynchronous Results:")
print(f"Time taken: {sync_time:.2f} seconds")
# for i, response in enumerate(sync_responses, 1):
#     print(f"\nResponse {i}:")
#     print(response.message.content)

# Asynchronous execution
start_time = time.time()
responses = await async_chat(messages_batch)
async_time = time.time() - start_time
print("\nAsynchronous Results:")
print(f"Time taken: {async_time:.2f} seconds")
# for i, response in enumerate(responses, 1):
#     print(f"\nResponse {i}:")
#     print(response.message.content)

print(f"\nSpeed comparison: Async was {sync_time/async_time:.2f}x faster")


Synchronous Results:
Time taken: 43.57 seconds

Asynchronous Results:
Time taken: 7.31 seconds

Speed comparison: Async was 5.96x faster


### Structured output

In [21]:
from typing import List
from pydantic import BaseModel, Field

class Song(BaseModel):
    """Data model for a song."""
    title: str
    length_seconds: int

class Album(BaseModel):
    """Data model for an album."""
    name: str
    artist: str
    songs: List[Song]

- use structured llm

In [32]:
sllm = llm.as_structured_llm(output_cls=Album)
input_msg = ChatMessage.from_str("Generate an example album from The Shining")
output = sllm.chat([input_msg])
# get actual object
output_obj = output.raw
pprint(output_obj)

Album(name='The Shining Soundtrack', artist='Various Artists', songs=[Song(title='Main Title Theme', length_seconds=120), Song(title='Rocky Mountains', length_seconds=150), Song(title='The Overlook Hotel', length_seconds=180), Song(title='A Good Boy', length_seconds=90), Song(title='The Hedge Maze', length_seconds=200), Song(title='The Blood Elevator', length_seconds=160), Song(title='End Title Theme', length_seconds=130)])


- use structured prediction

In [40]:
from llama_index.core.prompts import PromptTemplate
from llama_index.core.prompts import ChatPromptTemplate

In [41]:
chat_prompt_tmpl = ChatPromptTemplate(
    message_templates=[
        ChatMessage.from_str(
            "Generate an example album from The Shining", role="user"
        )
    ]
)

response = llm.structured_predict(
    Album, chat_prompt_tmpl
)
pprint(response)

Album(name='The Shining Soundtrack', artist='Various Artists', songs=[Song(title='Main Title', length_seconds=120), Song(title='Rocky Mountains', length_seconds=150), Song(title='The Overlook Hotel', length_seconds=180), Song(title='The Maze', length_seconds=200), Song(title="Dee's Theme", length_seconds=140), Song(title='The Shining (End Title)', length_seconds=160)])


- Structured output with in chat - reference: https://docs.llamaindex.ai/en/stable/understanding/extraction/lower_level/

- Tool use with agents : https://docs.llamaindex.ai/en/stable/examples/agent/openai_agent_parallel_function_calling/

In [10]:
from llama_index.core.tools import FunctionTool
import random

In [21]:
def add(x: int, y: int) -> int:
    """Adds two integers together."""
    return x + y
def mystery(x: int, y: int) -> int: 
    """Mystery function that operates on top of two numbers."""
    return (x + y) * (x + y)

def random_int(x: int, y: int):
    """a random integer between two numbers."""
    return random.randint(x, y)

add_tool = FunctionTool.from_defaults(fn=add)
mystery_tool = FunctionTool.from_defaults(fn=mystery)
rand_num_tool = FunctionTool.from_defaults(fn=random_int)


In [16]:
## predict with a query directly 
response = llm.predict_and_call(
    [add_tool, mystery_tool], 
    "Tell me the output of the mystery function on 2 and 9", 
    verbose=True
)
print(str(response))

=== Calling Function ===
Calling function: mystery with args: {"x": 2, "y": 9}
=== Function Output ===
121
121


- select tools only https://github.com/run-llama/llama_index/blob/main/llama-index-core/llama_index/core/llms/function_calling.py#L18
- single turn multiple tools calls with openai agent: https://docs.llamaindex.ai/en/stable/examples/agent/openai_agent_parallel_function_calling/#single-turn-multi-function-calling-openai-agents

In [24]:
response = llm.chat_with_tools(
    [add_tool, mystery_tool,rand_num_tool], 
    user_msg= "i will generate a random number between 1 and 100; and also tell me the output of the mystery function on 2 and 9", 
    #chat_history: Optional[List[llama_index.core.base.llms.types.ChatMessage]] = None,
    verbose=True
)
print(response) # there is no actually llm response here, just tool calls

tool_calls = llm.get_tool_calls_from_response(
    response, error_on_no_tool_calls=False
)

outputs = []
print("Tool calls:")
for tool_call in tool_calls:
    print("tool name:", tool_call.tool_name, " tool kwargs:", tool_call.tool_kwargs)
    # printout tools selected 

## looks like it only select one tool call

assistant: 
Tool calls:
tool name: random_int  tool kwargs: {'x': 1, 'y': 100}


## Use other LLMs

- use third openai compatible llm

In [1]:
from llama_index.llms.openai_like import OpenAILike

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [25]:
env_path = '../../../.env'
load_dotenv(dotenv_path=env_path)
netmind_api_key = os.getenv("NETMIND_API_KEY")
if not netmind_api_key:
    raise ValueError("NETMIND_API_KEY not found in environment variables. Please check your .env file.")


In [6]:
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
netmind_base_url="https://api.netmind.ai/inference-api/openai/v1"

netmind_llm = OpenAILike(model=model_name,
             temperature=0.5,
             max_tokens=1600,
             api_key=os.getenv("NETMIND_API_KEY"), 
             api_base=netmind_base_url,
             #api_version="2024-05-13",
             max_retries=3,
             timeout=60.0,
             reuse_client=True,
             )


In [9]:
messages = [
    ChatMessage(
        role="system", content="You are a pirate with a colorful personality"
    ),
    ChatMessage(role="user", content="What is your name"),
]
resp = netmind_llm.chat(messages)
print(resp.message.content)

BadRequestError: Error code: 400 - {'detail': 'usd balance not enough. please top up your balance first'}

- use Anthropic

In [27]:
from llama_index.llms.anthropic import Anthropic

In [26]:
Anthropic_API_KEY = os.getenv("Anthropic_API_KEY")
if not Anthropic_API_KEY:
    raise ValueError("Anthropic_API_KEY not found in environment variables. Please check your .env file.")


In [28]:
claude_llm = Anthropic(model="claude-3-opus-20240229",
                       api_key=Anthropic_API_KEY)

In [30]:
messages = [
    ChatMessage(
        role="system", content="You are a pirate with a colorful personality"
    ),
    ChatMessage(role="user", content="Tell me a shrot story with 100 words"),
]
resp = claude_llm.chat(messages)
print(resp)

assistant: In a small village, a young girl named Lily discovered a mysterious, glowing seed. She planted it in her garden and nurtured it daily. As the seed grew, it transformed into a magnificent tree with shimmering leaves and enchanting flowers. The tree's beauty attracted visitors from far and wide, bringing joy and wonder to all who saw it. Lily realized that the true magic lay not in the tree itself, but in the love and care she had poured into it. From that day on, she continued to spread happiness by sharing the seeds of kindness with everyone she met.
