# Gai/Lib: Examples

## Text-to-Text (TTT)

Text generation example.

In [None]:
# Request a one-paragraph story through the "ttt" category without naming a
# generator (this cell labels that path Mistral-7B) and echo every chunk the
# call yields.
from gai.lib.GGG import GGG

ggg = GGG("../gai-lib/gai.yml")

# Mistral7B
print("> Mistral-7B")
story_chunks = ggg(
    category="ttt",
    messages="user: Tell me a one paragraph story\nassistant:",
)
for chunk in story_chunks:
    text = chunk.decode()
    # end="" keeps the pieces on one line; flush so they show up immediately.
    print(text, end="", flush=True)
print("\n")

In [None]:
# Same story prompt as the Mistral cell, but explicitly routed to the "gpt-4"
# generator; every chunk the call yields is echoed.
from gai.lib.GGG import GGG

ggg = GGG("../gai-lib/gai.yml")

# GPT4
print("> OpenAI")
story_chunks = ggg(
    category="ttt",
    generator="gpt-4",
    messages="user: Tell me a one paragraph story\nassistant:",
)
for chunk in story_chunks:
    text = chunk.decode()
    # end="" keeps the pieces on one line; flush so they show up immediately.
    print(text, end="", flush=True)
print("\n")

In [None]:
print("> OpenAI API")

# Baseline for comparison: the same story prompt sent with the OpenAI SDK
# directly instead of going through gai.
import os

import openai
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise Exception(
        "OPENAI_API_KEY not found in environment variables")
openai.api_key = api_key
client = OpenAI()

request = dict(
    model="gpt-4",
    messages=[{"role":"user","content":"Tell me a one paragraph story"}],
    stream=True,
    max_tokens=100,
)
response = client.chat.completions.create(**request)
for chunk in response:
    delta_text = chunk.choices[0].delta.content
    # Chunks whose delta has no text content are skipped.
    if delta_text:
        print(delta_text, end="", flush=True)

### TTT With Function Call

OpenAI provides a powerful feature for its API known as Function Calling. Essentially, this is a mechanism for the LLM to seek external assistance when it encounters limitations in its text generation capabilities. It does this by returning a string that emulates the calling of a function, based on the function description provided by the user.

In the following example, we demonstrate function calling being applied to an open-source model, Mistral-7B.

In [None]:
# Function calling through gai without naming a generator (labelled
# Mistral-7B): ask for today's date and offer a single 'gg' tool.
from gai.lib.GGG import GGG

ggg = GGG("../gai-lib/gai.yml")

# Parameter schema of the 'gg' tool: one required string, 'search_query'.
gg_parameters = {
    "type": "object",
    "properties": {
        "search_query": {
            "type": "string",
            "description": "The search query to search google with. For example, to find the current date or time, use 'current date' or 'current time' respectively.",
        },
    },
    "required": ["search_query"],
}

# Tool description handed to the model.
gg_tool = {
    "type": "function",
    "function": {
        "name": "gg",
        "description": "The 'gg' function is a powerful tool that allows the AI to gather external information from the internet using Google search. It can be invoked when the AI needs to answer a question or provide information that requires up-to-date, comprehensive, and diverse sources which are not inherently known by the AI. For instance, it can be used to find current news, weather updates, latest sports scores, trending topics, specific facts, or even the current date and time. The usage of this tool should be considered when the user's query implies or explicitly requests recent or wide-ranging data, or when the AI's inherent knowledge base may not have the required or most current information. The 'search_query' parameter should be a concise and accurate representation of the information needed.",
        "parameters": gg_parameters,
    },
}

# Mistral7B
print("> Mistral-7B")
response = ggg(
    category="ttt",
    messages="user: What is today's date?\nassistant:",
    tools=[gg_tool],
    stream=False,
)
print(response.decode())

In [None]:
# Function calling through gai, routed to the "gpt-4" generator: ask a
# current-affairs question and offer a single 'gg' tool.
from gai.lib.GGG import GGG

ggg = GGG("../gai-lib/gai.yml")

# Parameter schema of the 'gg' tool: one required string, 'search_query'.
gg_parameters = {
    "type": "object",
    "properties": {
        "search_query": {
            "type": "string",
            "description": "The search query to search google with. For example, to find the current date or time, use 'current date' or 'current time' respectively.",
        },
    },
    "required": ["search_query"],
}

# Tool description handed to the model.
gg_tool = {
    "type": "function",
    "function": {
        "name": "gg",
        "description": "The 'gg' function is a powerful tool that allows the AI to gather external information from the internet using Google search. It can be invoked when the AI needs to answer a question or provide information that requires up-to-date, comprehensive, and diverse sources which are not inherently known by the AI. For instance, it can be used to find current news, weather updates, latest sports scores, trending topics, specific facts, or even the current date and time. The usage of this tool should be considered when the user's query implies or explicitly requests recent or wide-ranging data, or when the AI's inherent knowledge base may not have the required or most current information. The 'search_query' parameter should be a concise and accurate representation of the information needed.",
        "parameters": gg_parameters,
    },
}

# OpenAI
print("> OpenAI")
response = ggg(
    category="ttt",
    generator="gpt-4",
    messages="user: Who is the current president of Singapore?\nassistant:",
    tools=[gg_tool],
    stream=False,
)
print(response.decode())

In [None]:
print("> OpenAI Original")

# Baseline for comparison: call the OpenAI SDK directly with the 'gg' tool
# available and reassemble the streamed tool call into one dict.
import json
import os

import openai
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()
if not os.environ.get("OPENAI_API_KEY"):
    raise Exception(
        "OPENAI_API_KEY not found in environment variables")
openai.api_key = os.environ["OPENAI_API_KEY"]
client = OpenAI()

response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role":"user","content":"Tell me the latest news on Singapore"}],
    stream=True,
    max_tokens=100,
    tools=[
        {
            "type": "function",
            "function": {
                "name": "gg",
                "description": "The 'gg' function is a powerful tool that allows the AI to gather external information from the internet using Google search. It can be invoked when the AI needs to answer a question or provide information that requires up-to-date, comprehensive, and diverse sources which are not inherently known by the AI. For instance, it can be used to find current news, weather updates, latest sports scores, trending topics, specific facts, or even the current date and time. The usage of this tool should be considered when the user's query implies or explicitly requests recent or wide-ranging data, or when the AI's inherent knowledge base may not have the required or most current information. The 'search_query' parameter should be a concise and accurate representation of the information needed.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "search_query": {
                            "type": "string",
                            "description": "The search query to search google with. For example, to find the current date or time, use 'current date' or 'current time' respectively."
                        }
                    },
                    "required": ["search_query"]
                }
            }
        }
    ],
)

# The tool call arrives in pieces: collect the function name and concatenate
# the argument fragments. Only the first tool call of each chunk is read.
tool = {}
tool["arguments"] = ""
for chunk in response:
    if not chunk.choices:
        # Nothing to read in a chunk without choices.
        continue
    tool_calls = chunk.choices[0].delta.tool_calls
    if not tool_calls:
        continue
    function = tool_calls[0].function
    if function is None:
        continue
    # A chunk may carry the name, an arguments fragment, or both, so the two
    # are checked independently rather than with if/elif.
    if function.name:
        tool["name"] = function.name
    if function.arguments:
        tool["arguments"] += function.arguments
print(json.dumps(tool, indent=4)+"\n")

-----

## Text-to-Speech (TTS)

In [None]:
from gai.common.sound_utils import play_audio, save_audio

# Request payload unpacked into the gai text-to-speech calls in this section;
# voice and language are left as None here.
data = dict(
    input="The definition of insanity is doing the same thing over and over and expecting different results.",
    voice=None,
    language=None,
)

In [None]:
# Text-to-speech through gai without naming a generator; whatever the call
# returns is handed straight to play_audio.
from gai.lib.GGG import GGG

ggg = GGG("../gai-lib/gai.yml")

response = ggg(category="tts", **data)
play_audio(response)

In [None]:
# openai tts
# Same payload as the previous call, routed to the "openai-tts-1" generator.
response = ggg(category="tts", generator="openai-tts-1", **data)
play_audio(response)

In [None]:
# openai original
# Baseline for comparison: the same sentence synthesised with the OpenAI SDK
# client that an earlier cell created as `client`.
speech_text = "The definition of insanity is doing the same thing over and over and expecting different results."
response = client.audio.speech.create(
    model='tts-1',
    input=speech_text,
    voice="alloy",
)
play_audio(response.content)

## Speech-to-Text (STT)

In [None]:
# sample
# Play the recording that the speech-to-text cells transcribe.
sample_path = "../gai-lib/tests/lib/stt/today-is-a-wonderful-day.wav"
with open(sample_path, "rb") as f:
    audio_bytes = f.read()
    play_audio(audio_bytes)

In [None]:
from gai.lib.GGG import GGG

ggg = GGG("../gai-lib/gai.yml")

# OpenSource Whisper
# No generator is named; the open file handle is passed as `file`.
sample_path = "../gai-lib/tests/lib/stt/today-is-a-wonderful-day.wav"
with open(sample_path, "rb") as f:
    output = ggg(category="stt", file=f)
    transcript = output.decode()
    print(transcript)

In [None]:
# OpenAI Whisper
# Routed to the "openai-whisper" generator; the transcript is read from the
# result's `.text` attribute.
sample_path = "../gai-lib/tests/lib/stt/today-is-a-wonderful-day.wav"
with open(sample_path, "rb") as f:
    output = ggg(category="stt", generator="openai-whisper", file=f)
    print(output.text)

## Image-to-Text (ITT)

In [None]:
# Build the chat message (question + inline base64 image) used by the
# image-to-text cells, then show the image in the notebook.
import os

from IPython.display import Image, display

from gai.common.image_utils import read_to_base64

image_file = os.path.join("../gai-lib/tests/lib/itt", "buses.jpeg")
encoded_string = read_to_base64(image_file)

# The user turn has two content parts: the text question and the image,
# embedded as a data URL.
question_part = {"type": "text", "text": "What’s in this image?"}
image_part = {
    "type": "image_url",
    "image_url": {
        "url": f"data:image/jpeg;base64,{encoded_string}",
    },
}
messages = [
    {
        "role": "user",
        "content": [question_part, image_part],
    },
]
display(Image(image_file))

In [None]:
# Llava
# Image-to-text through gai without naming a generator, streamed chunk by
# chunk using the `messages` built in the previous cell.
from gai.lib.GGG import GGG

ggg = GGG("../gai-lib/gai.yml")

print("> Llava")
caption_chunks = ggg(category="itt", messages=messages, stream=True)
for chunk in caption_chunks:
    text = chunk.decode()
    print(text, end="", flush=True)
print("\n")


In [None]:
# OpenAI
# Same image question, routed to the "openai-vision" generator.
print("> OpenAI")
caption_chunks = ggg(
    category="itt",
    generator="openai-vision",
    messages=messages,
    stream=True,
    max_tokens=100,
)
for chunk in caption_chunks:
    text = chunk.decode()
    print(text, end="", flush=True)
print("\n")

## Retrieval Augmented Generation (RAG)

Monitor the progress of the indexing status of the remote service by running the following:

a) /tests/client/rag/function_test_websocket_forwarder.py

b) /tests/client/rag/function_test_websocket_forwarder_listener.py

The first script will forward the status of the remote service to the local port.
The second script will pull the status from the local port and display it.

In [1]:
# Delete "demo" Collection.
from gai.lib.RAGClient import RAGClient

collection_name = "demo"
rag = RAGClient("../gai-lib/gai.yml")
# Left as the final bare expression so the notebook displays the result.
rag.delete_collection(collection_name)

2024-02-19 07:31:31 INFO gai.lib.RAGClient:RAGClient.delete_collection: Deleting collection https://gaiaio.ai/api/gen/v1/rag/collection/demo


{'count': 0}

In [2]:
# List Collections
from gai.lib.RAGClient import RAGClient

config_path = "../gai-lib/gai.yml"
rag = RAGClient(config_path)
# Left as the final bare expression so the notebook displays the result.
rag.list_collections()

{'collections': []}

#### Start Listener

Before continuing with the following steps, start an external rag_listener to monitor the progress of the indexing status.

```bash
cd /gai-lib/tests/client/rag
python rag_listener.py
```

#### Start Indexing

In [3]:
# Index 2023 national day speech
from gai.lib.GGG import GGG

ggg = GGG("../gai-lib/gai.yml")


def updater(status):
    """Print the given status value unchanged."""
    # NOTE(review): nothing in this cell passes `updater` to the index call.
    print(status)


speech_metadata = {
    "title": "2023 National Day Rally Speech",
    "source": "https://www.pmo.gov.sg/Newsroom/national-day-rally-2023",
}
# NOTE(review): the path below uses tests/clients/rag while the notebook text
# refers to tests/client/rag — confirm which directory exists.
data = {
    "collection_name": "demo",
    "file_path": "../gai-lib/tests/clients/rag/pm_long_speech_2023.txt",
    "metadata": speech_metadata,
}
# Left as the final bare expression so the notebook displays the result.
ggg("index", **data)


{'document_id': 'a7f04c65-5520-4bea-b04b-357ef9996c32'}

In [None]:
# List documents
from gai.lib.RAGClient import RAGClient

collection_name = "demo"
rag = RAGClient("../gai-lib/gai.yml")
# `docs` is kept because later cells read the first document id from it.
docs = rag.list_documents(collection_name)
print(docs)

In [None]:
# get document
from gai.lib.RAGClient import RAGClient

rag = RAGClient("../gai-lib/gai.yml")
# Uses the first entry of the `docs` listing from the previous cell.
first_document_id = docs['documents'][0]['id']
rag.get_document(first_document_id)

In [None]:
# Plain RAG: retrieve context for the question from the "demo" collection,
# then ask the "ttt" category to answer using that context.
from gai.lib.GGG import GGG

ggg = GGG("../gai-lib/gai.yml")

question = "Who are the young seniors?"
data = {
    "collection_name": "demo",
    "query_texts": question,
}
response = ggg("retrieve", **data)
context = response.text

prompt = f"user: Based on the context below: <context>{context}</context>, answer the question: {question}\nassistant:"
answer = ggg("ttt", messages=prompt)
for chunk in answer:
    text = chunk.decode()
    print(text, end="", flush=True)

In [None]:
# Tool descriptions offered to the model in the RAG + function-call example:
# 'gg' (Google search) and 'rag' (semantic search over the indexed speech).
from gai.lib.GGG import GGG

ggg = GGG("../gai-lib/gai.yml")

gg_tool = {
    "type": "function",
    "function": {
        "name": "gg",
        "description": "The 'gg' function is a powerful tool that allows the AI to gather external information from the internet using Google search. It can be invoked when the AI needs to answer a question or provide information that requires up-to-date, comprehensive, and diverse sources which are not inherently known by the AI. For instance, it can be used to find current news, weather updates, latest sports scores, trending topics, specific facts, or even the current date and time. The usage of this tool should be considered when the user's query implies or explicitly requests recent or wide-ranging data, or when the AI's inherent knowledge base may not have the required or most current information. The 'search_query' parameter should be a concise and accurate representation of the information needed.",
        "parameters": {
            "type": "object",
            "properties": {
                "search_query": {
                    "type": "string",
                    "description": "The search query to search google with. For example, to find the current date or time, use 'current date' or 'current time' respectively.",
                },
            },
            "required": ["search_query"],
        },
    },
}

rag_tool = {
    "type": "function",
    "function": {
        "name": "rag",
        "description": "The 'rag' function is a specialized tool that allows the AI to perform semantic searches on PM Lee Hsien Loong's 2023 National Day Rally. It can be invoked when the AI needs to retrieve facts or information from the speech. This function utilizes advanced Natural Language Processing (NLP) techniques to understand and match the semantic meaning of the user's query with the content of the speech. This is particularly useful when the user's query relates to specific themes, topics, or statements made during the rally.",
        "parameters": {
            "type": "object",
            "properties": {
                "search_query": {
                    # NOTE(review): this value is not a JSON Schema type name;
                    # kept verbatim — confirm whether the prompt relies on it.
                    "type": "[query_1, query_2, query_3]",
                    "description": "An array of search queries to perform a semantic search in the vector database. Each string in the array represents a different way of asking the question. This expands the coverage of the search and increases the chance of finding the best match. For example, instead of using one query like 'economic policies', use multiple variations like ['PM Lee Hsien Loong's economic policies announced at the 2023 National Day Rally', 'What were the economic strategies discussed by PM Lee in 2023 National Day Rally?', 'Economic measures announced by PM Lee in 2023 Rally'].",
                },
            },
            "required": ["search_query"],
        },
    },
}

tools = [gg_tool, rag_tool]


# RAG + Function Call
# 1. Ask the question with the tools attached so the model produces a tool call.
# 2. Take a search query from the tool-call arguments.
# 3. Retrieve context for that query from the "demo" collection.
# 4. Ask again with the retrieved context and print the answer chunks.
import json

print("> Mistral-7B")
question = "What did PM Lee say about young seniors?"

messages = [{'role':'user','content':question},{'role':'assistant','content':''}]
response = ggg(category="ttt",
               messages=messages,
               tools=tools,
               stream=False,
               max_new_tokens=100)
result=response.decode()

# The 'rag' tool description asks for an array of queries, but a model may
# still return one bare string. Indexing a string with [0] would silently
# yield only its first character, so use a string as the query itself.
search_queries = json.loads(result['arguments'])['search_query']
if isinstance(search_queries, str):
    search_query = search_queries
else:
    search_query = search_queries[0]

data = {
    "collection_name": "demo",
    "query_texts": search_query,
}
response = ggg("retrieve", **data)
context = response.text
answer = ggg("ttt", messages=f"user: Based on the context below: <context>{context}</context>, answer the question: {question}\nassistant:")
for chunk in answer:
    print(chunk.decode(), end="", flush=True)


In [None]:
# delete document
from gai.lib.RAGClient import RAGClient
rag = RAGClient(("../gai-lib/gai.yml"))
rag.delete_document(docs['documents'][0]['id'])