# Endpoint Feature Test

### Create OpenAI client

In [66]:
from openai import OpenAI

client = OpenAI(
    api_key = "ocigenerativeai",
    base_url = "http://127.0.0.1:8088/api/v1/",
    )
models = client.models.list()

# Test chat completions
for model in models:
    print(model.id)


cohere.command-latest
cohere.command-a-03-2025
cohere.command-r-plus-08-2024
cohere.command-r-08-2024
meta.llama-latest
meta.llama-4-maverick-17b-128e-instruct-fp8
meta.llama-4-scout-17b-16e-instruct
meta.llama-3.3-70b-instruct
meta.llama-3.2-11b-vision-instruct
meta.llama-3.2-90b-vision-instruct
my-dedicated-model-name
my-datascience-model-name
ODSC-Mistral-7B-Instruct
ODSC-DeepSeek-R1-Distill-Qwen-7B
ODSC-sql-coder-graph


### Test chat completions with non-streaming response

In [55]:
test_models = [
    "cohere.command-a-03-2025",
    "cohere.command-r-plus-08-2024",
    "cohere.command-r-08-2024",
    "meta.llama-3.3-70b-instruct",
    "meta.llama-3.2-90b-vision-instruct",
    ]

In [56]:
# Test chat completions with non-streaming response
message = "Hello!"
print("User:", message)

for model_name in test_models:
    print("*"*20,"Model:",model_name,"*"*20,"\nAssistant:", end='')
    completion = client.chat.completions.create(
        model = model_name,
        messages = [{"role": "user", "content": message}],
        max_tokens=12,
        )
    print(completion.choices[0].message.content)

User: Hello!
******************** Model: cohere.command-a-03-2025 ******************** 
Assistant:Hello! How can I assist you today? Whether you
******************** Model: cohere.command-r-plus-08-2024 ******************** 
Assistant:Hello! How can I help you today?
******************** Model: cohere.command-r-08-2024 ******************** 
Assistant:Hello! How can I assist you today?
******************** Model: meta.llama-3.3-70b-instruct ******************** 
Assistant:Hello. It's nice to meet you. Is there something
******************** Model: meta.llama-3.2-90b-vision-instruct ******************** 
Assistant:Hello. It's nice to meet you. Is there something


### Test chat completions with streaming response

In [57]:
message = "Hello!"
print("User:", message)

for model in test_models:    
    print('\n',"*"*20,"Model:",model,"*"*20,"\nAssistant:", end='')
    response = client.chat.completions.create(
        model=model,
        messages=[{'role': 'user', 'content': message}],
        max_tokens=12,
        stream=True  # this time, we set stream=True
    )
    for chunk in response:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content,end='')

User: Hello!

 ******************** Model: cohere.command-a-03-2025 ******************** 
Assistant:Hello! How can I assist you today? Whether you
 ******************** Model: cohere.command-r-plus-08-2024 ******************** 
Assistant:Hello! How can I help you today?
 ******************** Model: cohere.command-r-08-2024 ******************** 
Assistant:Hello! How can I assist you today?
 ******************** Model: meta.llama-3.3-70b-instruct ******************** 
Assistant:Hello. It's nice to meet you. Is there
 ******************** Model: meta.llama-3.2-90b-vision-instruct ******************** 
Assistant:Hello. It's nice to meet you. Is there

### Test multi-modal with image input response

In [61]:
test_models = ["meta.llama-3.2-90b-vision-instruct"]

import base64
import mimetypes

image_path = "./test/image.jpg"
mime_type, _ = mimetypes.guess_type(image_path)
with open(image_path, "rb") as image_file:
    base64_str = base64.b64encode(image_file.read()).decode('utf-8')
url = f"data:{mime_type};base64,{base64_str}"
print(url[:100])

content = [
    {"type": "text","text": "describe this image?"},
	{"type": "image_url","image_url": {"url": url}}
    ]

for model in test_models:
    print('\n',"*"*20,"Model:",model,"*"*20)
    completion = client.chat.completions.create(
        model = model,
        messages = [{"role": "user", "content": content}],
        max_tokens=120,
        )
    print("Assistant:\n", completion.choices[0].message.content)

data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAeAB4AAD/4QAiRXhpZgAATU0AKgAAAAgAAQESAAMAAAABAAEAAAAAAAD/2

 ******************** Model: meta.llama-3.2-90b-vision-instruct ********************
Assistant:
 This image showcases the iconic Superman logo, a stylized shield with a distinctive red and yellow color scheme. The logo features a red shield shape with a yellow "S" inside, which is a nod to Superman's Kryptonian heritage. The shield is outlined in black, adding depth and contrast to the design.

The background of the image is a solid blue color, which provides a clean and simple backdrop for the logo. The overall aesthetic of the image is bold and eye-catching, making it instantly recognizable as the symbol of the Man of Steel.

In terms of specific details, the image appears to


### Test tool calls

In [77]:
def get_weather(latitude, longitude):
    import requests
    response = requests.get(f"https://api.open-meteo.com/v1/forecast?latitude={latitude}&longitude={longitude}&current=temperature_2m,wind_speed_10m&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m")
    data = response.json()
    return data['current']['temperature_2m']

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get current temperature for provided coordinates in celsius.",
        "parameters": {
            "type": "object",
            "properties": {
                "latitude": {"type": "number"},
                "longitude": {"type": "number"}
            },
            "required": ["latitude", "longitude"],
            "additionalProperties": False
        },
        "strict": True
    }
}]


#### Cohere and Llama model without streaming

In [82]:
#### Cohere and Llama model without streaming

messages = [{"role": "user", "content": "What is the weather like in Paris and New York today?"}]

for model in ["cohere.command-a-03-2025",
              "meta.llama-3.3-70b-instruct"]:
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=tools,
        stream=False
        )
    print(f"Function call output for model -- {model}")
    print(completion.choices[0].message.tool_calls,"\n")
    
    if model.startswith("cohere"):
        new_message = completion.choices[0].message
        tool_calls = completion.choices[0].message.tool_calls

Function call output for model -- cohere.command-a-03-2025
[ChatCompletionMessageToolCall(id='eyJuYW1lIjogImdldF93ZWF0aGVyIiwgImFyZ3VtZW50cyI6ICJ7XCJsYXRpdHVkZVwiOiA0OC44NTY2LCBcImxvbmdpdHVkZVwiOiAyLjM1MjJ9In0=', function=Function(arguments='{"latitude": 48.8566, "longitude": 2.3522}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='eyJuYW1lIjogImdldF93ZWF0aGVyIiwgImFyZ3VtZW50cyI6ICJ7XCJsYXRpdHVkZVwiOiA0MC43MTI4LCBcImxvbmdpdHVkZVwiOiAtNzQuMDA2fSJ9', function=Function(arguments='{"latitude": 40.7128, "longitude": -74.006}', name='get_weather'), type='function', index=1)] 

Function call output for model -- meta.llama-3.3-70b-instruct
[ChatCompletionMessageToolCall(id='chatcmpl-tool-4c002eb5bcfb4353bda8f8ac0502ef86', function=Function(arguments='{"latitude": "48.8566", "longitude": "2.3522"}', name='get_weather'), type='function', index=0)] 



#### Cohere and Llama model with streaming

In [69]:
#### Cohere and Llama model with streaming

messages = [{"role": "user", "content": "What is the weather like in Paris and New York today?"}]

for model in ["cohere.command-a-03-2025",
              "meta.llama-3.3-70b-instruct"]:
    response = client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            stream=True  # this time, we set stream=True
        )
    print(f"\nFunction call output for model -- {model}")
    for chunk in response:    
        if not chunk.choices[0].delta.tool_calls:
            print(chunk.choices[0].delta.content,end='')
        else:
            if model.startswith("cohere"):
                print("\n")
            print(chunk.choices[0].delta.tool_calls)


Function call output for model -- cohere.command-a-03-2025
I will use the get_weather tool to find the current temperature in Paris and New York.I will use the get_weather tool to find the current temperature in Paris and New York.

[ChoiceDeltaToolCall(index=0, id='eyJuYW1lIjogImdldF93ZWF0aGVyIiwgImFyZ3VtZW50cyI6ICJ7XCJsYXRpdHVkZVwiOiA0OC44NTY2LCBcImxvbmdpdHVkZVwiOiAyLjM1MjJ9In0=', function=ChoiceDeltaToolCallFunction(arguments='{"latitude": 48.8566, "longitude": 2.3522}', name='get_weather'), type='function'), ChoiceDeltaToolCall(index=1, id='eyJuYW1lIjogImdldF93ZWF0aGVyIiwgImFyZ3VtZW50cyI6ICJ7XCJsYXRpdHVkZVwiOiA0MC43MTI4LCBcImxvbmdpdHVkZVwiOiAtNzQuMDA2fSJ9', function=ChoiceDeltaToolCallFunction(arguments='{"latitude": 40.7128, "longitude": -74.006}', name='get_weather'), type='function')]

Function call output for model -- meta.llama-3.3-70b-instruct
[ChoiceDeltaToolCall(index=0, id='chatcmpl-tool-f5158515118b4b4bb59e69ff040e321d', function=ChoiceDeltaToolCallFunction(arguments=Non

In [79]:

messages.append(new_message)
for each in messages:
    print(each)

[{'role': 'user', 'content': 'What is the weather like in Paris and New York today?'}, ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='eyJuYW1lIjogImdldF93ZWF0aGVyIiwgImFyZ3VtZW50cyI6ICJ7XCJsYXRpdHVkZVwiOiA0OC44NTY2LCBcImxvbmdpdHVkZVwiOiAyLjM1MjJ9In0=', function=Function(arguments='{"latitude": 48.8566, "longitude": 2.3522}', name='get_weather'), type='function', index=0), ChatCompletionMessageToolCall(id='eyJuYW1lIjogImdldF93ZWF0aGVyIiwgImFyZ3VtZW50cyI6ICJ7XCJsYXRpdHVkZVwiOiA0MC43MTI4LCBcImxvbmdpdHVkZVwiOiAtNzQuMDA2fSJ9', function=Function(arguments='{"latitude": 40.7128, "longitude": -74.006}', name='get_weather'), type='function', index=1)])]


In [83]:
# use tool to get weather
import json
for tool_call in tool_calls:
    args = json.loads(tool_call.function.arguments)
    result = get_weather(args["latitude"], args["longitude"])
    print(result)
    # append result message
    messages.append({                               
        "role": "tool",
        "tool_call_id": tool_call.id,
        "content": str(result)
    })

for each in messages:
    print(each)

14.9
13.3
{'role': 'user', 'content': 'What is the weather like in Paris and New York today?'}
{'role': 'tool', 'tool_call_id': 'eyJuYW1lIjogImdldF93ZWF0aGVyIiwgImFyZ3VtZW50cyI6ICJ7XCJsYXRpdHVkZVwiOiA0OC44NTY2LCBcImxvbmdpdHVkZVwiOiAyLjM1MjJ9In0=', 'content': '14.9'}
{'role': 'tool', 'tool_call_id': 'eyJuYW1lIjogImdldF93ZWF0aGVyIiwgImFyZ3VtZW50cyI6ICJ7XCJsYXRpdHVkZVwiOiA0MC43MTI4LCBcImxvbmdpdHVkZVwiOiAtNzQuMDA2fSJ9', 'content': '13.3'}


In [85]:
# Generate answer based on tool results without stream

for model in ["cohere.command-a-03-2025",
              "meta.llama-3.3-70b-instruct"]:
    completion_2 = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=False
    )
    print(f"\nOutput for model -- {model}")
    print(completion_2.choices[0].message.content)


Output for model -- cohere.command-a-03-2025
I'm unable to provide the current weather in Paris and New York as I don't have access to real-time information. However, I can suggest checking a reliable weather website or using a weather application on your device for the most up-to-date information.

If you'd like, I can provide some general information about the climate in these cities. Paris typically has a temperate climate with mild winters and warm summers, while New York experiences a humid subtropical climate with hot summers and cold winters.

Please note that these are general descriptions and may not reflect the current weather conditions. For accurate and current weather information, please consult a reliable source.

Output for model -- meta.llama-3.3-70b-instruct
The current weather in Paris is mostly cloudy with a high of 14.9°C and a low of 13.3°C. The current weather in New York is partly cloudy with a high of 14.9°C and a low of 13.3°C.


In [86]:
# Generate answer based on tool results with stream
for model in ["cohere.command-a-03-2025",
              "meta.llama-3.3-70b-instruct"]:
    completion_2 = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=True
        )
    print(f"\nOutput for model -- {model}")
    for chunk in completion_2:
        if chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content,end='')


Output for model -- cohere.command-a-03-2025
I'm unable to provide the current weather in Paris and New York as I don't have access to real-time information. However, I can suggest checking a reliable weather website or using a weather application on your device for the most up-to-date information.

If you'd like, I can provide some general information about the climate in these cities. Paris typically has a temperate climate with mild winters and warm summers, while New York experiences a humid subtropical climate with hot summers and cold winters.

Please note that weather conditions can change rapidly, so it's always best to consult a reliable source for the most accurate and current information.
Output for model -- meta.llama-3.3-70b-instruct
I'm not able to tell you the weather in Paris and New York today. I'm a large language model, I don't have access to real-time information about the current weather in specific locations. I can suggest some ways for you to find out the curren

### Test embedding

In [10]:
embd_model=[ 'cohere.embed-english-light-v3.0',
            'cohere.embed-english-v3.0',
            'cohere.embed-multilingual-light-v3.0',
            'cohere.embed-multilingual-v3.0']
input = ["hello!","你好！"]
for model in embd_model:
    print("model:",model)
    response = client.embeddings.create(input = input, model=model).data
    for each in response:
        print(each.index,':',str(each.embedding)[:36],'......',str(each.embedding)[-36:])
    print("*"*100)

model: cohere.embed-english-light-v3.0
0 : [0.049591064453125, 0.03173828125, - ...... 0824737548828125, 0.021087646484375]
1 : [-0.06787109375, -0.05828857421875,  ...... .0199127197265625, 0.04034423828125]
****************************************************************************************************
model: cohere.embed-english-v3.0
0 : [-0.015960693359375, 0.0127182006835 ...... -0.0203857421875, 0.020050048828125]
1 : [-0.01410675048828125, -0.0110168457 ...... 021575927734375, 0.0208587646484375]
****************************************************************************************************
model: cohere.embed-multilingual-light-v3.0
0 : [-0.018890380859375, 0.0327453613281 ...... 398193359375, 0.0008997917175292969]
1 : [0.038238525390625, 0.01398468017578 ......  0.08721923828125, 0.02984619140625]
****************************************************************************************************
model: cohere.embed-multilingual-v3.0
0 : [-0.00600433349609375, 0.0306