In [1]:
len(range(10,100))

90

In [3]:
from pathlib import Path

In [5]:
Path('foo/bar').relative_to('foo')

WindowsPath('bar')

In [8]:
# find what model is running on a vllm server
import requests
import json
import os
from typing import Dict, Any

def get_vllm_model_info(server_url: str) -> Dict[str, Any]:
	"""
	Get the model information from a VLLM server.

	Args:
		server_url (str): The URL of the VLLM server.

	Returns:
		Dict[str, Any]: A dictionary containing the model information.
	"""
	try:
		response = requests.get(f"{server_url}/v1/models")
		response.raise_for_status()  # Raise an error for bad responses
		model_info = response.json()
		return model_info
	except requests.RequestException as e:
		print(f"Error fetching model info: {e}")
		return {}

info = get_vllm_model_info("http://localhost:8000")
info

{'object': 'list',
 'data': [{'id': 'microsoft/Phi-4-multimodal-instruct',
   'object': 'model',
   'created': 1743704659,
   'owned_by': 'vllm',
   'root': 'microsoft/Phi-4-multimodal-instruct',
   'parent': None,
   'max_model_len': 65536,
   'permission': [{'id': 'modelperm-97ec2978acaa401e8ae91ab95a623042',
     'object': 'model_permission',
     'created': 1743704659,
     'allow_create_engine': False,
     'allow_sampling': True,
     'allow_logprobs': True,
     'allow_search_indices': False,
     'allow_view': True,
     'allow_fine_tuning': False,
     'organization': '*',
     'group': None,
     'is_blocking': False}]}]}

In [40]:
info['data'][0]['id']

'microsoft/Phi-4-multimodal-instruct'

In [9]:
import openai

In [13]:
openai.ChatCompletion.create(
	  model="microsoft/Phi-4-multimodal-instruct",

  messages=[
		{"role": "user", "content": "Hello!"},
	]
	# temperature=0,
)

APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742


In [15]:
from openai import OpenAI

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
completion = client.completions.create(model="microsoft/Phi-4-multimodal-instruct",
                                      prompt="Complete the sentence: San Francisco is a")
print("Completion result:", completion)

Completion result: Completion(id='cmpl-9f402c1a31004efdba6df3c46bfb81ec', choices=[CompletionChoice(finish_reason='length', index=0, logprobs=None, text=' city in _____. Multiple choice:\na. California\nb. New York\n', stop_reason=None, prompt_logprobs=None)], created=1743710032, model='microsoft/Phi-4-multimodal-instruct', object='text_completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=16, prompt_tokens=8, total_tokens=24, completion_tokens_details=None, prompt_tokens_details=None))


In [18]:
resp = client.chat.completions.create(model="microsoft/Phi-4-multimodal-instruct",
                                      messages=[{'role':'user', 'content': 'Tell me a short joke.'}])
resp

ChatCompletion(id='chatcmpl-db823bf039a64989b0514e7dd29c52e9', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Why was the math book sad?\n\nBecause it had too many problems! 😄', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[], reasoning_content=None), stop_reason=200020)], created=1743710209, model='microsoft/Phi-4-multimodal-instruct', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=9, total_tokens=26, completion_tokens_details=None, prompt_tokens_details=None), prompt_logprobs=None)

In [21]:
resp.choices[0].completion_tokens

AttributeError: 'Choice' object has no attribute 'completion_tokens'

In [22]:
resp.usage

CompletionUsage(completion_tokens=17, prompt_tokens=9, total_tokens=26, completion_tokens_details=None, prompt_tokens_details=None)

In [41]:
stream = client.chat.completions.create(model="microsoft/Phi-4-multimodal-instruct", stream=True,
										stream_options={"include_usage": True},
                                      messages=[{'role':'user', 'content': 'Tell me a short joke.'}])


In [42]:
stream

<openai.Stream at 0x20b1f75b710>

In [43]:
chunk = next(stream)

In [44]:
chunk.choices[0].delta

ChoiceDelta(content='', function_call=None, refusal=None, role='assistant', tool_calls=None)

In [34]:
type(stream)

openai.Stream

In [45]:
chunk.usage