-
Notifications
You must be signed in to change notification settings - Fork 3
Open
Description
CUDA_VISIBLE_DEVICES=4,5 vllm serve Qwen2.5-VL-7B-Instruct --dtype bfloat16 \
    --gpu_memory_utilization=0.8 --enable_prefix_caching --tensor-parallel-size=2 \
    --limit-mm-per-prompt image=20 --served-model-name Qwen2.5-VL-7B-Instruct
def initialize_client(args, base_url=None, model_name=None):
    """Initialize the OpenAI client wrapper with the config list.

    Args:
        args: Object exposing ``api_key_file`` (path to a text file whose
            stripped contents are used as the API key) and ``cache_seed``
            (forwarded unchanged to ``OpenAIWrapper``).
        base_url: Endpoint URL of the OpenAI-compatible server. Falls back
            to ``"http://localhost:8000/v1"`` when omitted or empty.
        model_name: Model identifier sent with each request. Falls back to
            ``"Qwen2.5-VL-7B-Instruct"`` when omitted or empty.

    Returns:
        An ``OpenAIWrapper`` built from a single-entry config list, or
        ``None`` if the API key file could not be read.
    """
    # Deliberately best-effort: any failure to obtain the key is reported
    # on stdout and signalled to the caller with None instead of raising.
    try:
        with open(args.api_key_file, 'r') as f:
            api_key = f.read().strip()
    except Exception as e:
        print(f"Error reading API key file: {e}")
        return None

    config_list = [
        {
            # Honor the caller-supplied overrides; the previous version
            # accepted these parameters but silently ignored them.
            "model": model_name or "Qwen2.5-VL-7B-Instruct",
            "base_url": base_url or "http://localhost:8000/v1",
            # Pass through the key that was just read rather than
            # discarding it in favor of a hard-coded empty string.
            "api_key": api_key,
            "api_type": "openai",
            "price": [0.08 / 1000, 0.24 / 1000],
        },
    ]
    return OpenAIWrapper(config_list=config_list, cache_seed=args.cache_seed)
Metadata
Metadata
Assignees
Labels
No labels