Error calling LLM API: Connection error.

CUDA_VISIBLE_DEVICES=4,5 vllm serve Qwen2.5-VL-7B-Instruct --dtype bfloat16 \
--gpu-memory-utilization=0.8 --enable-prefix-caching --tensor-parallel-size=2 \
--limit-mm-per-prompt image=20 --served-model-name Qwen2.5-VL-7B-Instruct



def initialize_client(args, base_url=None, model_name=None):
    """Build an ``OpenAIWrapper`` for an OpenAI-compatible endpoint.

    Args:
        args: Object exposing ``api_key_file`` (path of a text file holding
            the API key) and ``cache_seed`` (forwarded to ``OpenAIWrapper``).
        base_url: Root URL of the endpoint. Falls back to
            ``http://localhost:8000/v1`` when omitted.
        model_name: Model identifier placed in the config. Falls back to
            ``Qwen2.5-VL-7B-Instruct`` when omitted.

    Returns:
        The configured ``OpenAIWrapper``, or ``None`` when the API key file
        cannot be read.
    """
    default_base_url = "http://localhost:8000/v1"
    # No leading slash: the name has to match what the server advertises.
    default_model_name = "Qwen2.5-VL-7B-Instruct"
    # NOTE(review): an empty key is suspected to break the request's
    # Authorization header and surface as "Connection error" -- confirm.
    # A non-empty placeholder is used when the key file is blank.
    placeholder_api_key = "EMPTY"

    try:
        with open(args.api_key_file, "r", encoding="utf-8") as key_file:
            api_key = key_file.read().strip()
    except (OSError, UnicodeError) as e:
        print(f"Error reading API key file: {e}")
        return None

    config_list = [
        {
            "model": model_name or default_model_name,
            "base_url": base_url or default_base_url,
            "api_key": api_key or placeholder_api_key,
            "api_type": "openai",
            # presumably [prompt, completion] pricing -- verify the unit
            # expected by OpenAIWrapper.
            "price": [0.08 / 1000, 0.24 / 1000],
        }
    ]

    return OpenAIWrapper(config_list=config_list, cache_seed=args.cache_seed)


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Error calling LLM API: Connection error. #1

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Error calling LLM API: Connection error. #1

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions