# DeepSeek LM examples

In [None]:
from vllm import LLM, SamplingParams

# Generate completions for a batch of identical text prompts with vLLM.
GPU_UTILIZATION = 0.98  # forwarded to vLLM as `gpu_memory_utilization`
model_id = "deepseek-ai/deepseek-llm-7b-base"
prompt = 100 * ["Tell me a joke"]  # the same prompt 100 times, sent as one batch

llm = LLM(model=model_id, gpu_memory_utilization=GPU_UTILIZATION)
sampling_params = SamplingParams(
    temperature=0.7,
    top_p=0.9,
    max_tokens=100,
)
out = llm.generate(prompt, sampling_params)

# Collect the text of the first completion of every request.
answer = [request.outputs[0].text for request in out]
print(answer)


# Qwen VLM example

In [None]:
# VLM: here we use the Qwen2-VL model to describe images.
from tqdm import tqdm
from PIL import Image
from qwen_vl_utils import process_vision_info
from transformers import AutoProcessor
from vllm import LLM, SamplingParams

# Configuration for the Qwen2-VL image-description example.
GPU_UTILIZATION = 0.98  # forwarded to vLLM as `gpu_memory_utilization`
model_id = "Qwen/Qwen2-VL-7B-Instruct"
base_prompt = "Describe the image"
# Provide your image or use the demo image from https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg
image_list = ["demo.jpeg"] * 100  # the same image 100 times


# Load the processor (used later for its chat template) and the vLLM engine.
processor = AutoProcessor.from_pretrained(model_id)
sampling_params = SamplingParams(temperature=0.7, top_p=0.8, max_tokens=512)
# The keyword must be spelled `gpu_memory_utilization` (American spelling, as
# in the DeepSeek cell); the previous `gpu_memory_utilisation` is not an
# argument vLLM knows about.
# NOTE(review): an engine left alive from an earlier cell in the same kernel
# may already hold most of the GPU memory -- confirm the kernel is fresh.
llm = LLM(model=model_id, gpu_memory_utilization=GPU_UTILIZATION)

In [None]:
# Build one vLLM request per image, then run generation once for the batch.
vllm_input = []
for image in tqdm(image_list):
    # Single-turn chat message holding the image followed by the text prompt.
    messages = [
        {"role": "user", "content": [
            {
                "type": "image",
                "image": image
            },
            {
                "type": "text",
                "text": base_prompt,
            },
        ]}
    ]
    # Render the chat template to a plain string (no tokenization) ending
    # with the generation prompt.
    prompt = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # Only the first return value (image inputs) is used; the second is discarded.
    image_inputs, _ = process_vision_info(messages)
    multimodal_data = {}
    if image_inputs:
        multimodal_data['image'] = image_inputs
    vllm_input.append({
        'prompt': prompt,
        'multi_modal_data': multimodal_data,
    })

# Generate after the loop: calling `llm.generate` inside it re-ran every
# request accumulated so far on each iteration, and left `answer` undefined
# when `image_list` was empty.
out = llm.generate(vllm_input, sampling_params)
answer = [o.outputs[0].text.strip() for o in out]
print(answer)