In [None]:
from cornserve.frontend import CornserveClient

In [None]:
# Client handle used by the later cells to deploy and remove tasks.
# The URL below is a placeholder; replace it with your cluster's host and port.
cornserve = CornserveClient(url="your-cluster.com:port")

In [None]:
import asyncio
from cornserve.app.base import AppRequest, AppResponse
from cornserve.task.builtins.mllm import MLLMInput, MLLMTask, Modality

class Request(AppRequest):
    """Request payload accepted by the app's `serve` function.

    Attributes:
        prompt: The text prompt forwarded to the multimodal LLM.
        multimodal_data: List of (modality, data URL) tuples, for example
            ``("image", "https://...")``. Defaults to an empty list.
    """

    prompt: str
    multimodal_data: list[tuple[str, str]] = []


class Response(AppResponse):
    """Response payload returned by the app's `serve` function.

    Attributes:
        response: The text response produced by the multimodal LLM task.
    """

    response: str


# Module-level multimodal LLM task configured for image input.
# The `serve` function awaits this object directly, so it must exist before
# `serve` is called.
mllm = MLLMTask(
    model_id="Qwen/Qwen2-VL-7B-Instruct",
    modalities=[Modality.IMAGE],
)

In [None]:
# Deploy the Qwen2-VL task through the client.
# NOTE(review): presumably required before the task can be invoked — confirm.
cornserve.deploy(mllm)

In [None]:
# A second image-capable MLLM task backed by a different model.
# It is deployed and later removed, but no cell shown here invokes it.
llava = MLLMTask(
    model_id="llava-hf/llava-onevision-qwen2-7b-ov-chat-hf",
    modalities=[Modality.IMAGE],
)

In [None]:
# Deploy the LLaVA task alongside the already-deployed Qwen2-VL task.
cornserve.deploy(llava)

In [None]:
async def serve(request: Request) -> Response:
    """Answer an app request by running it through the module-level `mllm` task.

    Args:
        request: The prompt and any multimodal data to forward to the model.

    Returns:
        A `Response` whose `response` field holds the model's output.
    """
    result = await mllm(
        MLLMInput(
            prompt=request.prompt,
            multimodal_data=request.multimodal_data,
        )
    )
    return Response(response=result.response)

In [None]:
# Check the client's connection before sending requests.
# NOTE(review): presumably returns a bool that the notebook displays — confirm.
cornserve.is_connected()

In [None]:
request = Request(
        prompt="What is this image about?",
        multimodal_data=[("image", "https://picsum.photos/seed/random/200/300")],
    )
response = await serve(request)

In [None]:
# Remove the LLaVA task that was deployed earlier in the notebook.
cornserve.remove(llava)

In [None]:
# Show the text returned by the earlier `serve` call.
print(response.response)

In [None]:
from cornserve.task.base import Task, TaskInput, TaskOutput
class MyTaskInput(TaskInput):
    """Input model for `MyTask`.

    Attributes:
        first_prompt: Prompt used for the first MLLM invocation.
        second_prompt: Prompt used for the second MLLM invocation.
        multimodal_data: List of (modality, data URL) tuples passed to both
            invocations. Defaults to an empty list.
    """

    first_prompt: str
    second_prompt: str
    multimodal_data: list[tuple[str, str]] = []

class MyTaskOutput(TaskOutput):
    """Output model for `MyTask`.

    Attributes:
        response: The two MLLM responses joined by a ``-----`` separator
            line, with the first prompt's response first.
    """
    response: str

In [None]:
class MyTask(Task):
    """Composite task that queries one MLLM twice over the same multimodal data.

    The first invocation uses `first_prompt`, the second uses `second_prompt`.
    Both receive the same `multimodal_data`, and the two responses are joined
    into a single output string.
    """

    def __init__(self) -> None:
        """Create the inner MLLM task that both invocations share."""
        super().__init__()
        self.mllm = MLLMTask(
            model_id="Qwen/Qwen2-VL-7B-Instruct",
            modalities=[Modality.IMAGE],
        )

    def invoke(self, input: MyTaskInput) -> MyTaskOutput:
        """Invoke the inner MLLM once per prompt and combine the responses.

        Args:
            input: The two prompts and the multimodal data shared by both
                invocations. The name shadows the `input` builtin but is kept
                so that keyword callers continue to work.

        Returns:
            A `MyTaskOutput` whose `response` holds the first response, a
            ``-----`` separator line, and then the second response.
        """
        # `input` is never rebound, so it stays a `MyTaskInput` for the whole
        # method and each MLLM input gets its own clearly named local.
        first_input = MLLMInput(
            prompt=input.first_prompt,
            multimodal_data=input.multimodal_data,
        )
        first_output = self.mllm.invoke(first_input)

        second_input = MLLMInput(
            prompt=input.second_prompt,
            multimodal_data=input.multimodal_data,
        )
        second_output = self.mllm.invoke(second_input)

        return MyTaskOutput(response=f"{first_output.response} \n-----\n{second_output.response}")

In [None]:
# Instantiate the composite task; its constructor builds the inner MLLMTask.
my_task = MyTask()

In [None]:
# Deploy the composite task through the client.
cornserve.deploy(my_task)

In [None]:
# Call the composite task with two prompts that share one image.
# This rebinds `response`, which previously held the result of `serve`.
response = await my_task(
    MyTaskInput(
        first_prompt="What is this image about?",
        second_prompt="Write a haiku",
        multimodal_data=[("image", "https://picsum.photos/seed/picsum/200/300")],
    )
)
# The printed text contains both answers separated by a "-----" line.
print(response.response)

In [None]:
# Remove the standalone Qwen2-VL task deployed near the top of the notebook.
cornserve.remove(mllm)

In [None]:
# Remove the composite task.
cornserve.remove(my_task)

In [None]:
# Close the client; no further cluster calls follow in this notebook.
cornserve.close()