In [5]:
%%writefile fastapi_processing.py
import os
from typing import Optional, List, Dict, Any

import uvicorn
from fastapi import FastAPI, HTTPException
from openai import OpenAI
from openai import AsyncOpenAI  # Note the AsyncOpenAI import
from pydantic import BaseModel

# ASGI application object that the route decorators below register against.
app = FastAPI()

# vLLM client configuration.
# The endpoint and key can be overridden through the environment; when the
# variables are unset, the original hard-coded local values are used.
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
VLLM_API_KEY = os.environ.get("VLLM_API_KEY", "EMPTY")

# Async client, so request handlers can await completions.
VLLM_CLIENT = AsyncOpenAI(
    api_key=VLLM_API_KEY,
    base_url=VLLM_BASE_URL,
)

class GenerateRequest(BaseModel):
    """Request body accepted by the ``/v1/generateText`` endpoint."""

    # Chat messages, passed unchanged as ``messages`` to the completion call.
    messages: List[Dict[str, Any]]
    # Passed as ``max_tokens`` to the completion call; 1024 when omitted.
    max_tokens: Optional[int] = 1024

@app.post("/v1/generateText")
async def generate_text(request: GenerateRequest):
    """Forward a chat request to the vLLM client and return the reply text.

    Args:
        request: Validated body carrying the chat ``messages`` and the
            ``max_tokens`` value for the completion.

    Returns:
        The ``content`` of the first choice in the completion response.

    Raises:
        HTTPException: With status 500 if the completion call or the
            handling of its response raises; ``detail`` carries the
            original error text prefixed with ``"Error: "``.
    """
    print("Received request")
    try:
        # The client is async, so the call must be awaited.
        response = await VLLM_CLIENT.chat.completions.create(
            model="Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4",
            messages=request.messages,
            max_tokens=request.max_tokens,
        )
        return response.choices[0].message.content
    except Exception as e:
        # One handler is enough: a second, identical ``except Exception``
        # clause after this one could never be reached.
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e

@app.get("/health")
async def health_check():
    """Health endpoint: always answers with a fixed "healthy" status."""
    payload = {"status": "healthy"}
    return payload

Overwriting fastapi_processing.py


In [6]:
# Serve the `app` object from fastapi_processing.py on all interfaces, port 8001.
# NOTE(review): the server appears to run in the foreground, so this cell
# stays busy until it is interrupted — confirm before running cells after it.
!uvicorn fastapi_processing:app --host 0.0.0.0 --port 8001

INFO:     Started server process [20157]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8001 (Press CTRL+C to quit)
Received request
INFO:     127.0.0.1:57742 - "POST /v1/generateText HTTP/1.1" 200 OK
Received request
Received request
Received request
Received request
INFO:     127.0.0.1:40166 - "POST /v1/generateText HTTP/1.1" 200 OK
Received request
INFO:     127.0.0.1:40168 - "POST /v1/generateText HTTP/1.1" 200 OK
INFO:     127.0.0.1:40190 - "POST /v1/generateText HTTP/1.1" 200 OK
INFO:     127.0.0.1:40174 - "POST /v1/generateText HTTP/1.1" 200 OK
Received request
Received request
Received request
INFO:     127.0.0.1:53506 - "POST /v1/generateText HTTP/1.1" 200 OK
Received request
[32mINFO[0m:   