# Bulk Classification
For example, tagging documents based on their content.

In [3]:
import openai
import instructor

In [4]:
client = instructor.from_openai(openai.AsyncOpenAI())

In [5]:
from typing import List
from pydantic import BaseModel, ValidationInfo, model_validator

In [8]:
# A tag that can be assigned to a text: a numeric id plus a display name.
#
# NOTE: documented with comments rather than a class docstring on purpose.
# pydantic copies a model's docstring into its JSON schema description, and
# this model is passed as `response_model`, so a docstring would change the
# schema generated for it.
class Tag(BaseModel):
    id: int
    name: str

    @model_validator(mode="after")
    def validate_ids(self, info: ValidationInfo):
        """Check this tag against the allow-list in the validation context.

        The allow-list is read from ``info.context["tags"]``. When no context
        is given, or the context carries no ``"tags"`` entry, the tag is
        accepted unchanged.

        Returns:
            This instance, unchanged.

        Raises:
            ValueError: If ``id`` or ``name`` does not appear among the allowed
                tags. pydantic reports this as a validation error.
        """
        context = info.context
        if not context:
            return self

        allowed = context.get("tags")
        if allowed is None:
            # No allow-list supplied: nothing to check against.
            return self

        # Explicit raises instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently disable this validation.
        if self.id not in {tag.id for tag in allowed}:
            raise ValueError(f"Tag id {self.id} not found in tags")
        if self.name not in {tag.name for tag in allowed}:
            raise ValueError(f"Tag name {self.name} not found in tags")
        return self

# A Tag extended with a free-text `instructions` field.
# NOTE(review): presumably guidance on when the tag applies; the visible
# tagging code only reads `id` and `name` — confirm intended use.
class TagWithInstructions(Tag):
    instructions: str
    
# Input to a bulk tagging run: the texts to classify and the candidate tags.
class TagRequest(BaseModel):
    # Texts to be tagged; each one is tagged independently.
    texts: list[str]
    # Tags every prediction is chosen from.
    tags: list[TagWithInstructions]
    

# Result of a bulk tagging run.
class TagResponse(BaseModel):
    # The input texts, as given in the request.
    texts: list[str]
    # Predicted tags; as built by `tag_requests`, predictions[i] belongs to texts[i].
    predictions: list[Tag]

In [7]:
import asyncio 
async def tag_single_request(text: str, tags: list[Tag]) -> Tag:
    """Ask the chat model to pick a tag for a single text.

    Args:
        text: The text to classify.
        tags: The tags the model may choose from. They are listed in the
            prompt and also passed as validation context under ``"tags"``,
            which is where ``Tag.validate_ids`` reads its allow-list.

    Returns:
        The result of the completion call, requested with
        ``response_model=Tag``.
    """
    # Each allowed tag is shown to the model as a backtick-quoted (id, name) tuple.
    rendered_tags = [f"`{(tag.id, tag.name)}`" for tag in tags]
    allowed_tags_str = ", ".join(rendered_tags)

    messages = [
        {"role": "system", "content": "You are a world-class text tagging system."},
        {"role": "user", "content": f"Describe the following text: `{text}`"},
        {"role": "user", "content": f"Here are the allowed tags: {allowed_tags_str}"},
    ]

    prediction = await client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=messages,
        response_model=Tag,
        validation_context={"tags": tags},
    )
    return prediction
    
async def tag_requests(request: TagRequest) -> TagResponse:
    """Tag every text in the request concurrently.

    One ``tag_single_request`` call is started per text and all of them are
    awaited together with ``asyncio.gather``, which returns results in the
    order its awaitables were passed, so predictions line up with
    ``request.texts``.

    Args:
        request: The texts to tag and the tags to choose from.

    Returns:
        A ``TagResponse`` holding the original texts and one prediction per text.
    """
    pending = [tag_single_request(text, request.tags) for text in request.texts]
    predictions = await asyncio.gather(*pending)
    return TagResponse(texts=request.texts, predictions=predictions)