In [3]:
# text classification 
import enum
from pydantic import BaseModel

In [4]:
# Closed label set for the spam classifier. Mixing in `str` makes each member
# compare equal to its plain string value (e.g. Labels.SPAM == "spam").
# NOTE(review): pydantic may expose the class docstring in the generated schema,
# so it is left untouched here -- confirm before rewording it.
class Labels(str, enum.Enum):
    """Enum for single-label text classification"""
    SPAM = "spam"
    NOT_SPAM = "not_spam"

# Structured-output schema handed to the instructor client as `response_model`
# in text_classify.
# NOTE(review): the class docstring presumably ends up in the schema description
# sent to the model -- confirm before rewording it.
class SinglePrediction(BaseModel):
    """Class for a single class label predictions"""
    class_label: Labels  # exactly one Labels member per prediction

In [2]:
from openai import OpenAI
import instructor

In [6]:
# Synchronous client: an OpenAI() client built with no explicit arguments,
# wrapped by instructor so completions can be requested with a `response_model`.
instructor_client = instructor.from_openai(
    OpenAI(),
)

In [9]:
def text_classify(data) -> SinglePrediction:
    """Classify ``data`` with one chat completion and return the parsed label.

    ``data`` is interpolated into a single user message; the request is sent
    through ``instructor_client`` with ``SinglePrediction`` as the response
    model.
    """
    prompt = f"Classify the following text: {data}"
    user_message = {"role": "user", "content": prompt}
    request_kwargs = dict(
        model="gpt-3.5-turbo",
        response_model=SinglePrediction,
        messages=[user_message],
    )
    return instructor_client.chat.completions.create(**request_kwargs)

In [10]:
# Smoke check: an advance-fee style message should come back labelled as spam.
predic1 = text_classify(
    "Hey there I T'challa the Nigerian prince planning to write my hard earned dollars to your account."
)
assert predic1.class_label == Labels.SPAM

In [11]:
# Bare last expression so the notebook renders the predicted label.
predic1.class_label

<Labels.SPAM: 'spam'>

In [14]:
# multi-label classification
from typing import List

# Label vocabulary for support-ticket classification; a ticket may carry
# several of these at once (see MultiClassPrediction.class_labels).
# Documented with `#` comments rather than a docstring because pydantic may
# surface a docstring in the schema -- NOTE(review): confirm.
class MultiLabels(str, enum.Enum):
    TECH_ISSUE = "tech_issue"
    BILLING = "billing"
    GENERAL_QUERY = "general_query"

# Structured-output schema used as `response_model` in multi_classify.
# NOTE(review): the docstring below is presumably part of the schema sent to
# the model, so its text is kept verbatim -- confirm before rewording it.
class MultiClassPrediction(BaseModel):
    """
    Class for a multi-class label prediction
    """ 
    class_labels : List[MultiLabels]  # every label the model assigned to the ticket

In [15]:
def multi_classify(data) -> MultiClassPrediction:
    """Assign ``MultiLabels`` to a support ticket with one chat completion.

    Args:
        data: Ticket text; interpolated verbatim into the user prompt.

    Returns:
        The result of the instructor completion requested with
        ``MultiClassPrediction`` as the response model.
    """
    return instructor_client.chat.completions.create(
        model='gpt-3.5-turbo',
        response_model=MultiClassPrediction,
        messages=[
            {
                "role": "user",
                # Fixed wording: the prompt used to read
                # "Classify the following the support ticket".
                "content": f"Classify the following support ticket: {data}"
            }
        ]
    )

In [16]:
# Example ticket that touches both account access and billing.
ticket = "My account is locked and I can't access my billing info."
prediction = multi_classify(ticket)
# Bare last expression so the notebook renders the full prediction object.
prediction

MultiClassPrediction(class_labels=[<MultiLabels.TECH_ISSUE: 'tech_issue'>, <MultiLabels.BILLING: 'billing'>, <MultiLabels.GENERAL_QUERY: 'general_query'>])

In [17]:
prediction.class_labels
# It's pulling out all the classes: every MultiLabels member, including
# GENERAL_QUERY, was returned for this ticket.

[<MultiLabels.TECH_ISSUE: 'tech_issue'>,
 <MultiLabels.BILLING: 'billing'>,
 <MultiLabels.GENERAL_QUERY: 'general_query'>]

In [3]:
from openai import AsyncOpenAI
# Async counterpart of the instructor client; tag_single_request awaits its
# chat.completions.create call.
async_client = instructor.from_openai(AsyncOpenAI())

In [4]:
# Tag catalogue as plain dicts; ids follow list position, starting at 0.
# NOTE(review): the key here is "instruction" (singular) while
# TagWithInstructions declares an `instructions` field, and a later cell
# rebinds `tags` -- these dicts do not appear to be consumed anywhere visible.
tags = [
    {"id": tag_id, "name": name, "instruction": instruction}
    for tag_id, (name, instruction) in enumerate(
        [
            ("personal", "Personal Information"),
            ("phone", "Phone Number"),
            ("email", "Email Address"),
            ("address", "Home Address"),
            ("Other", "Other Information"),
        ]
    )
]

In [5]:
from typing import List
import asyncio
from pydantic import ( 
    BaseModel,
    ValidationInfo, 
    model_validator
)

# A predicted tag: numeric id plus display name. Used as the `response_model`
# in tag_single_request. Documented with `#` comments rather than a class
# docstring because pydantic may surface a docstring in the generated schema
# -- NOTE(review): confirm.
class Tag(BaseModel):
    id: int
    name: str

    @model_validator(mode="after")
    def validate_ids(self, info: ValidationInfo):
        """Check this tag against the allowed tags in the validation context.

        Validation is skipped when no context is supplied or when the context
        has no ``"tags"`` entry. Otherwise the id must exist, the name must
        exist, and the (id, name) pair must belong to one and the same allowed
        tag.

        Raises:
            ValueError: if any of the three checks fails. Explicit raises are
                used instead of ``assert`` so the checks still run under
                ``python -O``.
        """
        context = info.context
        if not context:
            return self

        allowed: List[Tag] = context.get("tags")
        if allowed is None:
            # Context was given for some other purpose; nothing to check against.
            return self

        if self.id not in {tag.id for tag in allowed}:
            raise ValueError(f"Tag ID {self.id} not found in context")
        if self.name not in {tag.name for tag in allowed}:
            raise ValueError(f"Tag name {self.name} not found in context")
        # Independent membership is not enough: id 1 with the name of tag 2
        # would pass both checks above, so the pair itself is verified too.
        if (self.id, self.name) not in {(tag.id, tag.name) for tag in allowed}:
            raise ValueError(
                f"Tag name {self.name} does not belong to tag ID {self.id}"
            )
        return self

# Tag extended with free-text guidance about when the tag applies.
# NOTE(review): tag_single_request only puts (id, name) into the prompt, so
# `instructions` is not sent to the model by the visible code.
class TagWithInstructions(Tag):
    instructions: str

# Input to tag_request: the texts to tag and the tags the model may choose from.
class TagRequest(BaseModel):
    texts: List[str]
    tags: List[TagWithInstructions]

# Output of tag_request: the request's texts plus the predictions gathered for
# them (tag_request issues one tag_single_request call per text).
class TagResponse(BaseModel):
    texts: List[str]
    predictions: List[Tag]

In [6]:
async def tag_single_request(text: str, tags: List[Tag]) -> Tag:
    """Request a single ``Tag`` for ``text`` from the async instructor client.

    The allowed ``(id, name)`` pairs are listed in the prompt, and the same
    ``tags`` are passed as validation context alongside the ``Tag`` response
    model.
    """
    # Each allowed tag is rendered as a backtick-quoted ``(id, name)`` tuple.
    allowed_listing = ", ".join(f"`{(tag.id, tag.name)}`" for tag in tags)

    conversation = [
        {
            "role": "system",
            "content": "You are a world-class text tagging system.",
        },
        {"role": "user", "content": f"Describe the following text: `{text}`"},
        {
            "role": "user",
            "content": f"Here are the allowed tags: {allowed_listing}",
        },
    ]

    prediction = await async_client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=conversation,
        response_model=Tag,  # Minimizes the hallucination of tags that are not in the allowed tags.
        validation_context={"tags": tags},
    )
    return prediction

async def tag_request(request: TagRequest) -> TagResponse:
    """Tag every text in ``request`` concurrently and bundle the results.

    One ``tag_single_request`` coroutine is created per text, all sharing
    ``request.tags``; they are awaited together with ``asyncio.gather``.
    """
    pending = [tag_single_request(text, request.tags) for text in request.texts]
    # gather returns results in the order the awaitables were passed in.
    predictions = await asyncio.gather(*pending)
    return TagResponse(texts=request.texts, predictions=predictions)

In [None]:
# Allowed tags for the demo, as validated models.
tags = [
    TagWithInstructions(id=0, name="personal", instructions="Personal information"),
    TagWithInstructions(id=1, name="phone", instructions="Phone number"),
    TagWithInstructions(id=2, name="email", instructions="Email address"),
    TagWithInstructions(id=3, name="address", instructions="Address"),
    TagWithInstructions(id=4, name="Other", instructions="Other information"),
]

# Texts will be a range of different questions.
# Such as "How much does it cost?", "What is your privacy policy?", etc.

texts = [
    "What is your phone number?",
    "What is your email address?",
    "What is your address?",
    "What is your privacy policy?",
]
# The request will contain the texts and the tags.

request = TagRequest(texts=texts, tags=tags)


def _run_coroutine(coro):
    """Run ``coro`` to completion whether or not an event loop is already running."""
    from concurrent.futures import ThreadPoolExecutor

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread (plain script): asyncio.run is safe.
        return asyncio.run(coro)
    # A loop is already running in this thread, as in a Jupyter kernel, where
    # asyncio.run() raises RuntimeError. Drive the coroutine on a fresh loop in
    # a worker thread and block until it finishes.
    # NOTE(review): async_client is then used on that worker loop; confirm it
    # is not also shared with coroutines running on the kernel's own loop.
    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


# The response will contain the texts and the predicted tags.
# (In a notebook cell, `response = await tag_request(request)` is equivalent.)
response = _run_coroutine(tag_request(request))

print(response.model_dump_json(indent=2))