In [1]:
import polars as pl
from openai import OpenAI, AsyncOpenAI
from pydantic import BaseModel, Field
from typing import List
import traceback
import json
import tqdm
import asyncio

In [2]:
data = pl.read_parquet("./data/data.parquet")
data = data.with_columns(
    pl.arange(1, data.height + 1).alias("item_id")
)
category_description = data[["item_id","description"]].to_dict(as_series=False)
items = [dict(zip(category_description.keys(), values)) for values in zip(*category_description.values())]

In [3]:
# constants
BATCH_SIZE = 5
# Update this to your llama.cpp server endpoint
LLAMA_CPP_BASE_URL = "http://localhost:8080/v1"

In [4]:
class ItemReview(BaseModel):
    item_id: int = Field(description="unique identifier that is provided in the input.", title="item_id")
    classification: str = Field(description="The classification of the item (e.g.,Food,cloths).")
    review: str = Field(description="A brief review of the item.")

class Response(BaseModel):
    reviews: List[ItemReview] = Field(min_length=BATCH_SIZE, description="A list of classifications and reviews for the provided items.")

## Using AsyncIO with llama.cpp

In [5]:
# Initialize the async OpenAI client for llama.cpp
client = AsyncOpenAI(
    api_key="sk-no-key-required",  # llama.cpp doesn't require a real API key
    base_url=LLAMA_CPP_BASE_URL
)

In [6]:
async def Chat(client, context):
    response = await client.chat.completions.create(
        messages=context,
        model="gpt-3.5-turbo",  # This is ignored by llama.cpp, it uses whatever model is loaded
        response_format={"type": "json_object"},
        temperature=1,
        max_tokens=4000,
        timeout=60
    )
    return response

In [7]:
def dict_to_text(items: list[dict]) -> str:
    lines = []
    for item in items:
        line = "\n".join([f"{key} : {value}" for key, value in item.items()])
        lines.append(line)
    return "\n".join(lines)

In [8]:
def createPrompt(batch_items):
    prompt_instruction = """
                You are a helpful assistant that classifies and reviews items.
                
                Each item has:
                - "item_id": unique id for each item
                - "description": the item's description
                
                Return a JSON object with a "reviews" array containing objects with the following keys:
                - "item_id" : same as item_id from input
                - "classification" : classification of item category  
                - "review" : small review 1-2 phrases max
                
                Example format:
                {
                  "reviews": [
                    {
                      "item_id": 1,
                      "classification": "Electronics",
                      "review": "Great product with excellent features."
                    }
                  ]
                }
                """
    
    # Build prompt
    number_of_items = len(batch_items)
    batch_items_text = dict_to_text(batch_items)
    context = [
                {'role': 'system',
                 'content': f"{prompt_instruction}"},
                {"role": "user", "content": f"The following {number_of_items} items need to be classified and reviewed. Please return exactly {number_of_items} reviews in JSON format:\n\n{batch_items_text}"},
            ]
    return context

In [9]:
def retry(result, BATCH_SIZE, batch_items):
    if len(result["reviews"]) != BATCH_SIZE:
        print(f"Expected {len(batch_items)} reviews, but got {len(result['reviews'])} \nwhat i got : {result} , handling error")
        
        items_ids = [r["item_id"] for r in result["reviews"]] 
        rest = [item for item in batch_items if item["item_id"] not in items_ids]
    else:
        rest = None
    return rest

In [10]:
from itertools import islice

def batch_iter(iterable, BATCH_SIZE):
    it = iter(iterable)
    while True:
        batch = list(islice(it, BATCH_SIZE))
        if not batch:
            break
        yield batch

In [11]:
# use async io to send multiple batches at the same time 
# using llama.cpp with OpenAI client
async def main():
    
    all_reviews = []
    rest = None
    items_iter = batch_iter(items, BATCH_SIZE)
    
    # Initialize progress bar
    total_items = len(items)
    pbar = tqdm.tqdm(total=total_items, desc="Processing items", unit="items")
    processed_count = 0
    end = False
    while end is False:
        if rest:
            batches = [rest]
            rest = None
        else:
            # Process fewer concurrent batches for llama.cpp to avoid overwhelming the server
            batches = [list(next(items_iter, [])) for _ in range(5)]  # Reduced from 20 to 5
            batches = [b for b in batches if b]  # remove empty

        if not batches:
            break

        contexts = [createPrompt(b) for b in batches]
        responses = await asyncio.gather(*(Chat(client, ctx) for ctx in contexts), return_exceptions=True)

        for batch_items, response in zip(batches, responses):
            try:
                    
                # Parse the response content
                content = response.choices[0].message.content
                result = json.loads(content)
                
                # Validate with pydantic
                validated_response = Response.model_validate(result)
                result = json.loads(validated_response.model_dump_json())
                
                rest = retry(result, BATCH_SIZE, batch_items)
                
                # Only count as processed if no retry is needed
                if rest is None:
                    items_processed = len(batch_items)
                    processed_count += items_processed
                    pbar.update(items_processed)
                    pbar.set_postfix({"Processed": processed_count, "Reviews": len(all_reviews)})
                
                all_reviews.extend(result["reviews"])
                end = True
                break
                
            except json.JSONDecodeError as jd:
                # print(f"JSONDecodeError: {jd}")
                # print(f"Response content: {response.choices[0].message.content if hasattr(response, 'choices') else 'No content'}")
                rest = batch_items  # Retry this batch
            except ValueError as ve:
                #print(f"ValueError: {ve}")
                rest = batch_items  # Retry this batch
            except Exception as e:
                #print(f"Unexpected error: {e}")
                #print(f"Response: {response}")
                rest = batch_items  # Retry this batch

    pbar.close()
    return all_reviews

In [12]:
# Run the main function
reviews = await main()
print(f"Total reviews collected: {len(reviews)}")

Processing items:   0%|          | 5/1000000 [00:16<940:18:23,  3.39s/items, Processed=5, Reviews=0]

Total reviews collected: 5





In [None]:
item = items[0]
item

In [None]:
#send one request

async def send_request(client, item):
    prompt = createPrompt(item)
    response = await Chat(client, prompt)
    content = response.choices[0].message.content
    result = json.loads(content)
    
    # Validate with pydantic
    # validated_response = Response.model_validate(result)
    # result = json.loads(validated_response.model_dump_json())
    return result

# run 
# res = await send_request(client, [item])
# print(res)