In [1]:
%load_ext autoreload
%autoreload 2

# Text Classification

# Kaggle E-commerce Dataset

In [2]:
import pandas as pd

# Load the pickled e-commerce classification dataset.
# NOTE: unpickling can execute arbitrary code -- only load this file from a trusted source.
df_data = pd.read_pickle('./data/Ecommerce/ecommerce_classification_dataset.pkl')

# Distinct category names in order of first appearance; reused later in the prompts.
possible_labels = [*df_data['label'].unique()]

# Bare last expression so the notebook renders the frame.
df_data

Unnamed: 0,label,text
0,Household,Paper Plane Design Framed Wall Hanging Motivat...
1,Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
2,Household,SAF 'UV Textured Modern Art Print Framed' Pain...
3,Household,"SAF Flower Print Framed Painting (Synthetic, 1..."
4,Household,Incredible Gifts India Wooden Happy Birthday U...
...,...,...
50420,Electronics,Strontium MicroSD Class 10 8GB Memory Card (Bl...
50421,Electronics,CrossBeats Wave Waterproof Bluetooth Wireless ...
50422,Electronics,Karbonn Titanium Wind W4 (White) Karbonn Titan...
50423,Electronics,"Samsung Guru FM Plus (SM-B110E/D, Black) Colou..."


## Train - test split

In [3]:
from AutoLLM.utils.helpers import split_dataframe

# The second argument works out to 10 rows' worth of the dataset as a fraction.
# NOTE(review): positional arguments are presumably (frame, test fraction, random seed,
# stratification column) -- confirm against AutoLLM.utils.helpers.split_dataframe.
df_train, df_test = split_dataframe(df_data, 10 / len(df_data), 42, 'label')

# Show the small held-out set first, then the training remainder.
display(df_test, df_train)

Unnamed: 0,label,text
13603,Household,"Pigeon Popcorn Maker, 1200watts, Yellow No mov..."
44708,Electronics,boAt BassHeads 225 in-Ear Super Extra Bass Hea...
13340,Household,Prestige PIC 3.1 V3 2000-Watt Induction Cookto...
26583,Books,Think & Grow Rich - Lectures by Napoleon Hill ...
44130,Electronics,"DeckUp Meritus-S Wall TV Unit (Dark Wenge, Mat..."
18908,Household,"Black+Decker Hand Tool Kit (108-Piece), Orange..."
20173,Books,Alibaba: The House that Jack Ma Built Review “...
31485,Clothing & Accessories,Baby Boy/Girl Romper Newborn Jumpsuit Blue Hoo...
34088,Clothing & Accessories,S4S Men's 100% Cotton Premium Collection Handk...
11222,Household,Kraft Seeds Multipurpose Kitchen Household and...


Unnamed: 0,label,text
24728,Books,UGC Net Education About the Author An editoria...
29641,Books,Oswaal Karnataka SSLC Question Bank Class 10 S...
9408,Household,Chef Direct Stainless Steel Dome Lid with Knob...
6530,Household,"JDX Reliance Fiber Filler Cushion, 16X16 Inch,..."
5667,Household,Magideal 2x Silky Soft Satin Standard Pillow C...
...,...,...
46826,Electronics,Canon EOS 1500D Digital SLR Camera (Black) wit...
15524,Household,Philips GC504/35 1600-Watt Garment Steamer (Pi...
48524,Electronics,LONPOO Compact HD DVD Player All Region Free (...
49605,Electronics,Intex IT-PB12.5K 12500 mAH Power Bank (Black-R...


# Classifier

## Build classifier agent

In [7]:
from pydantic import BaseModel
from typing import Literal
from AutoLLM.interfaces.api_client import APIClient
from config import API_KEY
from AutoLLM.prompts.classifier import classifier_template
from AutoLLM.modules.base_agent import BaseAgent
import json
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score

# Base URL of the inference API; shared by every client created in this notebook.
url = "https://api.studio.nebius.ai/v1/"

# Client for the small model that performs the actual classification.
classifier_client = APIClient(
    url=url,
    api_key=API_KEY,
    model="meta-llama/Llama-3.2-3B-Instruct",
)

# Sampling settings for the classifier. Both values are vanishingly small,
# presumably to make decoding effectively deterministic -- confirm with the API docs.
classifier_gen_config = dict(
    temperature=1e-10,
    top_p=1e-10,
    # top_k=1,
)
classifier_client.load_generation_config(classifier_gen_config)

# Response schema handed to ClassifierAgent: a JSON object with a single `label` field.
# NOTE(review): `label` is typed as a plain `str`, so this schema by itself does not
# restrict the value to `possible_labels`; that restriction is only stated in the prompt.
class classifier_schema(BaseModel):
    label: str  # the category name predicted for the input text

class ClassifierAgent(BaseAgent):
    """Agent that labels a text input with one of ``possible_labels``.

    The prompt is assembled from ``classifier_template`` using the current
    ``instructions``, a fixed output-format description and the input text.
    Evaluation data can either be passed per call or stored once with
    :meth:`load_data`.
    """

    def __init__(self, client, json_schema, gen_config):
        """Set up prompt pieces; arguments are forwarded unchanged to ``BaseAgent``."""
        super().__init__(client, json_schema, gen_config)
        self.template = classifier_template
        # Sent as the content of a trailing assistant message, i.e. the reply is
        # started with the opening of the expected JSON object.
        self.guide = '{"label": '
        # The label list is captured here, when the agent is constructed.
        self.output_format = f"""label: (Literal) Return one of the choices from the following list: {possible_labels}."""
        self.system_message = "You are a helpful AI assistant."
        # Must be assigned (or supplied once via kwargs) before a prompt can be built.
        self.instructions = ""
        # Optional default evaluation data, filled in by load_data().
        self.X = None
        self.y_true = None

    def _generate_prompt(self, **kwargs):
        """Build the chat messages for a single input.

        Args:
            **kwargs: Must contain ``input`` (the text to classify). May contain
                ``instructions``, which is only used -- and then stored on the
                agent -- when no instructions are set yet.

        Returns:
            A list of system / user / assistant message dicts.

        Raises:
            ValueError: If no instructions are set and none were supplied.
            KeyError: If ``input`` is missing from ``kwargs``.
        """
        if not self.instructions:
            if 'instructions' not in kwargs:
                raise ValueError("Instructions not set.")
            self.instructions = kwargs['instructions']

        self.user_prompt = self.template.format(
            instructions=self.instructions,
            output_format=self.output_format,
            input=kwargs['input']
        )
        return [
            {"role": "system", "content": self.system_message},
            {"role": "user", "content": self.user_prompt},
            {"role": "assistant", "content": self.guide},
        ]

    def _parse_response(self, response):
        """Decode ``response`` as JSON and return the value of its ``label`` key."""
        return json.loads(response)['label']

    def run_samples(self, X):
        """Classify every item of ``X`` in order and return the list of labels.

        ``self.run`` is not defined in this class; it is presumably inherited
        from ``BaseAgent``. A progress bar is shown while iterating.
        """
        # The loop variable deliberately avoids the name `input`, which would
        # shadow the builtin; the keyword passed to run() is unchanged.
        return [self.run(input=sample) for sample in tqdm(X)]

    def evaluate_accuracy(self, X=None, y_true=None):
        """Run the classifier on ``X`` and return its accuracy against ``y_true``.

        If either argument is omitted, the data stored by :meth:`load_data`
        is used for both.

        Raises:
            ValueError: If no data was passed and none has been loaded.
        """
        if X is None or y_true is None:
            X, y_true = self.X, self.y_true
        if X is None or y_true is None:
            # Fail early with a clear message instead of an opaque
            # "'NoneType' object is not iterable" from deeper in the call.
            raise ValueError(
                "No evaluation data: pass X and y_true or call load_data() first."
            )
        y_pred = self.run_samples(X)
        return accuracy_score(y_true, y_pred)

    def load_data(self, X, y_true):
        """Store default inputs and gold labels for later :meth:`evaluate_accuracy` calls."""
        self.X = X
        self.y_true = y_true

## Test classifier agent

In [8]:
# Baseline instruction: simply enumerate the allowed labels.
initial_instruction = (
    "Based on the given input item description, "
    f"label the item as one of the following: {possible_labels}."
)

ca = ClassifierAgent(
    client=classifier_client,
    json_schema=classifier_schema,
    gen_config=classifier_gen_config,
)
ca.instructions = initial_instruction

# Raw predictions for the held-out texts.
print(ca.run_samples(df_test['text'].to_list()))

# NOTE: evaluate_accuracy calls run_samples internally, so the held-out set is
# classified a second time here.
print(ca.evaluate_accuracy(X=df_test['text'], y_true=df_test['label']))

  0%|          | 0/10 [00:00<?, ?it/s]

['Household', 'Electronics', 'Electronics', 'Books', 'Household', 'Household', 'Household', 'Household', 'Clothing & Accessories', 'Household']


  0%|          | 0/10 [00:00<?, ?it/s]

0.6


# Optimize prompt

In [11]:
from AutoLLM.modules.mutation_agent import MutationAgent
from AutoLLM.modules.critic_agent import CriticAgent
from AutoLLM.modules.refine_agent import RefineAgent
from AutoLLM.modules.expert_agent import ExpertAgent

# Client for the larger model that drives the prompt-optimisation agents.
meta_client = APIClient(
    url=url,
    api_key=API_KEY,
    model="Qwen/Qwen2.5-32B-Instruct",
)

# Sampling settings passed to each optimisation agent.
meta_generation_config = dict(
    temperature=0.7,
    top_p=0.9,
)


class OptimusPrompt:
    """State and agent wrappers for iterative instruction optimisation.

    Keeps a cache of instructions that still need scoring, a population of
    ``(instruction, score)`` pairs, and one mutation, critic, refine and
    expert agent, all built from the same meta client and generation config.
    """

    def __init__(
        self,
        task_description,
        initial_instruction,
        meta_client,
        meta_generation_config,
        num_mutation_variations=10,
        num_refine_variations=5,
    ):
        """Store the task framing, seed the cache and construct the four agents."""
        self.task_description = task_description
        self.initial_instruction = initial_instruction

        # How many candidates each generating agent is asked to produce.
        self.num_mutation_variations = num_mutation_variations
        self.num_refine_variations = num_refine_variations

        # Scored (instruction, score) pairs, and instructions awaiting a score.
        self.instruction_population = []
        self.instruction_cache = [self.initial_instruction]

        # Every agent receives the same client and generation settings.
        agent_args = (meta_client, meta_generation_config)
        self.mutation_agent = MutationAgent(*agent_args)
        self.critic_agent = CriticAgent(*agent_args)
        self.refine_agent = RefineAgent(*agent_args)
        self.expert_agent = ExpertAgent(*agent_args)

    def mutate(self, seed_instruction):
        """Ask the mutation agent for variations of ``seed_instruction`` and return its output."""
        return self.mutation_agent.run(
            task_description=self.task_description,
            num_variations=self.num_mutation_variations,
            seed_instruction=seed_instruction,
        )

    def score_cache(self, eval_func):
        """Score each cached instruction with ``eval_func``, record it, then empty the cache."""
        for candidate in self.instruction_cache:
            candidate_score = eval_func(candidate)
            self.instruction_population.append((candidate, candidate_score))
        self.instruction_cache = []

    def select_top_k_instructions(self, k):
        """Sort the population by score (highest first, in place) and return the first ``k`` instructions."""
        self.instruction_population.sort(key=lambda entry: entry[1], reverse=True)
        leaders = self.instruction_population[:k]
        return [text for text, _score in leaders]

    def build_wrong_example_text(self, wrong_examples):
        """Render ``(text, true label, predicted label)`` triples as one newline-joined string."""
        rendered = [
            (
                f"\n        [Text]:               {text}"
                f"\n        [Correct Label]:      {label_true}"
                f"\n        [Predicted Label]:    {label_pred}"
            )
            for text, label_true, label_pred in wrong_examples
        ]
        return "\n".join(rendered)

    def critique(self, seed_instruction, wrong_example_text):
        """Ask the critic agent to review ``seed_instruction`` given the rendered mistakes."""
        return self.critic_agent.run(
            task_description=self.task_description,
            seed_instruction=seed_instruction,
            wrong_examples=wrong_example_text,
        )

    def build_wrong_example_with_critique_text(self, wrong_examples, critique):
        """Render each misclassified example together with the critique at the same index."""
        rendered = [
            (
                f"\n                [Text]:               {wrong_examples[idx][0]}"
                f"\n                [Correct Label]:      {wrong_examples[idx][1]}"
                f"\n                [Predicted Label]:    {wrong_examples[idx][2]}"
                f"\n                [Critique]:           {critique[idx]}"
            )
            for idx in range(len(wrong_examples))
        ]
        return "\n".join(rendered)

    def refine(self, seed_instruction, wrong_examples_with_critique):
        """Ask the refine agent for improved versions of ``seed_instruction`` and return its output."""
        return self.refine_agent.run(
            task_description=self.task_description,
            instruction=seed_instruction,
            examples=wrong_examples_with_critique,
            num_variations=self.num_refine_variations,
        )

    def get_expert(self, seed_instruction):
        """Return the expert agent's output for ``seed_instruction``."""
        return self.expert_agent.run(
            instruction=seed_instruction,
        )


# Optimization loop

In [10]:
# One-sentence description of the task, passed to the optimisation agents.
task_description = (
    "Label E-commerce products as their product types "
    "given their product description."
)

# The optimiser starts from the baseline instruction defined earlier in the notebook.
optimus_prime = OptimusPrompt(
    task_description,
    initial_instruction,
    meta_client,
    meta_generation_config,
    num_mutation_variations=10,
    num_refine_variations=5,
)

def eval_func(instruction):
    """Score ``instruction`` by the classifier's accuracy on the held-out set.

    Side effect: the shared agent ``ca`` keeps ``instruction`` as its current
    instructions after the call.
    """
    ca.instructions = instruction
    texts, labels = df_test['text'], df_test['label']
    return ca.evaluate_accuracy(X=texts, y_true=labels)

# Held-out texts and gold labels as plain lists, so predictions can be paired
# with them positionally when collecting the misclassified examples.
X = df_test['text'].to_list()
y_true = df_test['label'].to_list()

num_rounds = 1
for round_idx in tqdm(range(num_rounds)):
    print(f"Round {round_idx + 1}")

    # Score every instruction waiting in the cache; they move into the population.
    optimus_prime.score_cache(eval_func=eval_func)

    # select_top_k_instructions returns a list, so unwrap the single best
    # instruction before handing it to the agents as a seed.
    best_instruction = optimus_prime.select_top_k_instructions(k=1)[0]

    # Generate variations of the best instruction.
    # TODO: the mutations are not yet fed back into the instruction cache.
    mutations = optimus_prime.mutate(seed_instruction=best_instruction)

    # eval_func leaves the classifier set to whichever instruction was scored
    # last, so select the best one explicitly before analysing its mistakes.
    ca.instructions = best_instruction
    y_pred = ca.run_samples(X)
    wrong_examples = [
        (text, label_true, label_pred)
        for text, label_true, label_pred in zip(X, y_true, y_pred)
        if label_pred != label_true
    ]

    # Only consult the critic when there is at least one mistake to discuss.
    critique = None
    if wrong_examples:
        wrong_example_text = optimus_prime.build_wrong_example_text(wrong_examples)
        critique = optimus_prime.critique(
            seed_instruction=best_instruction,
            wrong_example_text=wrong_example_text,
        )


NameError: name 'task_description' is not defined