In [53]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Text Classification

# Kaggle E-commerce Dataset

In [54]:
import pandas as pd

# Load the pickled e-commerce dataset.
# NOTE: unpickling can execute arbitrary code, so this file must come from a trusted source.
df_data = pd.read_pickle('./data/Ecommerce/ecommerce_classification_dataset.pkl')

# Distinct values of the `label` column, in order of first appearance;
# the classifier prompt is later built from this list.
possible_labels = [*df_data['label'].unique()]

# Last expression of the cell: rich display of the loaded frame.
df_data

Unnamed: 0,label,text
0,Household,Paper Plane Design Framed Wall Hanging Motivat...
1,Household,"SAF 'Floral' Framed Painting (Wood, 30 inch x ..."
2,Household,SAF 'UV Textured Modern Art Print Framed' Pain...
3,Household,"SAF Flower Print Framed Painting (Synthetic, 1..."
4,Household,Incredible Gifts India Wooden Happy Birthday U...
...,...,...
50420,Electronics,Strontium MicroSD Class 10 8GB Memory Card (Bl...
50421,Electronics,CrossBeats Wave Waterproof Bluetooth Wireless ...
50422,Electronics,Karbonn Titanium Wind W4 (White) Karbonn Titan...
50423,Electronics,"Samsung Guru FM Plus (SM-B110E/D, Black) Colou..."


## Train - test split

In [55]:
from AutoLLM.utils.helpers import split_dataframe

# Carve out the evaluation splits. The fractions are expressed as
# "desired row count / current row count", i.e. about 100 test rows and then
# about 20 validation rows taken from what is left.
# NOTE(review): positional arguments presumably are (frame, fraction, seed,
# stratify column) -- confirm against AutoLLM.utils.helpers.split_dataframe.
df_train, df_test = split_dataframe(df_data, 100 / len(df_data), 123, 'label')
df_train, df_val = split_dataframe(df_train, 20 / len(df_train), 123, 'label')

# Show both held-out splits (test first, then validation).
display(df_test, df_val)

Unnamed: 0,label,text
730,Household,Nisha Furniture Sheesham Wood Bedside Table fo...
9433,Household,Copper Chef 9.5 Inch Square Frying Pan With Li...
21160,Books,History of the Conflict Between Religion and S...
22895,Books,One Day at Horrorland (Goosebumps - 16)
43246,Electronics,"Lenovo Tab M10 Tablet (10.1 inch, 32GB, Wi-Fi ..."
...,...,...
17464,Household,Bathla Advance 5-Step Foldable Aluminium Ladde...
36268,Clothing & Accessories,Janasya Women's Crepe A-Line Kurta Shop from a...
43653,Electronics,HP HP 20-c103in 19.45-inch All-in-One Desktop ...
31256,Clothing & Accessories,All 4 All Ultimate Spiderman Gloves with Disc ...


Unnamed: 0,label,text
44729,Electronics,Sonilex SL-413 Rechargeable FM Radio with USB/...
24605,Books,"Guru Nanak - The First Sikh Guru, Set of Five ..."
38604,Clothing & Accessories,Royal Son Rimless Rectangular Spectacle Frame ...
10389,Household,Expresso 6-Sided Stainless Steel Grater and Sl...
32077,Clothing & Accessories,Avaatar Navy Blue Hooded Superhero Sweatshirt ...
714,Household,Athena Creations Double Bed Foldable Mosquito ...
1921,Household,TITTLE Silky Beans 4 KG Premium A-Grade for Be...
14702,Household,OneAssist 2 Years Extended Warranty Pro Plus p...
8413,Household,Aristo Swing Lid Garbage Waste Dustbin 32 Ltr ...
42047,Electronics,Canon Pixma G1010 Single Function Ink Tank Col...


# Classifier

## Build classifier agent

In [56]:
from pydantic import BaseModel
from typing import Literal
from AutoLLM.interfaces.api_client import APIClient
from config import API_KEY
from AutoLLM.prompts.classifier import classifier_template
from AutoLLM.modules.base_agent import BaseAgent
import json
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score

# Base URL of the inference API; also reused when the meta client is created.
url = "https://api.studio.nebius.ai/v1/"

# Generation settings for the classifier: a practically-zero temperature.
# (top_p=1e-10 and top_k=1 were left disabled in the original configuration.)
classifier_gen_config = dict(temperature=1e-10)

# Classifier model. A smaller alternative that was tried:
# "meta-llama/Llama-3.2-1B-Instruct".
classifier_client = APIClient(
    url=url,
    api_key=API_KEY,
    model="meta-llama/Llama-3.2-3B-Instruct",
)
classifier_client.load_generation_config(classifier_gen_config)

# Response schema passed to ClassifierAgent as `json_schema`: one string field
# carrying the predicted label.
# NOTE(review): documented with `#` comments instead of a class docstring on
# purpose -- a pydantic model's docstring becomes the `description` of its
# generated JSON schema, which would alter what is sent downstream.
class classifier_schema(BaseModel):
    label: str  # predicted category name

class ClassifierAgent(BaseAgent):
    """Prompted single-label text classifier built on top of ``BaseAgent``.

    The agent fills ``classifier_template`` with the current instructions, an
    output-format description listing ``possible_labels`` and the text to
    classify, then expects a JSON object with a ``"label"`` key back.

    NOTE(review): ``self.run`` is inherited from ``BaseAgent``; it presumably
    calls ``_generate_prompt`` and ``_parse_response`` -- confirm in
    AutoLLM.modules.base_agent.
    """

    def __init__(self, client, json_schema, gen_config):
        """Store prompt building blocks; arguments are forwarded to ``BaseAgent``."""
        super().__init__(client, json_schema, gen_config)
        self.template = classifier_template
        # Assistant-turn prefix that starts the JSON object for the model.
        self.guide = '{"label": '
        self.output_format = f"""label: (Literal) Return one of the choices from the following list: {possible_labels}."""
        self.system_message = "You are a helpful AI assistant."
        self.instructions = ""
        # Optional default evaluation data, set through load_data().
        self.X = None
        self.y_true = None

    def _generate_prompt(self, **kwargs):
        """Build the chat messages for one input.

        Keyword Args:
            input: the text to classify (required).
            instructions: used only when ``self.instructions`` is still empty.

        Returns:
            A list of system / user / assistant message dicts.

        Raises:
            ValueError: if no instructions are set and none were passed.
        """
        if not self.instructions:
            if 'instructions' in kwargs:
                self.instructions = kwargs['instructions']
            else:
                raise ValueError("Instructions not set.")

        self.user_prompt = self.template.format(
            instructions=self.instructions,
            output_format=self.output_format,
            input=kwargs['input']
        )
        messages = [
            {"role": "system", "content": self.system_message},
            {"role": "user", "content": self.user_prompt},
            {"role": "assistant", "content": self.guide},
        ]
        return messages

    def _parse_response(self, response):
        """Extract the ``"label"`` value from the model's JSON response.

        Raises:
            AssertionError: if the response is not valid JSON or has no
                ``"label"`` key. The offending response is printed first.
        """
        try:
            return json.loads(response)['label']
        # A tuple is required here: `except A or B` evaluates to `except A`
        # and would let KeyError escape the handler.
        except (json.JSONDecodeError, KeyError) as exc:
            print(response)
            # Explicit raise instead of `assert False`, which is stripped
            # when Python runs with -O.
            raise AssertionError(f"Could not parse classifier response: {response!r}") from exc

    def run_samples(self, X):
        """Classify every item of ``X`` in order and return the predicted labels."""
        y_pred = []
        # `text` rather than `input`, to avoid shadowing the builtin.
        for text in tqdm(X, desc="Running classifier"):
            y_pred.append(self.run(input=text))
        return y_pred

    def evaluate_accuracy(self, X=None, y_true=None):
        """Run the classifier and score it with ``accuracy_score``.

        Args:
            X: iterable of texts; falls back to the data given to load_data().
            y_true: expected labels aligned with ``X``; same fallback.

        Returns:
            Tuple ``(accuracy, y_pred)``.

        Raises:
            ValueError: if no data was passed and none was loaded.
        """
        if X is None or y_true is None:
            X, y_true = self.X, self.y_true
        if X is None or y_true is None:
            raise ValueError("No evaluation data: pass X and y_true or call load_data() first.")
        y_pred = self.run_samples(X)
        return accuracy_score(y_true, y_pred), y_pred

    def load_data(self, X, y_true):
        """Remember default evaluation data for later evaluate_accuracy() calls."""
        self.X = X
        self.y_true = y_true

## Test classifier agent

In [None]:
# Baseline instruction: simply enumerate the allowed categories.
initial_instruction = f"Label as {', '.join(possible_labels)}."

# Build the agent and give it the baseline instruction.
ca = ClassifierAgent(classifier_client, classifier_schema, classifier_gen_config)
ca.instructions = initial_instruction

# Score the baseline on the test split; the result is (accuracy, predictions).
baseline_result = ca.evaluate_accuracy(X=df_test['text'], y_true=df_test['label'])
print(baseline_result)

Running classifier:   0%|          | 0/100 [00:00<?, ?it/s]

(0.88, ['Household', 'Household', 'Books', 'Books', 'Electronics', 'Household', 'Books', 'Books', 'Clothing & Accessories', 'Household', 'Electronics', 'Household', 'Household', 'Clothing & Accessories', 'Books', 'Electronics', 'Household', 'Books', 'Electronics', 'Clothing & Accessories', 'Books', 'Household', 'Books', 'Clothing & Accessories', 'Electronics', 'Household', 'Electronics', 'Electronics', 'Household', 'Books', 'Clothing & Accessories', 'Household', 'Household', 'Household', 'Electronics', 'Household', 'Electronics', 'Household', 'Electronics', 'Books', 'Household', 'Household', 'Household', 'Household', 'Books', 'Household', 'Household', 'Books', 'Electronics', 'Household', 'Books', 'Electronics', 'Clothing & Accessories', 'Clothing & Accessories', 'Household', 'Electronics', 'Electronics', 'Books', 'Clothing & Accessories', 'Household', 'Electronics', 'Clothing & Accessories', 'Household', 'Clothing & Accessories', 'Electronics', 'Household', 'Electronics', 'Clothing & A

# Optimize prompt

In [64]:
from AutoLLM.modules.optimization import OptimusPrompt

# Sampling settings for the meta model; they are handed to OptimusPrompt
# together with the client.
meta_generation_config = dict(temperature=0.7, top_p=0.9)

# Client for the meta model, using the same endpoint and key as the classifier.
meta_client = APIClient(
    url=url,
    api_key=API_KEY,
    model="Qwen/Qwen2.5-32B-Instruct",
)

# Optimization loop

In [65]:
import random

task_description = "Label E-commerce products as their product types given their product description."
optimus_prime = OptimusPrompt(
    task_description=task_description,
    meta_client=meta_client,
    meta_generation_config=meta_generation_config,
    num_mutation_variations=5,
    num_refine_variations=5,
    num_wrong_examples=5,
)

def eval_func(instruction):
    """Set `instruction` on the classifier and score it on the test split.

    Returns the (accuracy, predictions) tuple from `ca.evaluate_accuracy`.
    """
    ca.instructions = instruction
    return ca.evaluate_accuracy(X=df_test['text'], y_true=df_test['label'])

# Candidate instructions are scored on the validation split only.
X = df_val['text'].to_list()
y_true = df_val['label'].to_list()

cache = []
# Score the initial instruction.
# NOTE: `f1` / `best_f1` hold *accuracy* (see ClassifierAgent.evaluate_accuracy);
# the names are kept so that other cells relying on them keep working.
ca.instructions = initial_instruction
f1, y_pred = ca.evaluate_accuracy(X, y_true)
optimus_prime.instruction_population.append((f1, initial_instruction, y_pred))

# Select the best instruction so far.
best_f1, best_instruction, best_y_pred = optimus_prime.select_top_k(k=1)[0]

# Bind `system_message` up front so the summary prints after the loop still
# work when `num_rounds` is 0 (it used to be assigned only inside the loop).
system_message = ca.system_message

num_rounds = 2

progress_bar = tqdm(range(num_rounds), desc="Running optimization")
for trial in progress_bar:
    progress_bar.set_description(f"Running trial {trial + 1}")
    print(f"Trial {trial+1}")

    # Mutate the best instruction.
    mutations = optimus_prime.mutate(seed_instruction=best_instruction)
    cache += mutations
    print(f"{len(mutations)} mutations generated")

    # Critique the best instruction on the examples it got wrong.
    # Built with zip instead of an index loop, so the trial counter is no
    # longer overwritten by an inner `for i in ...`.
    wrong_examples = [
        (text, expected, predicted)
        for text, expected, predicted in zip(X, y_true, best_y_pred)
        if predicted != expected
    ]
    wrong_examples = random.sample(wrong_examples, min(len(wrong_examples), optimus_prime.num_wrong_examples))

    wrong_example_text = optimus_prime.build_wrong_example_text(wrong_examples)
    critique = optimus_prime.critique(wrong_example_text=wrong_example_text, seed_instruction=best_instruction)
    print(f'{len(critique)} critiques generated')

    wrong_example_with_critique = optimus_prime.build_wrong_example_with_critique_text(wrong_examples, critique)
    print('Critique completed')
    print(wrong_example_with_critique)

    # Refine the best instruction using the critique.
    refined_instructions = optimus_prime.refine(seed_instruction=best_instruction, wrong_examples_with_critique=wrong_example_with_critique)
    print(refined_instructions)
    print('Refine completed')

    # Queue the refined instructions for scoring alongside the mutations.
    cache += refined_instructions

    # Score every queued instruction on the validation split.
    print('Scoring cached instructions')
    for instruction in cache:
        ca.instructions = instruction
        f1, y_pred = ca.evaluate_accuracy(X, y_true)
        optimus_prime.instruction_population.append((f1, instruction, y_pred))
    cache = []

    # Re-select the best instruction and fetch a system message for it.
    best_f1, best_instruction, best_y_pred = optimus_prime.select_top_k(k=1)[0]
    system_message = optimus_prime.get_expert(seed_instruction=best_instruction)

    # Per-trial check on the test split. It is only reported: selection above
    # uses validation scores exclusively.
    print("Final Evaluation")
    ca.instructions = best_instruction
    ca.system_message = system_message
    print(f"Accuracy = {ca.evaluate_accuracy(X=df_test['text'].to_list(), y_true=df_test['label'].to_list())}")



print(system_message)
print(best_instruction)




Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running optimization:   0%|          | 0/2 [00:00<?, ?it/s]

Trial 1
5 mutations generated
LLM thinking: The current instruction to label products as Household, Books, Clothing & Accessories, or Electronics seems too broad and does not provide enough context for the agent to accurately classify the examples given. The agent is likely focusing on keywords that are directly related to the categories provided, such as 'USB Cable' for Electronics and 'Clock' for Electronics, which are not always accurate indicators of the correct product type. A more detailed and nuanced instruction is needed to ensure accurate classification.
3 critiques generated
Critique completed

                [Text]:               ShineKee Mini USB Cable 50ft, Shinekee Mini 5 Pin USB Cable A to Mini B Cable Male Cord for Gopro Hero 3+, Hero Hd, Cell Phones, Mp3 Players, Dash Cam, Digital Cameras, Sat Navigation, GPS Receiver, This USB 2.0 Type A male to Mini 5-pin cable features a standard Type-A USB connector on one end and a Mini Type-B connector on the other. You can plug

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

LLM thinking: The instruction involves categorizing items or concepts into predefined categories such as Household, Books, Clothing & Accessories, and Electronics. This type of task requires an individual with strong organizational skills and a good understanding of consumer goods and their classifications. Someone working in retail or inventory management would likely possess these skills, as they often need to classify and organize products for inventory or sales purposes.
Final Evaluation


Running classifier:   0%|          | 0/100 [00:00<?, ?it/s]

Accuracy = (0.83, ['Household', 'Household', 'Books', 'Books', 'Electronics', 'Household', 'Books', 'Household', 'Household', 'Household', 'Electronics', 'Household', 'Household', 'Clothing & Accessories', 'Household', 'Household', 'Household', 'Books', 'Household', 'Household', 'Books', 'Household', 'Books', 'Clothing & Accessories', 'Household', 'Household', 'Electronics', 'Electronics', 'Household', 'Books', 'Clothing & Accessories', 'Household', 'Household', 'Household', 'Electronics', 'Household', 'Household', 'Household', 'Electronics', 'Household', 'Household', 'Household', 'Household', 'Household', 'Books', 'Household', 'Household', 'Books', 'Household', 'Household', 'Books', 'Household', 'Clothing & Accessories', 'Household', 'Household', 'Household', 'Electronics', 'Books', 'Clothing & Accessories', 'Household', 'Household', 'Clothing & Accessories', 'Household', 'Clothing & Accessories', 'Electronics', 'Household', 'Electronics', 'Clothing & Accessories', 'Electronics', 'Ele

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

Running classifier:   0%|          | 0/20 [00:00<?, ?it/s]

LLM thinking: To execute this instruction effectively, the agent needs to have a detailed understanding of product categories and the characteristics that define them. An ideal agent would be someone with experience in retail or e-commerce, particularly in product classification and inventory management. This individual should have knowledge about various materials and components used in different products, which would help in making accurate categorizations. Additionally, the agent should possess analytical skills to interpret the presence of certain indicators and assign the appropriate labels with precision.
Final Evaluation


Running classifier:   0%|          | 0/100 [00:00<?, ?it/s]

Accuracy = (0.92, ['Household', 'Household', 'Books', 'Books', 'Electronics', 'Household', 'Electronics', 'Books', 'Clothing & Accessories', 'Household', 'Electronics', 'Household', 'Household', 'Clothing & Accessories', 'Books', 'Household', 'Household', 'Books', 'Electronics', 'Clothing & Accessories', 'Books', 'Household', 'Books', 'Clothing & Accessories', 'Household', 'Household', 'Electronics', 'Electronics', 'Household', 'Books', 'Clothing & Accessories', 'Household', 'Household', 'Household', 'Electronics', 'Household', 'Household', 'Household', 'Electronics', 'Household', 'Household', 'Household', 'Household', 'Household', 'Books', 'Household', 'Household', 'Books', 'Electronics', 'Household', 'Books', 'Electronics', 'Clothing & Accessories', 'Clothing & Accessories', 'Household', 'Household', 'Electronics', 'Books', 'Clothing & Accessories', 'Household', 'Electronics', 'Clothing & Accessories', 'Household', 'Clothing & Accessories', 'Electronics', 'Household', 'Electronics', 

In [66]:
# Report the optimized prompt (instruction + system message) on the test split.
print("Final Evaluation")
ca.instructions, ca.system_message = best_instruction, system_message

# NOTE: from here on `X` / `y_true` refer to the test split, no longer the
# validation split used during optimization.
X, y_true = df_test['text'].to_list(), df_test['label'].to_list()

final_result = ca.evaluate_accuracy(X=X, y_true=y_true)
print(f"Accuracy = {final_result}")

Final Evaluation


Running classifier:   0%|          | 0/100 [00:00<?, ?it/s]

Accuracy = (0.92, ['Household', 'Household', 'Books', 'Books', 'Electronics', 'Household', 'Electronics', 'Books', 'Clothing & Accessories', 'Household', 'Electronics', 'Household', 'Household', 'Clothing & Accessories', 'Books', 'Household', 'Household', 'Books', 'Electronics', 'Clothing & Accessories', 'Books', 'Household', 'Books', 'Clothing & Accessories', 'Household', 'Household', 'Electronics', 'Electronics', 'Household', 'Books', 'Clothing & Accessories', 'Household', 'Household', 'Household', 'Electronics', 'Household', 'Household', 'Household', 'Electronics', 'Household', 'Household', 'Household', 'Household', 'Household', 'Books', 'Household', 'Household', 'Books', 'Electronics', 'Household', 'Books', 'Electronics', 'Clothing & Accessories', 'Clothing & Accessories', 'Household', 'Household', 'Electronics', 'Books', 'Clothing & Accessories', 'Household', 'Electronics', 'Clothing & Accessories', 'Household', 'Clothing & Accessories', 'Electronics', 'Household', 'Electronics', 

In [67]:
# List every scored instruction: first element is the score, second the text.
for entry in optimus_prime.instruction_population:
    score, instruction = entry[0], entry[1]
    print(score, instruction)

0.9 Use indicators such as the presence of electronic components or the material of clothing to label products as Household, Books, Clothing & Accessories, or Electronics with precision.
0.85 Label as Household,Books,Clothing & Accessories,Electronics.
0.85 Create a comprehensive list including Household, Books, Clothing & Accessories, and Electronics, then review each product description to determine the most fitting category. This approach ensures a thorough understanding of the product types.
0.85 Given the urgency of categorizing new products, quickly scan each description and label as either Household, Books, Clothing & Accessories, or Electronics, prioritizing speed while maintaining accuracy.
0.85 Based on previous experiences where products were misclassified, leading to customer confusion, label products as Household, Books, Clothing & Accessories, or Electronics to ensure accurate categorization.
0.85 Consider the potential risks of incorrect labeling, such as decreased custo

In [62]:
# Print the instruction currently bound to `best_instruction`.
# NOTE(review): this cell's execution count (62) predates the optimization
# cells above, so the saved output may be stale -- re-run to confirm.
print(best_instruction)

Label as: ['Household', 'Books', 'Clothing & Accessories', 'Electronics']. Consider the primary function and context of use for the product described. If it's a device used for home purposes, label as Household; if it's a manual or guide, label as Books; if it's an electronic device for computing, printing, or similar functions, label as Electronics; otherwise, label according to the product type.


# END