## Optimize Prompts with an OPRO implementation
### Apply it to a text classification task

OPRO:
- https://arxiv.org/pdf/2309.03409
- https://github.com/google-deepmind/opro

In [84]:
import boto3
import re
import time
import pandas as pd
from multiprocessing.pool import ThreadPool
from threading import Lock

In [85]:
# Run configuration.
DATASET_CSV = "train.csv"  # path load_dataset() writes the sampled dataset to
NUM_EXAMPLES = 3           # labelled examples embedded in every meta-prompt
NUM_ITERATIONS = 6         # default number of optimization rounds in optimize()
NUM_SAMPLES = 20           # upper bound on rows drawn to score a single prompt

In [86]:
# Labels the classifier may output. The list is substituted for the
# `{categories}` placeholder of every instruction before it is sent to the model.
ALLOWED_CATEGORIES = [
    'ADVOCATE', 'BUSINESS-DEVELOPMENT', 'CONSULTANT', 'FITNESS',
    'HEALTHCARE', 'PUBLIC-RELATIONS', 'CHEF', 'SALES', 'FINANCE',
    'ARTS', 'ENGINEERING', 'HR', 'INFORMATION-TECHNOLOGY', 'DESIGNER',
    'CONSTRUCTION', 'ACCOUNTANT', 'AVIATION', 'AGRICULTURE',
    'DIGITAL-MEDIA', 'TEACHER', 'BANKING', 'APPAREL', 'AUTOMOBILE',
    'BPO',
]

# Appended to every classification prompt so the model answers with a JSON
# document holding an optional `reasoning` and a mandatory `predicted_category`.
OUTPUT_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON document that conforms to the following JSON schema:
{"properties": {"reasoning": {"title": "Reasoning", "type": "string"}, "predicted_category": {"title": "Predicted Category", "type": "string"}}, "required": ["predicted_category"], "example": {"reasoning": "The candidate had worked as a graphic designer at several media companies.", "predicted_category": "DESIGNER"}}
"""

# Hand-written starting instructions; the optimization loop begins from these.
SEED_PROMPTS = [
"""You are an HR recruiter. Review candidate's resume and classify the candidate to one of the following categories: {categories}.
""",
"""You are an assistant in the HR department. Classify the following resume into one of the following categories: {categories}.
""",
"""Review the following resume and assign it to one of the following categories: {categories}.
"""
]

In [87]:
# Shared Bedrock runtime client; every model call in this notebook goes through it.
bedrock = boto3.client(service_name='bedrock-runtime')

In [88]:
# Bedrock model identifiers used in this notebook.
SONNET = 'anthropic.claude-3-sonnet-20240229-v1:0'
HAIKU = 'anthropic.claude-3-haiku-20240307-v1:0'

# Running input/output token totals per model; updated by converse() while
# holding `lock`, because prompts are evaluated from several threads.
stats = {model_id: {'in': 0, 'out': 0} for model_id in (SONNET, HAIKU)}
lock = Lock()

In [89]:
def trim_uppercase_words(text):
    """Drop a leading upper-case heading from ``text``.

    Leading whitespace is removed first. If what remains starts with a run
    of capital letters, ``/``, ``-`` or whitespace, that run is discarded
    together with the word characters that directly follow it, and the rest
    is returned left-stripped. Text that does not start with such a run is
    returned with only its leading whitespace removed.
    """
    stripped = text.lstrip()
    heading = re.match(r'^[A-Z/\-\s]+(\w*)', stripped)
    if heading is None:
        return stripped
    # Cut at the end of the captured trailing word, not at the end of the
    # upper-case run alone.
    remainder = stripped[heading.end(1):]
    return remainder.lstrip()

In [90]:
def load_dataset(samples_per_label=5):
    """Download the resume dataset and return a small, shuffled, balanced sample.

    Reads the train split from the ``hf://`` location, draws
    ``samples_per_label`` rows per ``label`` without replacement, shuffles
    the rows, cleans every ``text`` with ``trim_uppercase_words`` and writes
    the result to ``DATASET_CSV`` before returning it.

    Args:
        samples_per_label: Rows drawn for each label (default 5, the value
            that used to be hard-coded).

    Returns:
        The sampled ``pandas.DataFrame`` with a fresh 0..n-1 index.
    """
    splits = {'train': 'train.csv', 'test': 'test.csv'}
    df = pd.read_csv("hf://datasets/Manirathinam21/Resume_classification/" + splits["train"])
    df = (
        df.groupby('label').sample(n=samples_per_label, replace=False)
        .sample(frac=1)  # frac=1 reshuffles all rows so labels are interleaved
        .reset_index(drop=True)
    )
    df['text'] = df['text'].apply(trim_uppercase_words)
    df.to_csv(DATASET_CSV, index=False)
    return df

In [91]:
def converse(prompt, model, temperature, maxTokens, max_attempts=3, retry_delay=60):
    """Send a single-turn user prompt to a Bedrock model and return its text reply.

    Token usage reported in the response is added to the module-level
    ``stats`` counters while holding ``lock``.

    Args:
        prompt: Text of the user message.
        model: Bedrock model id; also the key into ``stats``.
        temperature: Sampling temperature passed in ``inferenceConfig``.
        maxTokens: Output token limit passed in ``inferenceConfig``.
        max_attempts: Total number of tries before giving up.
        retry_delay: Seconds to wait between two tries.

    Returns:
        The text of the first content item of the reply, or the literal
        string ``"Model invocation error"`` when every attempt failed.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            response = bedrock.converse(
                modelId=model,
                messages=[
                    {"role": "user", "content": [{"text": prompt}]}
                ],
                inferenceConfig={
                    "temperature": temperature,
                    "maxTokens": maxTokens,
                },
            )
            with lock:
                stats[model]['in'] += response['usage']['inputTokens']
                stats[model]['out'] += response['usage']['outputTokens']
            return response['output']['message']['content'][0]['text']
        except Exception as e:
            # Deliberately best-effort: report the failure and try again.
            print(e)
            # Waiting only makes sense when another attempt follows.
            if attempt < max_attempts:
                time.sleep(retry_delay)
    return "Model invocation error"

def call_optimizer(prompt):
    """Run ``prompt`` on the SONNET model at temperature 1.0 with up to 4000 output tokens."""
    return converse(prompt=prompt, model=SONNET, temperature=1.0, maxTokens=4000)

def call_model(prompt):
    """Run ``prompt`` on the HAIKU model at temperature 1.0 with up to 1000 output tokens."""
    return converse(prompt=prompt, model=HAIKU, temperature=1.0, maxTokens=1000)

def call_scorer(prompt):
    """Run ``prompt`` on the HAIKU model at temperature 0.0 with up to 10 output tokens."""
    return converse(prompt=prompt, model=HAIKU, temperature=0.0, maxTokens=10)

In [92]:
def extract_prediction(text):
    """Ask the scorer model which allowed category appears in ``text``.

    Args:
        text: Raw classifier output to search for a category name.

    Returns:
        The scorer's reply with surrounding whitespace removed, so that a
        stray space or newline cannot break the caller's exact comparison
        against the label.
    """
    prompt_template = """Search for the categorization result in a given text. Categories: {categories}.
    Text: {text}. Look for an exact match. Write only the extracted category. Category:"""
    prompt = prompt_template.format(categories=", ".join(ALLOWED_CATEGORIES), text=text)
    return call_scorer(prompt).strip()

In [93]:
def evaluate_prompt(prompt, samples):
    """Score one instruction by its classification accuracy on ``samples``.

    For every row the instruction, the row's ``text`` and the output-format
    instructions are sent to the classifier model; the category extracted
    from the reply is compared with the row's ``label``. A ``1`` or ``0`` is
    printed per row as a progress indicator.

    Args:
        prompt: Instruction text; a ``{categories}`` placeholder is replaced
            by the comma-separated allowed categories.
        samples: DataFrame with ``text`` and ``label`` columns.

    Returns:
        Fraction of rows classified correctly, or ``0.0`` when ``samples``
        is empty.
    """
    total = len(samples)
    if total == 0:
        # Nothing to score; avoid dividing by zero below.
        return 0.0

    # The substitution does not depend on the row, so do it once.
    instruction = prompt.replace('{categories}', ", ".join(ALLOWED_CATEGORIES))
    correct = 0
    for _, sample in samples.iterrows():
        # NOTE: this used to read "\n\text:", where "\t" is a TAB escape, so
        # the model saw "<TAB>ext:" instead of a label for the resume text.
        full_prompt = f"{instruction}\n\nText: {sample['text']}\n{OUTPUT_FORMAT_INSTRUCTIONS}"
        prediction = call_model(full_prompt).strip()
        if extract_prediction(prediction) == sample['label']:
            print("1", end="")
            correct += 1
        else:
            print("0", end="")
    return correct / total

In [94]:
def evaluate_prompts(instructions, dataset):
    """Score several instructions concurrently, one worker thread per instruction.

    Each instruction is evaluated with ``evaluate_prompt`` on its own random
    draw of at most ``NUM_SAMPLES`` rows from ``dataset``.

    Args:
        instructions: Sequence of instruction strings.
        dataset: DataFrame to sample evaluation rows from.

    Returns:
        List of scores in the same order as ``instructions``. An empty list
        is returned when there is nothing to evaluate or when evaluation
        fails (the error is printed); callers must therefore check the
        length before pairing scores with instructions.
    """
    try:
        n = len(instructions)
        if n == 0:
            # ThreadPool(0) raises ValueError; there is nothing to do anyway.
            return []
        sample_size = min(NUM_SAMPLES, dataset.shape[0])
        samples = [dataset.sample(n=sample_size) for _ in range(n)]
        # The context manager shuts the pool down even when a worker raises,
        # so no threads are left behind.
        with ThreadPool(n) as pool:
            results = pool.starmap(evaluate_prompt, zip(instructions, samples))
        print(results)
        return results
    except Exception as e:
        # Best-effort: report and let the caller carry on without scores.
        print(e)
        return []

In [95]:
def extract_inst(text):
    """Return the contents of every ``<inst>...</inst>`` span in ``text``, in order.

    Matching is non-greedy and spans may contain newlines.
    """
    tag_re = re.compile(r'<inst>(.*?)</inst>', re.DOTALL)
    return [found.group(1) for found in tag_re.finditer(text)]

def generate_instructions(meta_prompt):
    """Ask the optimizer model for new instructions and return at most three.

    The reply is searched for ``<inst>`` blocks; all of them are printed,
    but only the first three are returned.
    """
    candidates = extract_inst(call_optimizer(meta_prompt))
    print("Generated prompts:", candidates)
    first_three = candidates[:3]
    return first_three

def format_instructions(instructions, scores):
    """Render instruction/score pairs as text for the meta-prompt.

    Each pair becomes an ``Instruction:`` line followed by a ``Score:`` line
    with four decimals; pairs are separated by a newline.
    """
    lines = []
    for instr, score in zip(instructions, scores):
        lines.append(f"Instruction: {instr}")
        lines.append(f"Score: {score:.4f}")
    return "\n".join(lines)

def generate_examples(dataset, num_examples):
    """Draw ``num_examples`` random rows and render them as labelled examples.

    Every row becomes a ``Text:`` line followed by a ``Category:`` line;
    rows are separated by a blank line.
    """
    picked = dataset.sample(n=num_examples)
    rendered = []
    for _, row in picked.iterrows():
        rendered.append(f"Text: {row['text']}\nCategory: {row['label']}")
    return "\n\n".join(rendered)

In [96]:
def _pair_scored(instructions, scores):
    """Pair each instruction with its score, or return [] when the counts disagree."""
    if len(instructions) != len(scores):
        # A failed evaluation yields an empty score list; pairing anyway would
        # attach scores to the wrong instructions.
        print(f"Discarding {len(instructions)} instruction(s): received {len(scores)} score(s)")
        return []
    return list(zip(instructions, scores))


def _best_first(pairs):
    """Return (instruction, score) pairs ordered by descending score (stable)."""
    return sorted(pairs, key=lambda pair: pair[1], reverse=True)


def optimize(dataset, num_iterations=NUM_ITERATIONS, max_history=20):
    """Run the OPRO loop and return the best instruction found.

    The seed prompts are scored first. In every iteration the optimizer
    model is shown random labelled examples plus the instructions kept so
    far with their scores, and is asked for new instructions; these are
    scored and merged into the kept list, which is trimmed to the
    ``max_history`` highest-scoring entries.

    Args:
        dataset: DataFrame with ``text`` and ``label`` columns.
        num_iterations: Number of optimization rounds.
        max_history: How many scored instructions are carried between rounds.

    Returns:
        Tuple ``(best_instruction, best_score)``.

    Raises:
        RuntimeError: If no instruction could be scored at all.
    """
    print("Optimizing...")
    seed_scores = evaluate_prompts(SEED_PROMPTS, dataset)
    # Rank the seeds as well, so the best one is reported even with zero rounds.
    ranked = _best_first(_pair_scored(SEED_PROMPTS, seed_scores))

    for iteration in range(num_iterations):
        print(f"Iteration {iteration + 1}")
        best_instructions = [pair[0] for pair in ranked]
        best_scores = [pair[1] for pair in ranked]

        # Prepare meta-prompt
        meta_prompt = f"""
        You are an AI assistant tasked with generating instructions for a text classification task.
        The goal is to classify text into one of these categories: {", ".join(ALLOWED_CATEGORIES)}.
        
        Here are some example texts with their correct categories:
        <examples>
        {generate_examples(dataset, NUM_EXAMPLES)}
        </examples>
        
        Previous instructions and their scores (higher is better):
        <previous_instructions>
        {format_instructions(best_instructions, best_scores)}
        </previous_instructions>
        
        Please generate 3 new instruction sets that could potentially achieve higher accuracy.
        Each instruction-set should be enclosed inside <inst></inst> tags.
        Leverage previous instructions and examples to enrich category descriptions and strive for better classification results.
        """

        new_instructions = generate_instructions(meta_prompt)
        new_scores = evaluate_prompts(new_instructions, dataset)

        # Pair candidates with their own scores BEFORE merging; zipping the
        # concatenated lists misaligns them whenever one evaluation failed.
        ranked = _best_first(ranked + _pair_scored(new_instructions, new_scores))[:max_history]

        if ranked:
            print(f"Best instruction: {ranked[0][0]}")
            print(f"Best score: {ranked[0][1]}")
        print()

    if not ranked:
        raise RuntimeError("No instruction could be scored; see the errors printed above.")
    best_instruction, best_score = ranked[0]
    return best_instruction, best_score

In [97]:
%%time
# Notebook driver cell (runs at cell level, not under an
# `if __name__ == "__main__":` guard). The `%%time` magic above must remain
# the first line of the cell; it reports how long the whole run takes.
# Steps: build the sampled dataset, run the optimization loop, then print the
# winning instruction, its score and the accumulated token counters.
dataset = load_dataset()
best_instruction, best_score = optimize(dataset)
print(f"Final best instruction: {best_instruction}")
print(f"Final best score: {best_score}")
print(f"Stats: {stats}")

Optimizing...
111011011100001111111001011010100001111000000000001111111011[0.45, 0.65, 0.55]
Iteration 1
Generated prompts: ["You are an AI resume classifier tasked with categorizing resumes into one of the following job roles: {categories}. Analyze the candidate's skills, experience, education, and qualifications to determine the most suitable category for their profile. Draw insights from the provided examples to accurately match resumes to the relevant job category.", "Your role is to develop a resume classification system that can assign incoming resumes to one of the following career fields: {categories}. Carefully review the candidate's background, including their work history, technical skills, accomplishments, and educational training. Utilize the example resumes as a reference to identify key patterns and accurately map new resumes to the corresponding job category.", "As an intelligent resume parser, you must categorize the following resume into one of these occupational cate