# 1. Setup

In [None]:
!pip install --quiet langchain langchain_community langchain_ollama langmem langchain_openai

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m51.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.1/67.1 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.5/74.5 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.3/153.3 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.9/43.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.2/297.2 kB[0m [31m25.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
from google.colab import userdata
# Copy the API keys stored in Colab user data into environment variables
# (presumably read from there by the OpenAI and LangSmith clients — confirm).
os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
# LangSmith settings: tracing switch, API endpoint, credentials and project name.
os.environ['LANGSMITH_TRACING'] = "true"
os.environ['LANGSMITH_ENDPOINT'] = "https://api.smith.langchain.com"
os.environ['LANGSMITH_API_KEY'] = userdata.get('LANGSMITH_API_KEY')
os.environ['LANGSMITH_PROJECT'] = "ba_thesis_grad"

from google.colab import drive
# Mount Google Drive; the dataset and result paths used later live under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


# 2. Class

In [None]:
import pandas as pd
from sklearn.metrics import classification_report

class Stage:
  """Batch queue and prompt pool for one experiment run.

  The CSV at ``data_path`` is loaded once, converted to a list of row dicts
  and split into consecutive batches of ``batch`` rows (the last batch may be
  shorter). Batches are handed out starting from the END of that list.

  Attributes:
    prompts: candidate prompts still waiting to be used (see ``pop_prompt``).
    prompts_backup: copy of the prompt list last given to ``update_prompts``.
    best_prompt: prompt returned by ``pop_prompt`` once ``prompts`` is empty.
    round: round counter, starts at 1; callers advance it themselves.
  """

  # Single source for the initial instruction (used for both `prompts` and `best_prompt`).
  DEFAULT_PROMPT = "You are an assistant who will classify the email. The category is Technical Support, Product Support, Customer Service, IT Support, Billing and Payments"

  def __init__(self,
               data_path:str,
               batch:int=32):
    """Load ``data_path`` and split its rows into batches of ``batch`` rows.

    Raises:
      ValueError: if ``batch`` is not a positive integer.
    """
    if batch <= 0:
      # A negative step would silently produce zero batches.
      raise ValueError("batch must be a positive integer")
    self._data_path = data_path
    self._batch = batch
    self._df = pd.read_csv(self._data_path)
    self._list_of_dicts = self._df.to_dict(orient="records")
    self._lists_of_lists_dicts = [self._list_of_dicts[i:i+self._batch] for i in range(0, len(self._list_of_dicts), self._batch)]
    self.prompts = [self.DEFAULT_PROMPT]
    self.prompts_backup = list()
    self.best_prompt = self.DEFAULT_PROMPT
    self.round = 1

  def have_32_batches(self):
    """Return True while at least one batch is left.

    This is a method and must be CALLED: the bare attribute
    ``stage.have_32_batches`` is a bound-method object, which is always truthy.
    """
    # Fixed: previously read the non-existent attribute `lists_of_lists_dicts`.
    return len(self._lists_of_lists_dicts) > 0

  def pop_32_batch(self):
    """Remove and return the last remaining batch.

    Raises:
      Exception: if no batch is left.
    """
    if len(self._lists_of_lists_dicts) > 0:
      return self._lists_of_lists_dicts.pop()
    else:
      raise Exception("No more 32 batches left")

  def pop_prompt(self):
    """Remove and return the last pending prompt, or ``best_prompt`` if none is pending."""
    if len(self.prompts) > 0:
      return self.prompts.pop()
    else:
      return self.best_prompt

  def update_prompts(self, best_prompt: str, prompts: list):
    """Install a new best prompt and a new candidate list (copied, not aliased)."""
    self.best_prompt = best_prompt
    self.prompts = prompts.copy()
    self.prompts_backup = prompts.copy()

  def reset_prompts(self):
    """Restore ``prompts`` from ``prompts_backup``."""
    self.prompts = self.prompts_backup.copy()

In [None]:
# train_path = "/content/drive/MyDrive/ba_thesis/train_set.csv"
# test_path = "/content/drive/MyDrive/ba_thesis/test_set.csv"
# stage = Stage(train_path)

# 3. Ollama setup

# curl -fsSL https://ollama.com/install.sh | sh

In [None]:
# !ollama serve
!ollama --version
!ollama list
# !ollama pull qwen3:0.6b
!ollama pull qwen3:0.6b
# !ollama pull llama3.2:1b
# !ollama pull gemma3:1b

ollama version is 0.11.6
NAME          ID              SIZE      MODIFIED       
qwen3:0.6b    7df6b6e09427    522 MB    20 seconds ago    
[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l[?2026h[?25l[1G[?25h[?2026l


# 4. Helper Method

In [None]:
import multiprocessing
import csv
import re

def call_with_timeout(fn, timeout, max_retries=10, *args, **kwargs):
    """Run ``fn(*args, **kwargs)`` in a child process, retrying on timeout or error.

    Every attempt starts a fresh ``multiprocessing.Process`` and waits up to
    ``timeout`` seconds for a result on a queue. An attempt fails when nothing
    arrives in time (a still-running child is terminated), when the child
    exits without delivering a result, or when ``fn`` raises an ``Exception``.

    Args:
        fn: callable executed in the child process.
        timeout: seconds to wait for a result on each attempt.
        max_retries: maximum number of attempts.
        *args: positional arguments for ``fn``. Because ``max_retries`` comes
            first in the signature, they can only follow an explicit
            ``max_retries`` value.
        **kwargs: keyword arguments for ``fn``.

    Returns:
        The value returned by the first successful attempt.

    Raises:
        TimeoutError: if no attempt succeeded. When the last failure was an
            error rather than a timeout, that error is attached as the cause.
    """
    # Local import: the stdlib `queue` module is not imported at file level.
    from queue import Empty

    def wrapper(queue, *args, **kwargs):
        try:
            queue.put(fn(*args, **kwargs))
        except Exception as e:
            queue.put(e)

    last_error = None
    for attempt in range(1, max_retries + 1):
        result_queue = multiprocessing.Queue()
        process = multiprocessing.Process(target=wrapper, args=(result_queue, *args), kwargs=kwargs)
        process.start()

        # Read the result BEFORE joining: a child that has queued data cannot
        # exit until the parent drains it, and a bounded get() cannot block
        # forever if the child died without putting anything.
        try:
            outcome = result_queue.get(timeout=timeout)
            received = True
        except Empty:
            outcome = None
            received = False

        timed_out = (not received) and process.is_alive()
        if timed_out:
            process.terminate()
        process.join()
        result_queue.close()  # release the pipe; one queue is created per attempt

        if timed_out:
            print(f"Timeout after {timeout}s. Retrying... ({attempt}/{max_retries})")
        elif not received:
            last_error = RuntimeError(
                f"worker exited with code {process.exitcode} without returning a result")
            print(f"Error on attempt {attempt}: {last_error}")
        elif isinstance(outcome, Exception):
            last_error = outcome
            print(f"Error on attempt {attempt}: {outcome}")
        else:
            return outcome

    raise TimeoutError(f"Function exceeded timeout or failed after {max_retries} retries.") from last_error

def answers2trajectories(answers: list):
  """Turn completed samples into ``(conversation, feedback)`` trajectories.

  Args:
    answers: list of dicts with the keys "prompt", "subject", "body",
      "full_llm_answer" and "correctness".

  Returns:
    A list of tuples ``([SystemMessage, HumanMessage, AIMessage], correctness)``.
  """
  trajectories = []
  for item in answers:
    # Rebuild the user message in the same format `answer()` sends to the
    # model. str() on both fields avoids a TypeError when a CSV cell is empty
    # (pandas yields a float NaN there).
    user_text = 'subject: ' + str(item["subject"]) + '\nbody: ' + str(item["body"])
    conversation = [
      SystemMessage(content=item["prompt"]),
      HumanMessage(content=user_text),
      AIMessage(content=item["full_llm_answer"]),
    ]
    # NOTE(review): the feedback is only the stored "correctness" value; the
    # expected label is not passed on — confirm this is intended.
    trajectories.append((conversation, item["correctness"]))
  return trajectories

def append_samples_csv(new_samples, file_path):
    """Append ``new_samples`` (a list of dicts) to the CSV file at ``file_path``.

    The column order comes from the keys of the first sample. A header row is
    written only when the file does not exist yet or is empty. An empty
    ``new_samples`` is a no-op, so the file is never created with a blank
    header that would leave later rows without column names.

    Args:
        new_samples: list of dicts that share the same keys.
        file_path: destination CSV path; opened in append mode as UTF-8.
    """
    if not new_samples:
        return

    fieldnames = list(new_samples[0].keys())
    needs_header = (not os.path.isfile(file_path)) or os.path.getsize(file_path) == 0

    with open(file_path, 'a', encoding='utf-8', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerows(new_samples)

def extract_prompts(text: str) -> list:
    """
    Collect the body of every <prompt>...</prompt> block found in the input.

    Matching is non-greedy and spans newlines, so each block is returned
    separately and may contain line breaks.

    Args:
        text (str): String that may contain any number of <prompt>...</prompt> blocks.

    Returns:
        list: The inner text of each block, in order of appearance
        (empty when there is no block).
    """
    tag_body = re.compile(r"<prompt>(.*?)</prompt>", re.DOTALL)
    return [found.group(1) for found in tag_body.finditer(text)]

## 4-2. Helper method: LLM

In [None]:
from pydantic import BaseModel, Field
from typing import Literal
from langchain_ollama import ChatOllama
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage


class ResponseFormat(BaseModel):
    # Structured-output schema for one classification result.
    # Documented with comments rather than a class docstring so that the
    # generated schema stays exactly as before.

    # Free-text justification produced by the model.
    reason: str = Field(
        description="The reason why you give the answer",
    )
    # Exactly one of the five supported categories.
    answer: Literal[
        "Technical Support",
        "Product Support",
        "Customer Service",
        "IT Support",
        "Billing and Payments",
    ] = Field(
        description="The Category that this mail belongs to",
    )

def answer(subject:str, body:str, prompt: str, local_llm:str="qwen3:0.6b"):
  """Classify one email with a local Ollama chat model.

  Args:
    subject: email subject (converted to text before sending).
    body: email body (converted to text before sending).
    prompt: system prompt carrying the classification instruction.
    local_llm: Ollama model name.

  Returns:
    Tuple ``(answer, reason)`` read from the structured ``ResponseFormat`` output.
  """
  structured_llm = ChatOllama(model=local_llm, temperature=0.6).with_structured_output(ResponseFormat)
  messages = [
    SystemMessage(content=prompt),
    HumanMessage(content=f"subject: {subject}\nbody: {body}"),
  ]
  parsed = structured_llm.invoke(messages)
  return parsed.answer, parsed.reason

In [None]:
from langmem import create_prompt_optimizer
from langchain_core.runnables import RunnableConfig
from langchain.chat_models import init_chat_model
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from langmem import Prompt

def optimize(trajectories: list, prompt=None):
    """Ask the langmem "gradient" prompt optimizer for an improved prompt.

    Args:
        trajectories: list of ``(conversation, feedback)`` tuples, e.g.
            ``[(conversation_1, feedback_1), (conversation_2, feedback_2)]``;
            feedback may be ``None``.
        prompt: the prompt to improve. Defaults to the base classification
            instruction, which is what this function always used before the
            parameter existed.

    Returns:
        Whatever ``prompt_optimizer.invoke`` returns for the improved prompt.
    """
    if prompt is None:
        prompt = "You are an assistant who will classify the email. The category is Technical Support, Product Support, Customer Service, IT Support, Billing and Payments"

    model = init_chat_model("gpt-4.1", temperature=0.6)
    prompt_optimizer = create_prompt_optimizer(model, kind="gradient",
                                        config={"max_reflection_steps": 1, "min_reflection_steps": 0})

    return prompt_optimizer.invoke({"trajectories": trajectories, "prompt": prompt})

def optimize_phraseevo(trajectories: list):
    """Ask the langmem "gradient" optimizer for five alternative prompts.

    The base classification prompt is wrapped in a langmem ``Prompt`` whose
    update instruction asks for five prompts, each inside <prompt></prompt>.

    Args:
        trajectories: list of ``(conversation, feedback)`` tuples.

    Returns:
        The raw optimizer output. It is NOT split into individual prompts
        here; pass it to ``extract_prompts`` to get the list.
    """
    model = init_chat_model("gpt-4.1", temperature=0.6)
    # Fixed: the instruction was passed as `prompting_instruction`, which is
    # not a field of langmem's Prompt; `update_instructions` is the field that
    # carries instructions for the optimizer.
    prompt = Prompt(
        name="generate_5_prompts",
        prompt="You are an assistant who will classify the email. The category is Technical Support, Product Support, Customer Service, IT Support, Billing and Payments",
        update_instructions="PLEASE GENERATE FIVE PROMPTS TO COVER DIFFERENT ASPECT AND PUT EACH PROMPT IN <prompt></prompt>.",
    )
    prompt_optimizer = create_prompt_optimizer(model, kind="gradient",
                                        config={"max_reflection_steps": 1, "min_reflection_steps": 0})

    better_prompt = prompt_optimizer.invoke({"trajectories": trajectories, "prompt": prompt})
    return better_prompt

In [None]:
from concurrent.futures import ThreadPoolExecutor

def generate_multiple_prompts(trajectories: list, num_prompts: int = 5):
    """Run ``optimize`` ``num_prompts`` times concurrently on the same trajectories.

    Args:
        trajectories: passed unchanged to every ``optimize`` call.
        num_prompts: number of calls, and also the thread-pool size.

    Returns:
        The ``optimize`` results as a list, in submission order.
    """
    with ThreadPoolExecutor(max_workers=num_prompts) as pool:
        pending = []
        for _ in range(num_prompts):
            pending.append(pool.submit(optimize, trajectories))
        # Collect inside the `with` so a failing call surfaces before shutdown completes.
        return [task.result() for task in pending]

# 5. Normal Run

In [None]:
# Dataset, log and result locations for the fixed-prompt evaluation run.
train_path = "/content/drive/MyDrive/ba_thesis/train_set.csv"
test_path = "/content/drive/MyDrive/ba_thesis/test_set.csv"
log_path = "/content/drive/MyDrive/ba_thesis/results/qwen3_0.6b/validation/cot_24-08.log"
output_filepath = "/content/drive/MyDrive/ba_thesis/results/qwen3_0.6b/validation/cot_24-08.csv"
# This run reads the test set; train_path is not used in this cell.
stage = Stage(test_path)

In [None]:
import logging
import json
from datetime import datetime

# Send CRITICAL-level progress messages to both the log file and the notebook output.
logging.basicConfig(level=logging.CRITICAL, format='%(asctime)s [%(levelname)s] %(message)s',
                    handlers=[logging.FileHandler(log_path), logging.StreamHandler()], force=True)

debug_counts = 0  # number of samples classified so far

email_classification_prompt = """You are an assistant who will classify the email. The category is Technical Support, Product Support, Customer Service, IT Support, Billing and Payments.
Let's think step by step:
"""

# prompt = stage.pop_prompt()
prompt = email_classification_prompt

# Loop while batches remain. The previous condition, the bare attribute
# `stage.have_32_batches`, is a bound method and therefore always truthy, so
# the run only stopped when pop_32_batch() raised and never reached the final
# log line. The batch list is checked directly instead.
while stage._lists_of_lists_dicts:
  datalist_1x32 = stage.pop_32_batch()

  # Batch: setup
  completed_samples = list()
  # Batch operation
  for sample in datalist_1x32:
    # answer: run the local model in a child process, up to 100 attempts of 5s each
    answer_category, answer_reason = call_with_timeout(
      answer,
      timeout=5,
      max_retries=100,
      subject=sample['subject'],
      body=sample['body'],
      prompt=prompt)
    # validation: annotate the row in place and compare against the gold label in 'queue'
    sample['llm_answer'] = answer_category
    sample['full_llm_answer'] = "answer: " + answer_category + "\nreason: " + answer_reason
    sample['prompt'] = prompt
    sample['round'] = stage.round
    sample['timestamp'] = datetime.now().isoformat()
    sample["correctness"] = (sample['queue'].lower() == sample['llm_answer'].lower())
    completed_samples.append(sample)
    debug_counts += 1
    if debug_counts % 100 == 0:
      logging.critical(f"Processed {debug_counts} samples")
  # === Append the finished batch to the csv file ===
  append_samples_csv(completed_samples, output_filepath)

logging.critical("ALL SAMPLES PROCESSED")


2025-08-24 16:50:14,011 [CRITICAL] Processed 100 samples
2025-08-24 16:51:45,776 [CRITICAL] Processed 200 samples
2025-08-24 16:53:15,711 [CRITICAL] Processed 300 samples
2025-08-24 16:54:49,869 [CRITICAL] Processed 400 samples
2025-08-24 16:56:22,169 [CRITICAL] Processed 500 samples
2025-08-24 16:57:54,026 [CRITICAL] Processed 600 samples
2025-08-24 16:59:24,612 [CRITICAL] Processed 700 samples
2025-08-24 17:00:55,350 [CRITICAL] Processed 800 samples
2025-08-24 17:02:28,522 [CRITICAL] Processed 900 samples
2025-08-24 17:04:00,473 [CRITICAL] Processed 1000 samples
2025-08-24 17:05:34,402 [CRITICAL] Processed 1100 samples
2025-08-24 17:07:06,011 [CRITICAL] Processed 1200 samples
2025-08-24 17:08:37,786 [CRITICAL] Processed 1300 samples
2025-08-24 17:10:11,060 [CRITICAL] Processed 1400 samples
2025-08-24 17:11:44,060 [CRITICAL] Processed 1500 samples
2025-08-24 17:13:15,808 [CRITICAL] Processed 1600 samples
2025-08-24 17:14:47,821 [CRITICAL] Processed 1700 samples
2025-08-24 17:16:19,213

Exception: No more 32 batches left

# 6. Text Gradient: Train

In [None]:
# Dataset, log and result locations for the text-gradient training run.
# NOTE(review): the result folder is named qwen3_8b while answer() defaults to
# local_llm="qwen3:0.6b" — confirm which model produced these files.
train_path = "/content/drive/MyDrive/ba_thesis/train_set.csv"
test_path = "/content/drive/MyDrive/ba_thesis/test_set.csv"
log_path = "/content/drive/MyDrive/ba_thesis/results/qwen3_8b/grad_17-08.log"
# Per-round training answers.
output_filepath = "/content/drive/MyDrive/ba_thesis/results/qwen3_8b/grad_17-08.csv"
# Answers produced while scoring candidate prompts in the beam-search step.
output_beamsearch_filepath = "/content/drive/MyDrive/ba_thesis/results/qwen3_8b/grad_beam_17-08.csv"
# This run reads the training set; test_path is not used in this cell.
stage = Stage(train_path)

In [None]:
import logging
import json
from datetime import datetime

# Send CRITICAL-level progress messages to both the log file and the notebook output.
logging.basicConfig(level=logging.CRITICAL, format='%(asctime)s [%(levelname)s] %(message)s',
                    handlers=[logging.FileHandler(log_path), logging.StreamHandler()], force=True)


def _classify_sample(sample, prompt):
  """Classify one dataset row with `prompt` and return an annotated COPY of the row.

  The copy carries the model answer, the prompt used, the current round, a
  timestamp and whether the answer matches the gold label in 'queue'
  (case-insensitive). Shared by the training loop and the beam search.
  """
  answer_category, answer_reason = call_with_timeout(
    answer,
    timeout=10,
    max_retries=100,
    subject=sample['subject'],
    body=sample['body'],
    prompt=prompt)
  record = dict(sample)
  record['llm_answer'] = answer_category
  record['full_llm_answer'] = "answer: " + answer_category + "\nreason: " + answer_reason
  record['prompt'] = prompt
  record['round'] = stage.round
  record['timestamp'] = datetime.now().isoformat()
  record["correctness"] = (record['queue'].lower() == record['llm_answer'].lower())
  return record


debug_counts = 0  # number of training samples classified so far

# Loop while batches remain. The previous condition, the bare attribute
# `stage.have_32_batches`, is a bound method and therefore always truthy, so
# the run only stopped when pop_32_batch() raised. The batch list is checked
# directly instead.
while stage._lists_of_lists_dicts:
  datalist_1x32 = stage.pop_32_batch()
  prompt = stage.pop_prompt()

  # === Training round: answer one batch with the current prompt ===
  completed_samples = list()
  for sample in datalist_1x32:
    completed_samples.append(_classify_sample(sample, prompt))
    debug_counts += 1
    if debug_counts % 100 == 0:
      logging.critical(f"Processed {debug_counts} samples")
  # === Append to a csv file & Update Round ===
  append_samples_csv(completed_samples, output_filepath)
  stage.round += 1
  # === Update Prompt ===
  # NOTE(review): optimize() is called without the prompt used in this round,
  # so it refines its built-in base instruction every time — confirm intended.
  trajectories = answers2trajectories(completed_samples)
  better_prompt = optimize(trajectories)
  stage.prompts_backup.append(better_prompt)

  # === Beam Search: once 10 candidates exist, score them on held-out batches ===
  if len(stage.prompts_backup) >= 10:
    # Take up to three batches, but never more than are left; the unguarded
    # triple pop crashed near the end of the data set.
    beam_pool = list()
    for _ in range(3):
      if stage._lists_of_lists_dicts:
        beam_pool.extend(stage.pop_32_batch())
    if not beam_pool:
      break  # no data left to score the candidates on

    prompts_scores = list()
    while len(stage.prompts_backup) > 0:
      candidate = stage.prompts_backup.pop()
      test_completed_samples = [_classify_sample(sample, candidate) for sample in beam_pool]
      append_samples_csv(test_completed_samples, output_beamsearch_filepath)
      # macro-averaged F1 of this candidate on the pool
      df_test_completed_samples = pd.DataFrame(test_completed_samples)
      f1_score = classification_report(df_test_completed_samples['queue'], df_test_completed_samples['llm_answer'], output_dict=True, zero_division=0)["macro avg"]["f1-score"]
      prompts_scores.append((candidate, f1_score))

    top_5_prompts_scores = sorted(prompts_scores, key=lambda x: x[1], reverse=True)[:5]
    # The list is sorted best-first, so its head is the best prompt. Taking it
    # directly also avoids installing `None` as best prompt when every score is 0.
    best_prompt, best_score = top_5_prompts_scores[0]
    # logging.critical(f"Best Prompt: {best_prompt}")
    logging.critical(f"Best Score: {best_score}")
    stage.update_prompts(best_prompt, [candidate for candidate, _ in top_5_prompts_scores])

logging.critical("ALL SAMPLES PROCESSED")


2025-08-17 10:25:25,597 [CRITICAL] Processed 100 samples
2025-08-17 10:29:20,820 [CRITICAL] Processed 200 samples
2025-08-17 10:33:05,113 [CRITICAL] Processed 300 samples
2025-08-17 11:08:24,359 [CRITICAL] Best Score: 0.3908683707596751
2025-08-17 11:12:28,274 [CRITICAL] Processed 400 samples
2025-08-17 11:51:55,590 [CRITICAL] Best Score: 0.3816307696277689
2025-08-17 11:53:31,268 [CRITICAL] Processed 500 samples
2025-08-17 11:57:55,114 [CRITICAL] Processed 600 samples
2025-08-17 12:34:29,976 [CRITICAL] Best Score: 0.4635980890294954
2025-08-17 12:37:45,722 [CRITICAL] Processed 700 samples
2025-08-17 13:15:43,647 [CRITICAL] Best Score: 0.3869305120167189
2025-08-17 13:16:25,115 [CRITICAL] Processed 800 samples
2025-08-17 13:20:49,012 [CRITICAL] Processed 900 samples
2025-08-17 13:58:39,835 [CRITICAL] Best Score: 0.3077047805474015
2025-08-17 14:01:43,174 [CRITICAL] Processed 1000 samples
2025-08-17 14:06:10,965 [CRITICAL] Processed 1100 samples
2025-08-17 14:42:31,762 [CRITICAL] Best S

Timeout after 10s. Retrying... (1/100)


2025-08-18 00:18:35,753 [CRITICAL] Best Score: 0.4432072990300838
2025-08-18 00:20:40,146 [CRITICAL] Processed 3400 samples
2025-08-18 00:25:29,443 [CRITICAL] Processed 3500 samples
2025-08-18 01:01:25,161 [CRITICAL] Best Score: 0.3495359573934921
2025-08-18 01:05:49,924 [CRITICAL] Processed 3600 samples
2025-08-18 01:44:12,452 [CRITICAL] Best Score: 0.4377335978379621
2025-08-18 01:45:54,375 [CRITICAL] Processed 3700 samples
2025-08-18 01:50:10,137 [CRITICAL] Processed 3800 samples
2025-08-18 02:24:55,225 [CRITICAL] Best Score: 0.35437181409295354
2025-08-18 02:28:01,076 [CRITICAL] Processed 3900 samples
2025-08-18 03:06:09,364 [CRITICAL] Best Score: 0.3793815874017878
2025-08-18 03:06:48,351 [CRITICAL] Processed 4000 samples
2025-08-18 03:10:52,269 [CRITICAL] Processed 4100 samples
2025-08-18 03:47:19,408 [CRITICAL] Best Score: 0.38981541334482517
2025-08-18 03:49:53,139 [CRITICAL] Processed 4200 samples
2025-08-18 03:53:35,471 [CRITICAL] Processed 4300 samples
2025-08-18 04:29:48,98

# 7. Phrase Evo: Train

In [None]:
# Dataset, log and result locations for the phrase-evolution training run.
# NOTE(review): the result folder is named qwen3_8b while answer() defaults to
# local_llm="qwen3:0.6b" — confirm which model produced these files.
train_path = "/content/drive/MyDrive/ba_thesis/train_set.csv"
test_path = "/content/drive/MyDrive/ba_thesis/test_set.csv"
log_path = "/content/drive/MyDrive/ba_thesis/results/qwen3_8b/phrase_16-08.log"
# Answers produced while scoring each candidate prompt.
output_beamsearch_filepath = "/content/drive/MyDrive/ba_thesis/results/qwen3_8b/phrase_beam_16-08.csv"
# This run reads the training set; test_path is not used in this cell.
stage = Stage(train_path)

In [None]:
import logging
import json
from datetime import datetime

# Send CRITICAL-level progress messages to both the log file and the notebook output.
logging.basicConfig(level=logging.CRITICAL, format='%(asctime)s [%(levelname)s] %(message)s',
                    handlers=[logging.FileHandler(log_path), logging.StreamHandler()], force=True)

debug_counts = 0  # number of (sample, prompt) evaluations so far
# Five hand-written seed prompts for the first round.
stage.prompts = ["Read the email and determine which of the following best describes the user's issue: Technical Support, Product Support, Customer Service, IT Support, or Billing and Payments.",
                 "Classify this customer email into one of the following categories to ensure it is routed to the appropriate team: Technical Support, Product Support, Customer Service, IT Support, Billing and Payments.",
                 "Analyze the content of this email and assign it to the most relevant support category: Technical Support, Product Support, Customer Service, IT Support, or Billing and Payments.",
                 "Based on the customer's intent expressed in the email, classify it into one of these categories: Technical Support, Product Support, Customer Service, IT Support, Billing and Payments.",
                 "Review the following email and classify the type of service request it contains using these categories: Technical Support, Product Support, Customer Service, IT Support, Billing and Payments."]

# Loop while batches remain. The previous condition, the bare attribute
# `stage.have_32_batches`, is a bound method and therefore always truthy, so
# the run only stopped when pop_32_batch() raised. The batch list is checked
# directly instead.
while stage._lists_of_lists_dicts:
  # Evaluation pool for this round: up to three batches, never more than are left.
  eval_pool = stage.pop_32_batch()
  for _ in range(2):
    if stage._lists_of_lists_dicts:
      eval_pool.extend(stage.pop_32_batch())

  # One (prompt, macro-F1, samples) entry per candidate prompt.
  evaluated = list()
  while len(stage.prompts) > 0:
    prompt = stage.prompts.pop()
    temp_completed_samples = list()
    for sample in eval_pool:
      # answer: run the local model in a child process, up to 100 attempts of 5s each
      answer_category, answer_reason = call_with_timeout(
        answer,
        timeout=5,
        max_retries=100,
        subject=sample['subject'],
        body=sample['body'],
        prompt=prompt)
      # validation: annotate a COPY of the row. The pool is reused for every
      # prompt, so writing into the shared dict would let the last prompt
      # overwrite the stored results of all earlier ones.
      record = dict(sample)
      record['llm_answer'] = answer_category
      record['full_llm_answer'] = "answer: " + answer_category + "\nreason: " + answer_reason
      record['prompt'] = prompt
      record['round'] = stage.round
      record['timestamp'] = datetime.now().isoformat()
      record["correctness"] = (record['queue'].lower() == record['llm_answer'].lower())
      temp_completed_samples.append(record)

    # Log whenever the counter crosses a multiple of 100; an exact
    # `% 100 == 0` test almost never fires when counting in pool-sized steps.
    previous_count = debug_counts
    debug_counts += len(eval_pool)
    if debug_counts // 100 > previous_count // 100:
      logging.critical(f"Processed {debug_counts} samples")

    # === Append to a csv file ===
    append_samples_csv(temp_completed_samples, output_beamsearch_filepath)

    df_test_completed_samples = pd.DataFrame(temp_completed_samples)
    f1_score = classification_report(df_test_completed_samples['queue'], df_test_completed_samples['llm_answer'], output_dict=True, zero_division=0)["macro avg"]["f1-score"]
    evaluated.append((prompt, f1_score, temp_completed_samples))

  # === All candidate prompts scored & Update Round ===
  stage.round += 1
  # max() returns the first best entry, which matches a stable descending sort,
  # and always yields samples for the optimizer even when every score is 0.
  best_prompt, best_score, best_samples = max(evaluated, key=lambda entry: entry[1])
  logging.critical(f"Best Score: {str(best_score)}")

  # === Update Prompt: keep the winner and add four rewrites based on its answers ===
  trajectories = answers2trajectories(best_samples)
  better_prompts = generate_multiple_prompts(trajectories, 4)
  # The winner is added once. It used to be appended a second time, which gave
  # six prompts per round, one of them evaluated but never scored.
  stage.prompts = [best_prompt]
  stage.prompts.extend(better_prompts)

logging.critical("ALL SAMPLES PROCESSED")


Timeout after 5s. Retrying... (1/100)


Process Process-409:
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipython-input-2094784163.py", line 8, in wrapper
    result = fn(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipython-input-232796065.py", line 15, in answer
    output = llm.invoke([SystemMessage(content=prompt), HumanMessage(content=query)])
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


KeyboardInterrupt: 

  File "/usr/local/lib/python3.11/dist-packages/langchain_core/runnables/base.py", line 3047, in invoke
    input_ = context.run(step.invoke, input_, config, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/langchain_core/runnables/base.py", line 5441, in invoke
    return self.bound.invoke(
           ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/langchain_core/language_models/chat_models.py", line 383, in invoke
    self.generate_prompt(
  File "/usr/local/lib/python3.11/dist-packages/langchain_core/language_models/chat_models.py", line 1006, in generate_prompt
    return self.generate(prompt_messages, stop=stop, callbacks=callbacks, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/langchain_core/language_models/chat_models.py", line 825, in generate
    self._generate_with_cache(
  File "/usr/local/

In [None]:
# Few-shot variant of the classification prompt: lists the five categories and
# gives one example email per category. It replaces the value of
# `email_classification_prompt` assigned in the earlier evaluation cell.
email_classification_prompt = """Classify the following customer emails into one of these categories:

- Technical Support
- Product Support
- Customer Service
- IT Support
- Billing and Payments

### Examples:

Email 1:
"My app keeps crashing whenever I try to upload a file. Can someone help me fix this?"
Category: Technical Support

Email 2:
"I’m trying to understand how to use the new dashboard feature. Is there a guide?"
Category: Product Support

Email 3:
"I was overcharged on my last invoice and need a refund."
Category: Billing and Payments

Email 4:
"I can’t log in to the company VPN, and I have an urgent meeting."
Category: IT Support

Email 5:
"I had a great experience with your team and just wanted to say thank you!"
Category: Customer Service
"""
