# predict

> Use chroma, labeled instances, an LLM to predict email classification

In [1]:
#| default_exp predict

In [17]:
#| export
from pathlib import Path
import json

from langchain.schema import Document
from langchain.prompts import PromptTemplate

from classifier.schema import predict
from classifier.load import get_possible_labels
from classifier.chroma import get_or_make_chroma, concat_email_summaries

In [4]:
data_dir = Path("../data")
assert data_dir.exists()

Load chroma with embedded summaries of labeled emails

In [5]:
chroma = get_or_make_chroma(data_dir)

Load our summaries

In [11]:
summary_path = data_dir / "summaries.json"
assert summary_path.exists()
with summary_path.open('r') as f:
    summary_json = json.load(f)

In [13]:
summaries = concat_email_summaries(summary_json)
summaries[0]

Document(page_content='The email is requesting a drop ship order for Ohio State University.\nThe PO number is 7004014842, the account number is 2150126632, and the store number is 16422.\nThe drug name is EPIDIOLEX 100MG/ML SOL 100ML, the order quantity is 5, and the prescriber names are LUCRETIA LONG and PHILIP CLAYTON JONAS.\nThe prescriber NPIs or DEAs are ML0822634 and FJ142\n', metadata={'idx': 0, 'label': 'Order Processing'})

## Prediction prompt

In [18]:
labels = get_possible_labels()
labels

['Order Processing',
 'Product Inquiry',
 'Account/Inquiry',
 'General Inquiry',
 'Returns',
 'Billing / Invoice',
 'Delivery',
 'Credits',
 'Order Discrepancy',
 'Pricing',
 'Program / Promotions']

In [31]:
#| export
LABEL_STR = """- Order Processing
- Product Inquiry
- Account/Inquiry
- General Inquiry
- Returns
- Billing / Invoice
- Delivery
- Credits
- Order Discrepancy
- Pricing
- Program / Promotions
"""

PREDICTION_PROMPT_TEMPLATE = """Classify the following email into one of these categories:""" + \
    LABEL_STR + """\nEMAIL: {email}\nHere are some similar emails and their labels:{examples}
    Classification: """

PREDICTION_PROMPT = PromptTemplate.from_template(PREDICTION_PROMPT_TEMPLATE)

In [32]:
PREDICTION_PROMPT

PromptTemplate(input_variables=['email', 'examples'], template='Classify the following email into one of these categories:- Order Processing\n- Product Inquiry\n- Account/Inquiry\n- General Inquiry\n- Returns\n- Billing / Invoice\n- Delivery\n- Credits\n- Order Discrepancy\n- Pricing\n- Program / Promotions\n\nEMAIL: {email}\nHere are some similar emails and their labels:{examples}\n    Classification: ')

In [15]:
#| export
def predict(document: Document) -> str:
    pass

In [33]:
#| hide
import nbdev; nbdev.nbdev_export()