In [1]:
!pip install adaptive-classifier

Collecting adaptive-classifier
  Downloading adaptive_classifier-0.0.7-py3-none-any.whl.metadata (11 kB)
Collecting faiss-cpu>=1.7.4 (from adaptive-classifier)
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->adaptive-classifier)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->adaptive-classifier)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->adaptive-classifier)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->adaptive-classifier)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu

In [2]:
import torch
import torch.nn as nn
from typing import List, Tuple, Set
from adaptive_classifier import AdaptiveClassifier

class MultiLabelClassifier:
    """Wrapper for adaptive classifier to support multi-label classification."""

    def __init__(
        self,
        model_name: str = "distilbert/distilbert-base-uncased",
        threshold: float = 0.3,
        min_probability_diff: float = 0.1
    ):
        """Initialize multi-label classifier.

        Args:
            model_name: Name of the transformer model to use
            threshold: Minimum probability threshold for accepting a label
            min_probability_diff: Minimum difference between probabilities to consider them distinct
        """
        config = {
            'learning_rate': 0.00005,
            'prototype_update_frequency': 25,
            'similarity_threshold': 0.4
        }
        self.classifier = AdaptiveClassifier(model_name, config=config)
        self.threshold = threshold
        self.min_probability_diff = min_probability_diff

    def add_examples(self, texts: List[str], label_sets: List[Set[str]]):
        """Add examples with multiple labels.

        Args:
            texts: List of text examples
            label_sets: List of sets of labels for each example
        """
        flat_texts = []
        flat_labels = []

        for text, label_set in zip(texts, label_sets):
            for label in label_set:
                flat_texts.append(text)
                flat_labels.append(label)

        print(f"Adding {len(flat_texts)} examples with labels:", set(flat_labels))
        self.classifier.add_examples(flat_texts, flat_labels)

    def predict(self, text: str) -> List[Tuple[str, float]]:
        """Predict multiple labels for a text.

        Args:
            text: Input text

        Returns:
            List of (label, confidence) tuples for predicted labels
        """
        with torch.no_grad():
            predictions = self.classifier.predict(text)

        selected_labels = []
        prev_prob = 1.0

        for label, prob in predictions:
            if prob < self.threshold:
                continue

            if (prev_prob - prob) < self.min_probability_diff:
                selected_labels.append((label, prob))

            prev_prob = prob

        return selected_labels

# Define categories
CATEGORIES = [
    "Payment Inquiry", "Past Due Inquiry", "PO Inquiry", "Non PO Inquiry", "PO Invoice", "Non PO Invoice", "Tax Exemption",
    "Vendor Master Inquiry", "ACH Inquiry", "Advertisement and Auto-reply", "POD/PS", "Report Inquiry", "Statement",
    "HTMB/HTMI", "CC Email", "Carrollton", "CBRE", "USPI", "Reclass Request", "Summary Billing", "T & E", "Voicemail"
]

# Initialize classifier
print("Initializing classifier...")
classifier = MultiLabelClassifier(threshold=0.1, min_probability_diff=0.05)

# Sample Training Data
test_texts = [
    "Invoice #INV-12345 is past due, please confirm the payment status.",
    "We need the PO details for order #PO-98765.",
    "Please process the attached Non-PO invoice for payment.",
    "Our vendor statement for January is attached.",
    "Requesting a reclassification for invoice #INV-56897.",
    "Can you confirm the tax exemption on invoice #INV-99876?",
    "Vendor Master record needs updating.",
    "ACH payment not received for invoice #INV-77542.",
    "Automated response: Out of office.",
    "Requesting Proof of Delivery (POD) for shipment #SHIP-45678.",
    "Internal report request for last quarter.",
    "HTMB/HTMI request for transaction review.",
    "CC Email: Adding accounts payable team for visibility.",
    "Carrollton location invoice inquiry.",
    "CBRE invoice processing status request.",
    "USPI vendor payment verification.",
    "Summary billing details required for last month.",
    "Travel and expense reimbursement inquiry.",
    "Voicemail transcript for invoice query."
]
test_labels = [
    {"Past Due Inquiry"}, {"PO Inquiry"}, {"Non PO Invoice"}, {"Statement"}, {"Reclass Request"},
    {"Tax Exemption"}, {"Vendor Master Inquiry"}, {"ACH Inquiry"}, {"Advertisement and Auto-reply"},
    {"POD/PS"}, {"Report Inquiry"}, {"HTMB/HTMI"}, {"CC Email"}, {"Carrollton"}, {"CBRE"}, {"USPI"},
    {"Summary Billing"}, {"T & E"}, {"Voicemail"}
]

print("\nTraining classifier...")
classifier.add_examples(test_texts, test_labels)

# Test Predictions
test_queries = [
    """Dear Sir / Madam:

According to our records, your account has invoices that are coming due for payment  (please see below for more detail).  If you have already arranged payment, we apologize for the duplicate communication and ask that you kindly disregard this email. If your account is in fact due for payment, please check your records and let us know if the information below reflects a fair statement of your account.  Once you have reviewed your account, please contact us at your earliest convenience to discuss.


As always, we greatly appreciate your business and look forward to continuing our relationship in the future.


Account Balance:  $4,381.00
Past Due Balance: $1,900.00



Invoice #	 P.O. #	 Invoice Date	 Due Date	 Balance	 Original Amt	 Currency
3060785             	 9450235832          	  9/22/21  	  10/22/21 	          475.00	          475.00	  USD
3060803             	 9450235835          	  9/22/21  	  10/22/21 	          475.00	          475.00	  USD
3060804             	 9450235833          	  9/22/21  	  10/22/21 	          475.00	          475.00	  USD
3060806             	 9450235834          	  9/22/21  	  10/22/21 	          475.00	          475.00	  USD
3066791             	 5130092721JO        	  9/29/21  	  10/29/21 	          960.00	          960.00	  USD
3067275             	 5130092721JO        	  9/29/21  	  10/29/21 	          507.00	          507.00	  USD
3086282             	 5130101421JO        	  10/15/21 	  11/14/21 	          507.00	          507.00	  USD
3094127             	 5130101921JO        	  10/23/21 	  11/22/21 	          507.00	          507.00	  USD
Subtotal For  USD 	 4,381.00



Please note that, for any damaged product, shortages and/or returns, the customer must submit a claim form within 5 days of receipt, or the claim will not be valid. Orders under $250 may incur a $50 handling fee. The charges will appear under freight along with any contract freight charges."""]


print("\nMaking predictions:")
for query in test_queries:
    predictions = classifier.predict(query)
    print(f"\nQuery: {query}")
    if predictions:
        print("Labels:", ", ".join(f"{label} ({prob:.3f})" for label, prob in predictions))
    else:
        print("No labels met the threshold criteria")


Initializing classifier...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]


Training classifier...
Adding 19 examples with labels: {'HTMB/HTMI', 'PO Inquiry', 'CBRE', 'Summary Billing', 'Reclass Request', 'USPI', 'Carrollton', 'Tax Exemption', 'Voicemail', 'Report Inquiry', 'Past Due Inquiry', 'Vendor Master Inquiry', 'POD/PS', 'Statement', 'Advertisement and Auto-reply', 'Non PO Invoice', 'T & E', 'ACH Inquiry', 'CC Email'}

Making predictions:

Query: Dear Sir / Madam:

According to our records, your account has invoices that are coming due for payment  (please see below for more detail).  If you have already arranged payment, we apologize for the duplicate communication and ask that you kindly disregard this email. If your account is in fact due for payment, please check your records and let us know if the information below reflects a fair statement of your account.  Once you have reviewed your account, please contact us at your earliest convenience to discuss.


As always, we greatly appreciate your business and look forward to continuing our relationship

In [3]:
# Test Predictions
test_queries = [
    """ This email originated outside of CLIENT-NAME; avoid action unless you know the content is safe. Report suspicious emails using the PhishAlarm button located in your Outlook ribbon.



Hi AP,



We have still not received the payment for these invoices.



We kindly request you to send us the payment details for these invoices.



Transaction

Original Amount

Remaining Amount

Days Late

Due Date

Transaction Date

Class

Purchase Order

Sales Order

Terms

490598

$9,102.00

$4,066.43

316

30-Nov-20

31-Oct-20

Invoice

51500522693

2980986

Net 30

497602

$9,102.00

$9,102.00

271

14-Jan-21

15-Dec-20

Invoice

DNM LOT CORR INV 495793

3015294

Net 30





Regards,

Juliana Peters

Accounts Receivable – Operations



(1800) 236-3715  Toll Free

(760) 597-5430   Fax



5919 Sea Otter Place

Suite 200


USA



POWERING MOTION®



Confidentiality Notice: This message, together with any attachments, is intended only for the use of the individual or entity to which it is addressed and may contain confidential or privileged information. If you think you have received this message in error, please advise the sender and then delete this message and any attachments immediately."""]


print("\nMaking predictions:")
for query in test_queries:
    predictions = classifier.predict(query)
    print(f"\nQuery: {query}")
    if predictions:
        print("Labels:", ", ".join(f"{label} ({prob:.3f})" for label, prob in predictions))
    else:
        print("No labels met the threshold criteria")


Making predictions:

Query:  This email originated outside of CLIENT-NAME; avoid action unless you know the content is safe. Report suspicious emails using the PhishAlarm button located in your Outlook ribbon.	



Hi AP,

 

We have still not received the payment for these invoices.

 

We kindly request you to send us the payment details for these invoices.

 

Transaction

Original Amount

Remaining Amount

Days Late

Due Date

Transaction Date

Class

Purchase Order

Sales Order

Terms

490598

$9,102.00 

$4,066.43 

316

30-Nov-20

31-Oct-20

Invoice

51500522693

2980986

Net 30

497602

$9,102.00 

$9,102.00 

271

14-Jan-21

15-Dec-20

Invoice

DNM LOT CORR INV 495793

3015294

Net 30

 

 

Regards,

Juliana Peters

Accounts Receivable – Operations

 

(1800) 236-3715  Toll Free

(760) 597-5430   Fax

 

5919 Sea Otter Place

Suite 200


USA

 

POWERING MOTION®

 

Confidentiality Notice: This message, together with any attachments, is intended only for the use of the indivi

In [5]:
# Test Predictions
test_queries = [
    """hii

Riya,



Just  sent  all the below  invoices  that need  to be paid.





                DATE              INVOICES #            AMOUNT                TYPE

          10/07/2019             16010IR              $93.87      CHARGE INVOICE

          01/22/2020             78549IR              $27.51      CHARGE INVOICE



          09/21/2021               46769           $1,052.82      CHARGE INVOICE

          09/24/2021               68109             $323.95      CHARGE INVOICE







Thank You"""]



print("\nMaking predictions:")
for query in test_queries:
    predictions = classifier.predict(query)
    print(f"\nQuery: {query}")
    if predictions:
        print("Labels:", ", ".join(f"{label} ({prob:.3f})" for label, prob in predictions))
    else:
        print("No labels met the threshold criteria")


Making predictions:

Query: hii

Riya,



Just  sent  all the below  invoices  that need  to be paid.





                DATE              INVOICES #            AMOUNT                TYPE

          10/07/2019             16010IR              $93.87      CHARGE INVOICE

          01/22/2020             78549IR              $27.51      CHARGE INVOICE

          

          09/21/2021               46769           $1,052.82      CHARGE INVOICE

          09/24/2021               68109             $323.95      CHARGE INVOICE







Thank You
Labels: Non PO Invoice (0.216), Tax Exemption (0.196), CC Email (0.186), Reclass Request (0.174)
