- 智能客服
    - 用户问题（一次 query）意图识别

## pipeline

In [55]:
import torch
from pathlib import Path
from time import perf_counter
import numpy as np
from transformers import pipeline
from tqdm import tqdm

In [35]:
import os
# Send both HTTP and HTTPS traffic through the same proxy endpoint.
# The address is machine-specific; adjust or remove it for other environments.
os.environ.update({
    'HTTP_PROXY': 'http://127.0.0.1:7890',
    'HTTPS_PROXY': 'http://127.0.0.1:7890',
})

In [36]:
# Hub identifier of the checkpoint that backs the classification pipeline.
bert_ckpt = 'transformersbook/bert-base-uncased-finetuned-clinc'
# Build a text-classification pipeline around that checkpoint.
pipe = pipeline(task='text-classification', model=bert_ckpt)

In [43]:
# Sample customer query; the trailing expression displays the pipeline's
# predicted label and score for it.
query = """Hey, I'd like to rent a vehicle from Nov 1st to Nov 15th in Paris and I need a 15 passenger van"""
pipe(query)

[{'label': 'car_rental', 'score': 0.5490034818649292}]

### 关于 pipeline

- `pipe.model`

In [8]:
# classifier head: 151-way classification
pipe.model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

## 模型性能评估

- Model performance
    - dataset accuracy
- Latency
    - query/inference time
- Memory
    - model size

### datasets

In [21]:
from datasets import load_dataset

In [22]:
# Load the "plus" configuration of the clinc_oos dataset.
clinc = load_dataset("clinc_oos", "plus")

Using the latest cached version of the module from /home/whaow/.cache/huggingface/modules/datasets_modules/datasets/clinc_oos/abcc41d382f8137f039adc747af44714941e8196e845dfbdd8ae7a7e020e6ba1 (last modified on Wed Jun 14 00:03:25 2023) since it couldn't be found locally at clinc_oos., or remotely on the Hugging Face Hub.
Found cached dataset clinc_oos (/home/whaow/.cache/huggingface/datasets/clinc_oos/plus/1.0.0/abcc41d382f8137f039adc747af44714941e8196e845dfbdd8ae7a7e020e6ba1)


  0%|          | 0/3 [00:00<?, ?it/s]

In [23]:
# Peek at one test example; each record carries a 'text' and an 'intent' field.
clinc['test'][42]

{'text': 'transfer $100 from my checking to saving account', 'intent': 133}

In [24]:
# clinc['test'].features['intent']

### metrics

In [31]:
# Feature object describing the 'intent' column of the test split
# (offers str2int for label-name -> integer-id lookup).
intents = clinc['test'].features['intent']

In [50]:
from datasets import load_metric
# NOTE(review): load_metric is deprecated in recent `datasets` releases in
# favour of the separate `evaluate` library — confirm against the installed
# version before relying on this cell.
accuracy_score = load_metric('accuracy')

In [56]:
class PerformanceBenchmark:
    """Measure model size, inference latency and accuracy of a pipeline.

    Args:
        pipe: Callable invoked as ``pipe(text)``; the first element of its
            result must have a ``'label'`` key. ``pipe.model.state_dict()``
            is used for the size measurement.
        dataset: Iterable of examples with ``'text'`` and ``'intent'`` keys.
            ``dataset.features['intent'].str2int`` is used to map predicted
            label names to integer ids.
        optim_type: Key under which ``run_benchmark`` stores the results.
    """

    def __init__(self, pipe, dataset, optim_type='BERT baseline'):
        self.pipe = pipe
        self.dataset = dataset
        self.optim_type = optim_type

    def compute_accuracy(self):
        """Classify every example in the dataset and report the accuracy.

        Returns:
            dict: ``{'accuracy': float}``; the value is NaN for an empty
            dataset.
        """
        # Take the label mapping from the dataset under evaluation instead of
        # a notebook-level global, so the benchmark works on any dataset
        # handed to the constructor.
        label_feature = self.dataset.features['intent']

        preds, labels = [], []
        # Could be reworked to feed the pipeline batched inputs.
        for example in tqdm(self.dataset, desc='evaluate on test dataset'):
            pred = self.pipe(example['text'])[0]['label']
            preds.append(label_feature.str2int(pred))
            labels.append(example['intent'])

        if labels:
            # Fraction of predictions that match the reference labels.
            accuracy = float(np.mean(np.asarray(preds) == np.asarray(labels)))
        else:
            accuracy = float('nan')
        print(f'Accuracy on test set: {accuracy:.3f}')
        return {'accuracy': accuracy}

    def compute_size(self):
        """Serialize the model's state dict and report its on-disk size.

        Returns:
            dict: ``{'size_mb': float}`` with the size in mebibytes.
        """
        import tempfile  # local import: only this method needs it

        state_dict = self.pipe.model.state_dict()
        # Write inside a private temporary directory so an existing
        # 'model.pth' in the working directory is never overwritten or
        # deleted, and the file is cleaned up even if saving fails.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = Path(tmp_dir) / 'model.pth'
            torch.save(state_dict, tmp_path)
            size_mb = tmp_path.stat().st_size / (1024 * 1024)
        print(f'Model size (MB): {size_mb:.2f}')
        return {'size_mb': size_mb}

    def time_pipeline(self, query='what is the pin number of my account',
                      n_warmup=10, n_runs=100):
        """Time repeated single-query calls to the pipeline.

        Args:
            query: Text passed to the pipeline on every call.
            n_warmup: Number of untimed calls made before measuring.
            n_runs: Number of timed calls; must be at least 1.

        Returns:
            dict: ``{'time_avg_ms': ..., 'time_std_ms': ...}`` with the mean
            and standard deviation of the per-call latency in milliseconds.

        Raises:
            ValueError: If ``n_runs`` is smaller than 1.
        """
        if n_runs < 1:
            raise ValueError('n_runs must be at least 1')

        # Warm-up calls are excluded from the statistics.
        for _ in range(n_warmup):
            self.pipe(query)

        latencies = []
        for _ in range(n_runs):
            start_time = perf_counter()
            self.pipe(query)
            latencies.append(perf_counter() - start_time)

        # Convert seconds to milliseconds.
        time_avg_ms = 1000 * np.mean(latencies)
        time_std_ms = 1000 * np.std(latencies)
        print(f'Average latency (ms): {time_avg_ms:.2f} +/- {time_std_ms:.2f}')
        return {'time_avg_ms': time_avg_ms, 'time_std_ms': time_std_ms}

    def run_benchmark(self):
        """Run the size, latency and accuracy measurements in that order.

        Returns:
            dict: ``{optim_type: {...}}`` merging the dictionaries returned
            by ``compute_size``, ``time_pipeline`` and ``compute_accuracy``.
        """
        metrics = {}
        metrics[self.optim_type] = self.compute_size()
        metrics[self.optim_type].update(self.time_pipeline())
        metrics[self.optim_type].update(self.compute_accuracy())
        return metrics

In [58]:
# Benchmark the baseline pipeline on the test split: model size, latency,
# then accuracy. The trailing expression displays the collected metrics.
benchmark = PerformanceBenchmark(pipe, clinc['test'])
benchmark.run_benchmark()

Model size (MB): 418.15
Average latency (ms): 11.59 +\- 0.10


evaluate on test dataset: 100%|██████████| 5500/5500 [01:00<00:00, 91.18it/s]

Accuracy on test set: 0.867





{'BERT baseline': {'size_mb': 418.1508378982544,
  'time_avg_ms': 11.589008858427405,
  'time_std_ms': 0.09915921844985716,
  'accuracy': 0.8672727272727273}}