- 智能客服
    - 用户问题（一次 query）意图识别

## pipeline

In [None]:
import torch
from pathlib import Path
from time import perf_counter
import numpy as np
from transformers import pipeline
from tqdm import tqdm

In [None]:
import os
# Send this process's HTTP(S) traffic through a proxy on localhost:7890.
# NOTE(review): the address is machine-specific — presumably needed for the
# model/dataset downloads below; adjust or remove if no proxy runs there.
os.environ['HTTP_PROXY'] = 'http://127.0.0.1:7890'
os.environ['HTTPS_PROXY'] = 'http://127.0.0.1:7890'

In [None]:
# Checkpoint name of the BERT classifier used as the baseline in this notebook.
bert_ckpt = 'transformersbook/bert-base-uncased-finetuned-clinc'
# Run on the first GPU when CUDA is available; otherwise fall back to CPU (-1)
# so the notebook still works on machines without a GPU.
pipe = pipeline(
    'text-classification',
    model=bert_ckpt,
    device=0 if torch.cuda.is_available() else -1,
)

In [None]:
# Device of the model's first parameter, i.e. where the weights were placed.
next(pipe.model.parameters()).device

In [None]:
# Sample user utterance to classify with the pipeline.
query = (
    "Hey, I'd like to rent a vehicle from Nov 1st to Nov 15th "
    "in Paris and I need a 15 passenger van"
)
pipe(query)

### 关于 pipeline

- `pipe.model`

In [None]:
# classifier head: 151-way classification
pipe.model

## 模型性能评估

- Model performance
    - dataset accuracy
- Latency
    - query/inference time
- Memory
    - model size

### datasets

In [None]:
from datasets import load_dataset

In [None]:
# Load the "plus" configuration of the clinc_oos dataset.
clinc = load_dataset("clinc_oos", "plus")

In [None]:
# Look at a single example from the test split.
clinc['test'][42]

In [None]:
# clinc['test'].features['intent']

### metrics

In [None]:
# Feature object of the `intent` column; its str2int() is used by the benchmark
# class below to map predicted label names to integer ids.
intents = clinc['test'].features['intent']

In [None]:
from datasets import load_metric
# Accuracy metric object; PerformanceBenchmark.compute_accuracy calls its compute().
# NOTE(review): load_metric is deprecated in newer `datasets` releases in favor of
# the separate `evaluate` package — confirm against the installed version.
accuracy_score = load_metric('accuracy')

In [None]:
class PerformanceBenchmark:
    """Measure size, latency and accuracy of a text-classification pipeline.

    Args:
        pipe: Callable invoked as ``pipe(text)``; its result is indexed as
            ``result[0]['label']``. It must also expose
            ``pipe.model.state_dict()`` for the size measurement.
        dataset: Iterable of examples providing ``'text'`` and ``'intent'`` keys.
        optim_type: Key under which ``run_benchmark`` reports the metrics.
    """

    def __init__(self, pipe, dataset, optim_type='BERT baseline'):
        self.pipe = pipe
        self.dataset = dataset
        self.optim_type = optim_type

    def compute_accuracy(self):
        """Run the pipeline over ``self.dataset`` and return the accuracy dict.

        Relies on the notebook-level globals ``intents`` (to convert predicted
        label names to integer ids) and ``accuracy_score`` (the metric object).

        Returns:
            The dict produced by ``accuracy_score.compute``; its ``'accuracy'``
            entry is also printed.
        """
        preds, labels = [], []
        # NOTE: examples are classified one at a time; this could be batched.
        for example in tqdm(self.dataset, desc='evaluate on test dataset'):
            pred = self.pipe(example['text'])[0]['label']
            preds.append(intents.str2int(pred))
            labels.append(example['intent'])
        accuracy = accuracy_score.compute(predictions=preds, references=labels)
        print(f'Accuracy on test set: {accuracy["accuracy"]:.3f}')
        return accuracy

    def compute_size(self):
        """Serialize the model's state dict and return its on-disk size.

        The checkpoint is written into a private temporary directory, so an
        existing ``model.pth`` in the working directory is never overwritten or
        deleted, and the temporary file is removed even if saving fails.

        Returns:
            ``{'size_mb': <size in MiB>}``.
        """
        # Local import keeps this class usable without touching the import cell.
        import tempfile

        state_dict = self.pipe.model.state_dict()
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = Path(tmp_dir) / 'model.pth'
            torch.save(state_dict, tmp_path)
            size_mb = tmp_path.stat().st_size / (1024 * 1024)
        print(f'Model size (MB): {size_mb:.2f}')
        return {'size_mb': size_mb}

    def time_pipeline(self, query='what is the pin number of my account',
                      n_warmup=10, n_runs=100):
        """Time repeated single-query calls of the pipeline.

        Args:
            query: Text sent to the pipeline on every call.
            n_warmup: Number of untimed calls made before measuring.
            n_runs: Number of timed calls; must be at least 1.

        Returns:
            ``{'time_avg_ms': mean, 'time_std_ms': std}`` of the per-call
            latencies, in milliseconds.

        Raises:
            ValueError: If ``n_runs`` is smaller than 1.
        """
        if n_runs < 1:
            raise ValueError('n_runs must be at least 1')

        # Warm-up calls are excluded from the statistics.
        for _ in range(n_warmup):
            _ = self.pipe(query)

        # Timed runs.
        latencies = []
        for _ in range(n_runs):
            start_time = perf_counter()
            _ = self.pipe(query)
            latencies.append(perf_counter() - start_time)

        # perf_counter() reports seconds; convert to milliseconds.
        time_avg_time = 1000 * np.mean(latencies)
        time_std_time = 1000 * np.std(latencies)
        print(f'Average latency (ms): {time_avg_time:.2f} +/- {time_std_time:.2f}')
        return {'time_avg_ms': time_avg_time, 'time_std_ms': time_std_time}

    def run_benchmark(self):
        """Collect size, latency and accuracy metrics under ``self.optim_type``.

        Returns:
            ``{optim_type: {...}}`` merging the dicts returned by
            ``compute_size``, ``time_pipeline`` and ``compute_accuracy``.
        """
        metrics = {}
        metrics[self.optim_type] = self.compute_size()
        metrics[self.optim_type].update(self.time_pipeline())
        metrics[self.optim_type].update(self.compute_accuracy())
        return metrics

In [None]:
# Benchmark the baseline pipeline on the CLINC test split.
benchmark = PerformanceBenchmark(pipe=pipe, dataset=clinc['test'])
benchmark.run_benchmark()