### Загружаем модель для детекции уклончивости с huggingface

[link for english model](https://huggingface.co/alenaa/evasiveness)

In [14]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("alenaa/evasiveness")
model = AutoModelForSequenceClassification.from_pretrained("alenaa/evasiveness")

In [15]:
tokenizer.save_pretrained("local-pt-checkpoint", safe_serialization=False)  
model.save_pretrained("local-pt-checkpoint", safe_serialization=False)

### Конвертируем в формат onnx

In [None]:
pip install 'transformers[onnx]'

In [None]:
!conda install onnxruntime -c conda-forge -y

In [23]:
import subprocess  

subprocess.run(f"python -m transformers.onnx --model=local-pt-checkpoint --feature=sequence-classification onnx/".split())

Local PyTorch model found.
Framework not requested. Using torch to export to ONNX.


CompletedProcess(args=['python', '-m', 'transformers.onnx', '--model=local-pt-checkpoint', '--feature=sequence-classification', 'onnx/'], returncode=-11)

In [89]:
import onnxruntime 

onnx_session = onnxruntime.InferenceSession(
    'onnx/model.onnx',
    providers=['CPUExecutionProvider'])

### Тестируем модели

Предсказываем с помощью ONNX модели

In [86]:
import numpy as np
import torch 
import time 

def predict_onnx(feed):
    start_time = time.time()
    output = onnx_session.run(None, feed)
    end_time = time.time() - start_time

    output_tensor = torch.tensor(output[0], dtype=torch.float32)
    softmax_output = torch.nn.functional.softmax(output_tensor, dim=-1)
    predictions = np.squeeze(softmax_output.numpy())
    return(np.argmax(predictions, axis=1)), end_time


Предсказываем с помощью обычной модели

In [84]:
def predict(inputs):
    model.eval()
    with torch.no_grad():
        start_time = time.time()
        outputs = model(**inputs)
        end_time = time.time() - start_time
        logits = outputs.logits
        
        predictions = torch.argmax(logits, dim=-1)
        
        return predictions, end_time

Проверим точность и время на сгенерированном датасете

In [63]:
import pandas as pd

data = pd.read_csv('english_data_200.csv', index_col = 0)
Q = data.Question.values.tolist()
A = data.Answer.values.tolist()

In [95]:
inputs = tokenizer(Q, A, truncation=True, padding=True, max_length=512, return_tensors="pt")
feed = dict(
        input_ids=np.array(inputs["input_ids"]).astype("int64"),
        attention_mask=np.array(inputs["attention_mask"]).astype("int64"))

In [114]:
from sklearn.metrics import accuracy_score

test = data.Label.values.tolist()

def print_results(prediction_func, inp):
    test = data.Label.values.tolist()
    preds, time_ = prediction_func(inp)
    mse = accuracy_score(test, preds.tolist())
    if prediction_func == predict:
        model_type = 'Simple Model'
    else:
        model_type = 'ONNX Model'
    print(f'{model_type}')
    print('MSE: ', mse)
    print('Time: ', time_)

In [115]:
print_results(predict, inputs)

Simple Model
MSE:  0.7548076923076923
Time:  2.028697967529297


In [116]:
print_results(predict_onnx, feed)

ONNX Model
MSE:  0.7548076923076923
Time:  2.334728956222534


Попробуем оптимизировать модель onnx

In [None]:
!pip install onnxruntime_tools

In [120]:
from onnxruntime_tools import optimizer

optimized_model = optimizer.optimize_model("onnx/model.onnx", model_type='bert')
optimized_model.save_model_to_file("onnx/opt_model.onnx")
onnx_session = onnxruntime.InferenceSession(
    'onnx/opt_model.onnx',
    providers=['CPUExecutionProvider'])
print_results(predict_onnx, feed)

ONNX Model
MSE:  0.7548076923076923
Time:  1.8897769451141357
