In [1]:
import os
# Send both plain-HTTP and HTTPS requests made by this process through the same proxy.
os.environ.update(
    http_proxy="http://proxy-ws.cbank.kz:8080",
    https_proxy="http://proxy-ws.cbank.kz:8080",
)

In [2]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, AutoTokenizer, AutoModel
from transformers import AutoModelForTokenClassification
from onnxruntime.quantization import quantize_dynamic, QuantType
import torch.nn.functional as F
from pathlib import Path
import onnxruntime
import numpy as np
import torch
import onnx
import time
import os
import shutil

### Set the model folder name (`model_type`) in the next cell ###

In [61]:
# Name of the folder that holds the source model; the following cell builds
# `model_path` from it by appending a trailing "/".
model_type = 'main_ner' 

In [63]:
# Directory of the source model; the trailing "/" is kept because later cells
# build file names by plain string concatenation.
model_path = f"{model_type}/"

# Fail fast with a real exception instead of `assert False`: assertions are
# removed when Python runs with -O, which would silently skip this check.
if not os.path.exists(model_path):
    raise FileNotFoundError(f"The path '{model_path}' does not exist.")

In [64]:
# Derive the output folder by prefixing the last component of `model_path`
# with "optimized_" (e.g. "main_ner/" -> "optimized_main_ner/").
model_path_folder = [item for item in model_path.split("/") if item][-1]
optimized_folder = "optimized_" + model_path_folder
index = model_path.rfind(model_path_folder)
first_part = model_path[:index]
second_part = model_path[index+len(model_path_folder):]
new_model_path = first_part + optimized_folder + second_part

# Later cells concatenate file names directly onto this path, so make sure it
# always ends with a separator even if `model_path` was given without one.
if not new_model_path.endswith("/"):
    new_model_path += "/"

# Start from a clean output folder. A single rmtree replaces the manual
# per-entry deletion, which swallowed individual failures and then crashed in
# os.rmdir with an unrelated "directory not empty" error.
if os.path.exists(new_model_path):
    shutil.rmtree(new_model_path)
    print(f"Deleted existing folder: {new_model_path}")

os.makedirs(new_model_path, exist_ok=True)
print(f"Folder created: {new_model_path}")

Folder created: optimized_main_ner/


In [67]:
# Load the tokenizer and the token-classification model from `model_path`.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForTokenClassification.from_pretrained(model_path)

In [68]:
# Tokenize one sample sentence into PyTorch tensors
# (padded, and truncated to at most 512 tokens).
sentences = ['Менің атым Абылай']
encoded_input = tokenizer(
    sentences,
    return_tensors='pt',
    padding=True,
    truncation=True,
    max_length=512,
)

In [69]:
# Output locations: the full-precision export is an intermediate file, the
# quantized model is written next to it as `model.onnx`.
model_fp32 = new_model_path + 'temp_model.onnx'
model_quant = new_model_path + 'model.onnx'

# Export ONNX model
# torch.onnx.export traces the model with example inputs, so they must be
# passed explicitly; the order matches input_names below. Export in eval mode
# and without autograd so dropout is disabled and no gradients are tracked.
model.eval()
with torch.no_grad():
    torch.onnx.export(
        model,
        (encoded_input['input_ids'], encoded_input['attention_mask']),
        f=model_fp32,
        input_names=['input_ids', 'attention_mask'],
        output_names=['logits'],
        # Batch size and sequence length vary at inference time.
        dynamic_axes={
            'input_ids': {0: 'batch_size', 1: 'sequence'},
            'attention_mask': {0: 'batch_size', 1: 'sequence'},
            'logits': {0: 'batch_size', 1: 'sequence'}
        },
        do_constant_folding=True,
        opset_version=14,
    )


In [70]:
from onnxruntime.quantization import quantize_dynamic, QuantType

# Dynamically quantize the exported weights to signed 8-bit integers and write
# the result to `model_quant`.
# NOTE(review): the useful output is the file on disk; the value bound to
# `quantized_model` is presumably None — confirm before relying on it.
quantized_model = quantize_dynamic(
    model_input=model_fp32,
    model_output=model_quant,
    weight_type=QuantType.QInt8,
)




In [106]:
def clean_and_combine_tokens(tokens, labels):
    """Merge sub-word pieces back into words and pick one label per word.

    A piece that starts with "▁" opens a new word; any other piece is appended
    to the word currently being built. Special tokens ("<s>", "</s>", "<unk>",
    "<pad>") are dropped together with their labels.

    A word takes the label of its first piece. While that label is "O" (or not
    yet known, when the sequence starts with a continuation piece), it is
    replaced by the label of the next piece, so a word is tagged as an entity
    as soon as any of its pieces is.

    Args:
        tokens: sequence of token strings.
        labels: sequence of label strings, aligned with ``tokens``.

    Returns:
        tuple ``(words, word_labels)`` of two equally long lists.
    """
    special_tokens = {"<s>", "</s>", "<unk>", "<pad>"}

    cleaned_tokens = []
    cleaned_labels = []
    current_word = ""
    current_label = None

    for token, label in zip(tokens, labels):
        if token in special_tokens:
            continue

        if token.startswith("▁"):
            # A new word starts: flush the one built so far (if any).
            if current_word:
                cleaned_tokens.append(current_word)
                cleaned_labels.append(current_label)
            current_word = token[1:]
            current_label = label
        else:
            current_word += token
            # Upgrade a missing or "O" label with this piece's label. The None
            # case covers input whose first piece has no "▁" prefix, which
            # would otherwise leave the word labelled None.
            if current_label is None or current_label == "O":
                current_label = label

    if current_word:
        cleaned_tokens.append(current_word)
        cleaned_labels.append(current_label)

    return cleaned_tokens, cleaned_labels

In [112]:
# NOTE(review): `tokens` and `predicted_labels` are not assigned anywhere above
# this cell in file order, so on a fresh kernel it only works after the
# inference cell further down has been run.
merged_words_and_labels = clean_and_combine_tokens(tokens, predicted_labels)
clean_tokens, clean_labels = merged_words_and_labels

# Print one line per merged word with its label.
for token, label in zip(*merged_words_and_labels):
    print(f"Token: {token}\t Label: {label}")

Token: Менің	 Label: O
Token: атым	 Label: O
Token: Абылай	 Label: B-PERSON


In [104]:
from transformers import pipeline
import json
from tqdm import tqdm
import pandas as pd
# Texts to run through the model; the benchmark loop reads the 'content' column.
df = pd.read_csv('ner_dataset.csv')

# Read the label map from the config of the model that is actually exported
# (`model_path`) instead of a hard-coded sibling folder, and close the file
# handle deterministically.
with open(os.path.join(model_path, "config.json"), encoding="utf-8") as config_file:
    config = json.load(config_file)

# JSON object keys are strings, so lookups must use str(class_id).
id2label = config['id2label']

In [111]:
import numpy as np
import torch
import torch.nn.functional as F

def run_ner_inference(input_text, max_length=512):
    """Run the ONNX NER session on ``input_text`` and return class probabilities.

    Uses the notebook-level ``tokenizer`` and ``ort_session``.

    Args:
        input_text: text handed to the tokenizer.
        max_length: explicit truncation limit in tokens. It matches the limit
            used when the sample input for export was tokenized; without it,
            ``truncation=True`` falls back to whatever maximum the tokenizer
            itself declares.

    Returns:
        tuple ``(probabilities, input_ids)``: a NumPy array with the softmax of
        the session's 'logits' output over its last axis, and the NumPy array
        of token ids that was fed to the session.
    """
    encoded_input = tokenizer(
        input_text,
        max_length=max_length,
        padding=True,
        truncation=True,
        return_tensors="pt",
        add_special_tokens=True,
    )
    # ONNX Runtime consumes NumPy arrays, not torch tensors.
    input_ids = encoded_input['input_ids'].numpy()
    attention_mask = encoded_input['attention_mask'].numpy()

    ort_inputs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask
    }

    ort_outs = ort_session.run(['logits'], ort_inputs)
    logits = ort_outs[0]

    # Normalise the raw scores over the last axis.
    probabilities = F.softmax(torch.from_numpy(logits), dim=-1).numpy()

    return probabilities, input_ids

# Open the quantized model on CPU and try it on a single sentence.
ort_session = onnxruntime.InferenceSession(
    model_quant,
    providers=["CPUExecutionProvider"],
)
input_text = "Менің атым Абылай"
probabilities, input_ids = run_ner_inference(input_text)

# Most probable class per token, translated through the config's label map
# (looked up with string keys).
predicted_classes = probabilities.argmax(axis=-1)
id2label = config['id2label']
predicted_labels = []
for class_id in predicted_classes[0]:
    predicted_labels.append(id2label[str(class_id)])

# Turn ids back into token strings and merge them into words with labels.
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
clean_tokens, clean_labels = clean_and_combine_tokens(tokens, predicted_labels)
print(clean_tokens, clean_labels)

['Менің', 'атым', 'Абылай'] ['O', 'O', 'B-PERSON']


In [105]:
%%time
# Fresh CPU session for the timed run over the whole dataset.
ort_session = onnxruntime.InferenceSession(model_quant, providers=["CPUExecutionProvider"])

# Keep every (words, labels) pair instead of overwriting it on each iteration,
# so the timed run also leaves usable predictions behind.
ner_results = []

# Iterate the 'content' column directly: only this column is read, so building
# a full row object per record with iterrows() is unnecessary work.
for input_text in tqdm(df['content'], desc='going for ner', total=len(df)):
    # run_ner_inference returns softmax probabilities (not raw logits).
    probabilities, input_ids = run_ner_inference(input_text)

    predicted_classes = np.argmax(probabilities, axis=-1)
    predicted_labels = [id2label[str(class_id)] for class_id in predicted_classes[0]]

    tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
    clean_tokens, clean_labels = clean_and_combine_tokens(tokens, predicted_labels)
    ner_results.append((clean_tokens, clean_labels))

going for ner: 100%|██████████| 8013/8013 [14:55<00:00,  8.95it/s]

CPU times: user 7h 49min 43s, sys: 5 s, total: 7h 49min 48s
Wall time: 14min 56s



