<a href="https://colab.research.google.com/github/devansh20/intern/blob/main/OnnxOptimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [104]:
!pip install optimum[onnxruntime]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [52]:
from pathlib import Path
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForTokenClassification

model_id = "dslim/bert-base-NER"
onnx_path = Path("onnx")
task = "ner"

# load vanilla transformers and convert to onnx
model = ORTModelForTokenClassification.from_pretrained(model_id, from_transformers=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# save onnx checkpoint and tokenizer
model.save_pretrained(onnx_path)
tokenizer.save_pretrained(onnx_path)

# test the model with using transformers pipeline, with handle_impossible_answer for squad_v2 
optimum_qa = pipeline(task, model=model, tokenizer=tokenizer)
prediction = optimum_qa("My name is Phillip and I live in Nuremberg")

print(prediction)

Downloading:   0%|          | 0.00/829 [00:00<?, ?B/s]

[{'entity': 'B-PER', 'score': 0.99736637, 'index': 4, 'word': 'Phillip', 'start': 11, 'end': 18}, {'entity': 'B-LOC', 'score': 0.9989875, 'index': 9, 'word': 'Nuremberg', 'start': 33, 'end': 42}]


In [53]:
from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

In [54]:

qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True)
quantizer = ORTQuantizer.from_pretrained(model_id, feature="token-classification")


In [55]:

quantizer.export(
    onnx_model_path=onnx_path / "model.onnx",
    onnx_quantized_model_output_path=onnx_path / "model-quantized.onnx",
    quantization_config=qconfig,
)

PosixPath('onnx/model-quantized.onnx')

In [56]:
import os
size = os.path.getsize(onnx_path / "model.onnx")/(1024*1024)
print(f"Vanilla Onnx Model file size: {size:.2f} MB")
size = os.path.getsize(onnx_path / "model-quantized.onnx")/(1024*1024)
print(f"Quantized Onnx Model file size: {size:.2f} MB")

Vanilla Onnx Model file size: 411.05 MB
Quantized Onnx Model file size: 168.52 MB


In [59]:
quantized_model = ORTModelForTokenClassification.from_pretrained(onnx_path, file_name="model-quantized.onnx")

In [58]:
%%time
for i in range(100):
                    optimum_qa = pipeline(task, model=model, tokenizer=tokenizer)
                    example = ["My name is Wolfgang and I live in Berlin",
    "My name is Sarah and I live in London", 
    "My name is Clara and I live in Berkeley, California",
    "George Washington went to Washington",
    "Boris Johnson is the Prime Minister of England",
    "Roger Federer is a Tennis player",
    "Cristiano Ronaldo is a striker for Manchester United",
    "United Nations resolves conflicts",
    "World Trade Organization is a global body",
    "Antonio Guerrez works for the United Nations"]
                    prediction = optimum_qa(example)

CPU times: user 44.1 s, sys: 88.7 ms, total: 44.2 s
Wall time: 44 s


In [60]:
%%time
for i in range(100):
                    optimum_qa = pipeline(task, model=quantized_model, tokenizer=tokenizer)
                    example = ["My name is Wolfgang and I live in Berlin",
    "My name is Sarah and I live in London", 
    "My name is Clara and I live in Berkeley, California",
    "George Washington went to Washington",
    "Boris Johnson is the Prime Minister of England",
    "Roger Federer is a Tennis player",
    "Cristiano Ronaldo is a striker for Manchester United",
    "United Nations resolves conflicts",
    "World Trade Organization is a global body",
    "Antonio Guerrez works for the United Nations"]
                    prediction = optimum_qa(example)

CPU times: user 28.3 s, sys: 82.3 ms, total: 28.4 s
Wall time: 28.4 s


In [63]:
from pathlib import Path
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForQuestionAnswering

model_id = "deepset/roberta-base-squad2"
onnx_path = Path("onnx")
task = "question-answering"

# load vanilla transformers and convert to onnx
model = ORTModelForQuestionAnswering.from_pretrained(model_id, from_transformers=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# save onnx checkpoint and tokenizer
model.save_pretrained(onnx_path)
tokenizer.save_pretrained(onnx_path)

# test the model with using transformers pipeline, with handle_impossible_answer for squad_v2 
optimum_qa = pipeline(task, model=model, tokenizer=tokenizer, handle_impossible_answer=True)
prediction = optimum_qa(question="What's my name?", context="My name is Philipp and I live in Nuremberg.")

print(prediction)

Downloading:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/772 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/473M [00:00<?, ?B/s]

{'score': 0.9041661620140076, 'start': 11, 'end': 18, 'answer': 'Philipp'}


In [None]:
from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# create ORTQuantizer and define quantization configuration
quantizer = ORTQuantizer.from_pretrained(model_id, feature=task)
qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True)

# apply the quantization configuration to the model


PosixPath('onnx/model-quantized.onnx')

In [None]:
quantizer.export(
    onnx_model_path=onnx_path / "model.onnx",
    onnx_quantized_model_output_path=onnx_path / "model-quantized.onnx",
    quantization_config=qconfig,
)

PosixPath('onnx/model-quantized.onnx')

In [None]:
import os
size = os.path.getsize(onnx_path / "model.onnx")/(1024*1024)
print(f"Vanilla Onnx Model file size: {size:.2f} MB")
size = os.path.getsize(onnx_path / "model-quantized.onnx")/(1024*1024)
print(f"Quantized Onnx Model file size: {size:.2f} MB")

Vanilla Onnx Model file size: 473.34 MB
Quantized Onnx Model file size: 230.83 MB


In [None]:
quantized_model = ORTModelForQuestionAnswering.from_pretrained(onnx_path, file_name="model-quantized.onnx")

# test the quantized model with using transformers pipeline
quantized_optimum_qa = pipeline(task, model=quantized_model, tokenizer=tokenizer, handle_impossible_answer=True)
prediction = quantized_optimum_qa(question="What's my name?", context="My name is Philipp and I live in Nuremberg.")
print(prediction)

{'score': 0.9206579923629761, 'start': 11, 'end': 18, 'answer': 'Philipp'}


In [96]:
from pathlib import Path
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForTokenClassification

model_id = "malduwais/distilbert-base-uncased-finetuned-ner"
onnx_path = Path("onnx")
task = "ner"

# load vanilla transformers and convert to onnx
model = ORTModelForTokenClassification.from_pretrained(model_id, from_transformers=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# save onnx checkpoint and tokenizer
model.save_pretrained(onnx_path)
tokenizer.save_pretrained(onnx_path)

# test the model with using transformers pipeline, with handle_impossible_answer for squad_v2 
optimum_qa = pipeline(task, model=model, tokenizer=tokenizer)
prediction = optimum_qa("My name is Phillip and I live in Nuremberg")

print(prediction)

Downloading:   0%|          | 0.00/945 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/945 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/253M [00:00<?, ?B/s]

  scores = scores.masked_fill(mask, torch.tensor(-float("inf")))  # (bs, n_heads, q_length, k_length)


[{'entity': 'LABEL_0', 'score': 0.998279, 'index': 1, 'word': 'my', 'start': 0, 'end': 2}, {'entity': 'LABEL_0', 'score': 0.9970311, 'index': 2, 'word': 'name', 'start': 3, 'end': 7}, {'entity': 'LABEL_0', 'score': 0.9976362, 'index': 3, 'word': 'is', 'start': 8, 'end': 10}, {'entity': 'LABEL_1', 'score': 0.9954513, 'index': 4, 'word': 'phillip', 'start': 11, 'end': 18}, {'entity': 'LABEL_0', 'score': 0.9984484, 'index': 5, 'word': 'and', 'start': 19, 'end': 22}, {'entity': 'LABEL_0', 'score': 0.9987005, 'index': 6, 'word': 'i', 'start': 23, 'end': 24}, {'entity': 'LABEL_0', 'score': 0.9978296, 'index': 7, 'word': 'live', 'start': 25, 'end': 29}, {'entity': 'LABEL_0', 'score': 0.9973935, 'index': 8, 'word': 'in', 'start': 30, 'end': 32}, {'entity': 'LABEL_5', 'score': 0.9976667, 'index': 9, 'word': 'nuremberg', 'start': 33, 'end': 42}]


In [97]:
from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

In [98]:
qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True)
quantizer = ORTQuantizer.from_pretrained(model_id, feature="token-classification")

In [99]:
quantizer.export(
    onnx_model_path=onnx_path / "model.onnx",
    onnx_quantized_model_output_path=onnx_path / "model-quantized.onnx",
    quantization_config=qconfig,
)

PosixPath('onnx/model-quantized.onnx')

In [100]:
import os
size = os.path.getsize(onnx_path / "model.onnx")/(1024*1024)
print(f"Vanilla Onnx Model file size: {size:.2f} MB")
size = os.path.getsize(onnx_path / "model-quantized.onnx")/(1024*1024)
print(f"Quantized Onnx Model file size: {size:.2f} MB")

Vanilla Onnx Model file size: 253.23 MB
Quantized Onnx Model file size: 131.95 MB


In [101]:
quantized_model = ORTModelForTokenClassification.from_pretrained(onnx_path, file_name="model-quantized.onnx")

In [102]:
optimum_qa = pipeline("token-classification", model=quantized_model, tokenizer=tokenizer)
prediction = optimum_qa("My name is Phillip and I live in London")

prediction

[{'end': 2,
  'entity': 'LABEL_0',
  'index': 1,
  'score': 0.30557057,
  'start': 0,
  'word': 'my'},
 {'end': 7,
  'entity': 'LABEL_0',
  'index': 2,
  'score': 0.30714706,
  'start': 3,
  'word': 'name'},
 {'end': 10,
  'entity': 'LABEL_0',
  'index': 3,
  'score': 0.30784118,
  'start': 8,
  'word': 'is'},
 {'end': 18,
  'entity': 'LABEL_0',
  'index': 4,
  'score': 0.30188903,
  'start': 11,
  'word': 'phillip'},
 {'end': 22,
  'entity': 'LABEL_0',
  'index': 5,
  'score': 0.30615252,
  'start': 19,
  'word': 'and'},
 {'end': 24,
  'entity': 'LABEL_0',
  'index': 6,
  'score': 0.30732495,
  'start': 23,
  'word': 'i'},
 {'end': 29,
  'entity': 'LABEL_0',
  'index': 7,
  'score': 0.30577922,
  'start': 25,
  'word': 'live'},
 {'end': 32,
  'entity': 'LABEL_0',
  'index': 8,
  'score': 0.30360752,
  'start': 30,
  'word': 'in'},
 {'end': 39,
  'entity': 'LABEL_0',
  'index': 9,
  'score': 0.30468535,
  'start': 33,
  'word': 'london'}]

In [105]:
%%time
for i in range(100):
                    optimum_qa = pipeline(task, model=model, tokenizer=tokenizer)
                    example = ["My name is Wolfgang and I live in Berlin",
    "My name is Sarah and I live in London", 
    "My name is Clara and I live in Berkeley, California",
    "George Washington went to Washington",
    "Boris Johnson is the Prime Minister of England",
    "Roger Federer is a Tennis player",
    "Cristiano Ronaldo is a striker for Manchester United",
    "United Nations resolves conflicts",
    "World Trade Organization is a global body",
    "Antonio Guerrez works for the United Nations"]
                    prediction = optimum_qa(example)

CPU times: user 24.9 s, sys: 114 ms, total: 25 s
Wall time: 24.9 s


In [106]:
%%time
for i in range(100):
                    optimum_qa = pipeline(task, model=quantized_model, tokenizer=tokenizer)
                    example = ["My name is Wolfgang and I live in Berlin",
    "My name is Sarah and I live in London", 
    "My name is Clara and I live in Berkeley, California",
    "George Washington went to Washington",
    "Boris Johnson is the Prime Minister of England",
    "Roger Federer is a Tennis player",
    "Cristiano Ronaldo is a striker for Manchester United",
    "United Nations resolves conflicts",
    "World Trade Organization is a global body",
    "Antonio Guerrez works for the United Nations"]
                    prediction = optimum_qa(example)

CPU times: user 15.1 s, sys: 125 ms, total: 15.2 s
Wall time: 15.2 s


In [95]:
from pathlib import Path
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
onnx_path = Path("onnx")
task = "ner"

# load vanilla transformers and convert to onnx
model = ORTModelForSequenceClassification.from_pretrained(model_id, from_transformers=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# save onnx checkpoint and tokenizer
model.save_pretrained(onnx_path)
tokenizer.save_pretrained(onnx_path)

# test the model with using transformers pipeline, with handle_impossible_answer for squad_v2 
from transformers import pipeline

classifier = pipeline("text-classification",model=model,tokenizer=tokenizer)
prediction = classifier("I loved Star Wars so much!")
print(prediction)

Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

  scores = scores.masked_fill(mask, torch.tensor(-float("inf")))  # (bs, n_heads, q_length, k_length)


[{'label': 'POSITIVE', 'score': 0.999840259552002}]


In [33]:
from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

In [35]:
qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True)
quantizer = ORTQuantizer.from_pretrained(model_id, feature="sequence-classification")

In [36]:
quantizer.export(
    onnx_model_path=onnx_path / "model.onnx",
    onnx_quantized_model_output_path=onnx_path / "model-quantized.onnx",
    quantization_config=qconfig,
)

PosixPath('onnx/model-quantized.onnx')

In [37]:
import os
size = os.path.getsize(onnx_path / "model.onnx")/(1024*1024)
print(f"Vanilla Onnx Model file size: {size:.2f} MB")
size = os.path.getsize(onnx_path / "model-quantized.onnx")/(1024*1024)
print(f"Quantized Onnx Model file size: {size:.2f} MB")

Vanilla Onnx Model file size: 255.46 MB
Quantized Onnx Model file size: 134.21 MB


In [39]:
quantized_model = ORTModelForSequenceClassification.from_pretrained(onnx_path, file_name="model-quantized.onnx")

In [49]:
%%time
for i in range(100):
                    classifier = pipeline("text-classification",model=quantized_model,tokenizer=tokenizer)
                    example = ["I loved Star Wars so much!","I hate you","He loves football","She loves that show","He is smitten by her","He loves his dog","I love F1 so much","I love to read","I hate cooking","I hate to drive"]
                    prediction = classifier(example)

CPU times: user 10.5 s, sys: 20.6 ms, total: 10.5 s
Wall time: 10.5 s


In [50]:
%%time
for i in range(100):
                    classifier = pipeline("text-classification",model=model,tokenizer=tokenizer)
                    example = ["I loved Star Wars so much!","I hate you","He loves football","She loves that show","He is smitten by her","He loves his dog","I love F1 so much","I love to read","I hate cooking","I hate to drive"]
                    prediction = classifier(example)


CPU times: user 18.5 s, sys: 32.9 ms, total: 18.5 s
Wall time: 19.2 s


In [78]:
from pathlib import Path
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForTokenClassification

model_id = "dslim/bert-base-NER"
onnx_path = Path("onnx")
task = "ner"

# load vanilla transformers and convert to onnx
model = ORTModelForTokenClassification.from_pretrained(model_id, from_transformers=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# save onnx checkpoint and tokenizer
model.save_pretrained(onnx_path)
tokenizer.save_pretrained(onnx_path)

# test the model with using transformers pipeline, with handle_impossible_answer for squad_v2 
optimum_qa = pipeline(task, model=model, tokenizer=tokenizer)
prediction = optimum_qa("My name is Phillip and I live in Nuremberg")

print(prediction)

Downloading:   0%|          | 0.00/829 [00:00<?, ?B/s]

[{'entity': 'B-PER', 'score': 0.99736637, 'index': 4, 'word': 'Phillip', 'start': 11, 'end': 18}, {'entity': 'B-LOC', 'score': 0.9989875, 'index': 9, 'word': 'Nuremberg', 'start': 33, 'end': 42}]


In [79]:
from optimum.onnxruntime import ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
optimizer = ORTOptimizer.from_pretrained(model_id, feature="token-classification")
optimization_config = OptimizationConfig(optimization_level=99)
optimizer.export(
    onnx_model_path=onnx_path / "model.onnx",
    onnx_optimized_model_output_path=onnx_path / "model-optimized.onnx",
    optimization_config=optimization_config,
)

PosixPath('onnx/model-optimized.onnx')

In [80]:
import os
# get model file size
size = os.path.getsize(onnx_path / "model.onnx")/(1024*1024)
print(f"Vanilla Onnx Model file size: {size:.2f} MB")
size = os.path.getsize(onnx_path / "model-optimized.onnx")/(1024*1024)
print(f"optimized Onnx Model file size: {size:.2f} MB")

Vanilla Onnx Model file size: 411.05 MB
optimized Onnx Model file size: 410.97 MB


In [81]:
from optimum.onnxruntime import ORTModelForTokenClassification

# load quantized model
opt_model = ORTModelForTokenClassification.from_pretrained(onnx_path, file_name="model-optimized.onnx")

# test the quantized model with using transformers pipeline
opt_optimum_qa = pipeline(task, model=opt_model, tokenizer=tokenizer)
prediction = opt_optimum_qa("My name is Philipp and I live in Nuremberg.")
print(prediction)

[{'entity': 'B-PER', 'score': 0.9983736, 'index': 4, 'word': 'Philipp', 'start': 11, 'end': 18}, {'entity': 'B-LOC', 'score': 0.99893504, 'index': 9, 'word': 'Nuremberg', 'start': 33, 'end': 42}]


In [82]:
%%time
for i in range(100):
                    optimum_qa = pipeline(task, model=model, tokenizer=tokenizer)
                    example = ["My name is Wolfgang and I live in Berlin",
    "My name is Sarah and I live in London", 
    "My name is Clara and I live in Berkeley, California",
    "George Washington went to Washington",
    "Boris Johnson is the Prime Minister of England",
    "Roger Federer is a Tennis player",
    "Cristiano Ronaldo is a striker for Manchester United",
    "United Nations resolves conflicts",
    "World Trade Organization is a global body",
    "Antonio Guerrez works for the United Nations"]
                    prediction = optimum_qa(example)

CPU times: user 45.3 s, sys: 216 ms, total: 45.6 s
Wall time: 45.4 s


In [83]:
%%time
for i in range(100):
                    optimum_qa = pipeline(task, model=opt_model, tokenizer=tokenizer)
                    example = ["My name is Wolfgang and I live in Berlin",
    "My name is Sarah and I live in London", 
    "My name is Clara and I live in Berkeley, California",
    "George Washington went to Washington",
    "Boris Johnson is the Prime Minister of England",
    "Roger Federer is a Tennis player",
    "Cristiano Ronaldo is a striker for Manchester United",
    "United Nations resolves conflicts",
    "World Trade Organization is a global body",
    "Antonio Guerrez works for the United Nations"]
                    prediction = optimum_qa(example)

CPU times: user 45.6 s, sys: 201 ms, total: 45.8 s
Wall time: 45.9 s


In [84]:
from pathlib import Path
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForTokenClassification

model_id = "dslim/bert-base-NER"
onnx_path = Path("onnx")
task = "ner"

# load vanilla transformers and convert to onnx
model = ORTModelForTokenClassification.from_pretrained(model_id, from_transformers=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# save onnx checkpoint and tokenizer
model.save_pretrained(onnx_path)
tokenizer.save_pretrained(onnx_path)

# test the model with using transformers pipeline, with handle_impossible_answer for squad_v2 
optimum_qa = pipeline(task, model=model, tokenizer=tokenizer)
prediction = optimum_qa("My name is Phillip and I live in Nuremberg")

print(prediction)

Downloading:   0%|          | 0.00/829 [00:00<?, ?B/s]

[{'entity': 'B-PER', 'score': 0.99736637, 'index': 4, 'word': 'Phillip', 'start': 11, 'end': 18}, {'entity': 'B-LOC', 'score': 0.9989875, 'index': 9, 'word': 'Nuremberg', 'start': 33, 'end': 42}]


In [85]:
from optimum.onnxruntime import ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
optimizer = ORTOptimizer.from_pretrained(model_id, feature="token-classification")
optimization_config = OptimizationConfig(optimization_level=99)
optimizer.export(
    onnx_model_path=onnx_path / "model.onnx",
    onnx_optimized_model_output_path=onnx_path / "model-optimized.onnx",
    optimization_config=optimization_config,
)

PosixPath('onnx/model-optimized.onnx')

In [87]:
from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# create ORTQuantizer and define quantization configuration
quantizer = ORTQuantizer.from_pretrained(model_id, feature="token-classification")
qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True)

# apply the quantization configuration to the model
quantizer.export(
    onnx_model_path=onnx_path / "model-optimized.onnx",
    onnx_quantized_model_output_path=onnx_path / "model-quantized.onnx",
    quantization_config=qconfig,
)

PosixPath('onnx/model-quantized.onnx')

In [88]:
import os
# get model file size
size = os.path.getsize(onnx_path / "model.onnx")/(1024*1024)
print(f"Vanilla Onnx Model file size: {size:.2f} MB")
size = os.path.getsize(onnx_path / "model-optimized.onnx")/(1024*1024)
print(f"Optimized Onnx Model file size: {size:.2f} MB")
size = os.path.getsize(onnx_path / "model-quantized.onnx")/(1024*1024)
print(f"Quantized Onnx Model file size: {size:.2f} MB")


Vanilla Onnx Model file size: 411.05 MB
Optimized Onnx Model file size: 410.97 MB
Quantized Onnx Model file size: 228.99 MB


In [90]:
quantized_model = ORTModelForTokenClassification.from_pretrained(onnx_path, file_name="model-quantized.onnx")
quantized_optimum_qa = pipeline(task, model=quantized_model, tokenizer=tokenizer)
prediction = quantized_optimum_qa("My name is Philipp and I live in Nuremberg.")
print(prediction)

[{'entity': 'I-MISC', 'score': 0.19693893, 'index': 1, 'word': 'My', 'start': 0, 'end': 2}, {'entity': 'I-MISC', 'score': 0.1948351, 'index': 2, 'word': 'name', 'start': 3, 'end': 7}, {'entity': 'I-MISC', 'score': 0.19829448, 'index': 3, 'word': 'is', 'start': 8, 'end': 10}, {'entity': 'I-MISC', 'score': 0.18494345, 'index': 4, 'word': 'Philipp', 'start': 11, 'end': 18}, {'entity': 'I-MISC', 'score': 0.20028749, 'index': 5, 'word': 'and', 'start': 19, 'end': 22}, {'entity': 'I-MISC', 'score': 0.19549514, 'index': 6, 'word': 'I', 'start': 23, 'end': 24}, {'entity': 'I-MISC', 'score': 0.18896174, 'index': 7, 'word': 'live', 'start': 25, 'end': 29}, {'entity': 'I-MISC', 'score': 0.19690113, 'index': 8, 'word': 'in', 'start': 30, 'end': 32}, {'entity': 'I-MISC', 'score': 0.19497155, 'index': 9, 'word': 'Nuremberg', 'start': 33, 'end': 42}, {'entity': 'I-MISC', 'score': 0.2054881, 'index': 10, 'word': '.', 'start': 42, 'end': 43}]


In [91]:
%%time
for i in range(100):
                    optimum_qa = pipeline(task, model=quantized_model, tokenizer=tokenizer)
                    example = ["My name is Wolfgang and I live in Berlin",
    "My name is Sarah and I live in London", 
    "My name is Clara and I live in Berkeley, California",
    "George Washington went to Washington",
    "Boris Johnson is the Prime Minister of England",
    "Roger Federer is a Tennis player",
    "Cristiano Ronaldo is a striker for Manchester United",
    "United Nations resolves conflicts",
    "World Trade Organization is a global body",
    "Antonio Guerrez works for the United Nations"]
                    prediction = optimum_qa(example)

CPU times: user 35.2 s, sys: 232 ms, total: 35.4 s
Wall time: 35.3 s


In [92]:
%%time
for i in range(100):
                    optimum_qa = pipeline(task, model=model, tokenizer=tokenizer)
                    example = ["My name is Wolfgang and I live in Berlin",
    "My name is Sarah and I live in London", 
    "My name is Clara and I live in Berkeley, California",
    "George Washington went to Washington",
    "Boris Johnson is the Prime Minister of England",
    "Roger Federer is a Tennis player",
    "Cristiano Ronaldo is a striker for Manchester United",
    "United Nations resolves conflicts",
    "World Trade Organization is a global body",
    "Antonio Guerrez works for the United Nations"]
                    prediction = optimum_qa(example)

CPU times: user 49.1 s, sys: 184 ms, total: 49.3 s
Wall time: 49.1 s
