In [None]:
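# Install the dependencies once if they are missing (torchvision is imported below for ResNet-50;
# torch2trt, used in the TensorRT cell, has to be installed separately):
# !pip install torch torchvision transformers datasets evaluate onnx onnxruntime openvino-dev psutil pandas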

In [None]:
import os, time, psutil, json
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torchvision.models as models
import onnxruntime as ort

# CUDA‑флаг
USE_GPU = torch.cuda.is_available()
DEVICE_CPU = torch.device('cpu')
DEVICE_GPU = torch.device('cuda' if USE_GPU else 'cpu')

# для повтора эксперимента
N_WARMUP, N_RUNS = 10, 50


In [None]:
def measure_inference_time(model, device='cpu', n_runs=N_RUNS, n_warmup=None):
    """Return the mean latency of one forward pass of ``model`` in milliseconds.

    The model is switched to eval mode and called on one fixed random
    1x3x224x224 tensor: first ``n_warmup`` untimed passes, then ``n_runs``
    timed passes. The model is not moved; the caller is expected to have
    placed it on ``device`` already.

    Args:
        model: Callable module that exposes ``eval()``.
        device: Device string (``'cpu'``, ``'cuda'``, ``'cuda:0'``, ...) or
            ``torch.device`` on which the input tensor is created.
        n_runs: Number of timed forward passes; must be at least 1.
        n_warmup: Number of untimed warm-up passes. ``None`` means the
            module-level ``N_WARMUP``.

    Returns:
        Average time per forward pass, in milliseconds.

    Raises:
        ValueError: If ``n_runs`` is smaller than 1.
    """
    if n_runs < 1:
        raise ValueError(f"n_runs must be >= 1, got {n_runs}")
    if n_warmup is None:
        n_warmup = N_WARMUP

    # Normalise to torch.device so strings and device objects behave the same;
    # comparing a torch.device with the string 'cpu' would not match.
    target = torch.device(device)
    needs_sync = target.type == 'cuda'

    model.eval()
    dummy = torch.randn(1, 3, 224, 224, device=target)
    with torch.no_grad():
        for _ in range(n_warmup):
            model(dummy)
        # CUDA kernels are launched asynchronously: drain the queue before
        # starting the clock and again before stopping it.
        if needs_sync:
            torch.cuda.synchronize(target)
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start = time.perf_counter()
        for _ in range(n_runs):
            model(dummy)
        if needs_sync:
            torch.cuda.synchronize(target)
        elapsed = time.perf_counter() - start
    return elapsed * 1e3 / n_runs  # ms

# Handle to the current Python process, created once and reused by rss_mb().
_process = psutil.Process(os.getpid())


def rss_mb() -> float:
    """Return the resident set size of the current process in MiB."""
    resident_bytes = _process.memory_info().rss
    return resident_bytes / (1024 * 1024)

def vram_mb() -> float:
    """Return ``torch.cuda.max_memory_allocated()`` converted from bytes to MiB."""
    peak_bytes = torch.cuda.max_memory_allocated()
    return peak_bytes / (1024 * 1024)

def file_mb(path: str | Path) -> float:
    return Path(path).stat().st_size / 1024 ** 2

def reset_vram():
    """Reset the CUDA peak-memory counters so that the next ``vram_mb()``
    reading only covers work started after this call.

    Does nothing when ``USE_GPU`` is false.
    """
    if USE_GPU:
        # reset_max_memory_allocated() is deprecated and simply forwards to
        # reset_peak_memory_stats(); call the supported API directly.
        torch.cuda.reset_peak_memory_stats()


In [None]:
# Load the pretrained ResNet-50 and switch it to inference mode immediately:
# the export cells below trace/convert the model before any benchmark has
# called .eval(), and a freshly constructed module is in training mode.
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT).eval()


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /home/kazanplova/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 370MB/s]


#### TorchScript


In [None]:
ts_path = "resnet50_ts.pt"
# Trace in eval mode: a traced module keeps the train/eval behaviour that was
# active while tracing, and a training-mode forward pass would also update
# the BatchNorm running statistics of `model`.
with torch.no_grad():
    ts_model = torch.jit.trace(model.to(DEVICE_CPU).eval(), torch.randn(1, 3, 224, 224))
ts_model.save(ts_path)

#### ONNX

In [None]:
onnx_path = "resnet50.onnx"
# Export the CPU copy of the model with one fixed-size example input.
onnx_source_model = model.to(DEVICE_CPU)
onnx_example_input = torch.randn(1, 3, 224, 224)
torch.onnx.export(
    onnx_source_model,
    onnx_example_input,
    onnx_path,
    input_names=['input'],
    output_names=['output'],
    opset_version=13,
)

#### TensorRT (fp16)

In [None]:
trt_path = "resnet50_trt.pth"
if USE_GPU:
    # torch2trt is only needed on the CUDA path, so it is imported here
    # instead of failing the whole notebook on CPU-only machines.
    from torch2trt import torch2trt

    # Convert the model in eval mode with an example input that lives on the GPU.
    trt_model = torch2trt(
        model.to(DEVICE_GPU).eval(),
        [torch.randn(1, 3, 224, 224, device=DEVICE_GPU)],
        fp16_mode=True,
    )
    torch.save(trt_model.state_dict(), trt_path)
else:
    # Keep the name defined so later cells can test for it.
    trt_model = None
    print("CUDA is not available: skipping the TensorRT conversion.")

#### OpenVINO (из ONNX)

In [None]:
from openvino.runtime import Core, serialize

# Read the ONNX export with OpenVINO and write it back out as an .xml/.bin pair.
Path("openvino_model").mkdir(parents=True, exist_ok=True)
ov_xml_file = "openvino_model/model.xml"
ov_bin_file = "openvino_model/model.bin"

ie = Core()
ov = ie.read_model(onnx_path)
serialize(ov, ov_xml_file, ov_bin_file)


### Benchmarking

In [None]:
# Each row is [format, CPU ms, GPU ms, RAM MB, VRAM MB, file MB].
# RAM is the resident set size of the whole notebook process at that moment,
# not the footprint of one model; VRAM is the peak since the last reset_vram().
results = []

# Eager PyTorch on the CPU.
pt_cpu_time = measure_inference_time(model.to(DEVICE_CPU), device='cpu')
results += [["PyTorch-CPU", pt_cpu_time, None, rss_mb(), None, "n/a"]]

# Eager PyTorch on the GPU.
if USE_GPU:
    reset_vram()
    pt_gpu_time = measure_inference_time(model.to(DEVICE_GPU), device='cuda')
    results += [["PyTorch-GPU", None, pt_gpu_time, None, vram_mb(), "n/a"]]

# TorchScript, reloaded from the file saved earlier, on the CPU.
m = torch.jit.load(ts_path).to(DEVICE_CPU)
ts_cpu_time = measure_inference_time(m, device='cpu')
results += [["TorchScript-CPU", ts_cpu_time, None, rss_mb(), None, file_mb(ts_path)]]

# The same TorchScript module moved to the GPU.
if USE_GPU:
    reset_vram()
    ts_gpu_time = measure_inference_time(m.to(DEVICE_GPU), device='cuda')
    results += [["TorchScript-GPU", None, ts_gpu_time, None, vram_mb(), file_mb(ts_path)]]

# One fixed input shared by every onnx_run() call. Creating it here keeps
# random-number generation and the float32 cast out of the timed loops.
_ONNX_DUMMY_INPUT = np.random.randn(1, 3, 224, 224).astype(np.float32)


def onnx_run(sess, data=None):
    """Run a single inference on an ONNX Runtime session.

    Args:
        sess: Session object exposing ``get_inputs()`` and ``run()``.
        data: Array fed to the session's first input. ``None`` means the
            shared 1x3x224x224 float32 dummy input.
    """
    if data is None:
        data = _ONNX_DUMMY_INPUT
    first_input_name = sess.get_inputs()[0].name
    sess.run(None, {first_input_name: data})

def _time_onnx(sess):
    """Return the mean latency in ms of ``onnx_run(sess)``: ``N_WARMUP``
    untimed calls followed by ``N_RUNS`` timed calls."""
    for _ in range(N_WARMUP):
        onnx_run(sess)
    # perf_counter is monotonic and high-resolution, unlike time.time().
    start = time.perf_counter()
    for _ in range(N_RUNS):
        onnx_run(sess)
    return (time.perf_counter() - start) * 1e3 / N_RUNS


sess_cpu = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])
onnx_cpu_time = _time_onnx(sess_cpu)
results.append(["ONNX-CPU", onnx_cpu_time, None, rss_mb(), None, file_mb(onnx_path)])

if USE_GPU:
    sess_gpu = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"])
    # ONNX Runtime may fall back to the CPU provider when the CUDA provider
    # cannot be enabled; only record a GPU row if CUDA is really active.
    if "CUDAExecutionProvider" in sess_gpu.get_providers():
        reset_vram()
        onnx_gpu_time = _time_onnx(sess_gpu)
        # NOTE(review): vram_mb() reads PyTorch's allocator counters; memory that
        # ONNX Runtime allocates itself is presumably not included - confirm.
        results.append(["ONNX-GPU", None, onnx_gpu_time, None, vram_mb(), file_mb(onnx_path)])
    else:
        print("ONNX Runtime could not enable CUDAExecutionProvider; skipping the ONNX-GPU row.")

# This benchmark creates its input on 'cuda', so, like the other GPU rows in
# this cell, it only runs when USE_GPU is set.
if USE_GPU:
    reset_vram()
    trt_time = measure_inference_time(trt_model, 'cuda')
    results.append(["TensorRT-GPU", None, trt_time, None, vram_mb(), file_mb(trt_path)])

compiled = ie.compile_model(ov, "CPU")

# Build the input once so that random-number generation and the float32 cast
# are not part of the measured time.
ov_input = np.random.randn(1, 3, 224, 224).astype(np.float32)


def ov_run():
    """Run a single inference of the compiled OpenVINO model on the fixed input."""
    compiled([ov_input])


for _ in range(N_WARMUP):
    ov_run()
# perf_counter is monotonic and high-resolution, unlike time.time().
start = time.perf_counter()
for _ in range(N_RUNS):
    ov_run()
ov_time = (time.perf_counter() - start) * 1e3 / N_RUNS
results.append([
    "OpenVINO-CPU", ov_time, None, rss_mb(), None,
    # The serialized model is an .xml/.bin pair, so its size is the sum of both files.
    file_mb("openvino_model/model.xml") + file_mb("openvino_model/model.bin")
])


### Results

In [None]:
import json

# Turn the positional result rows into dictionaries keyed by column name.
cols = ["Format", "Inf_CPU_ms", "Inf_GPU_ms", "RAM_MB", "VRAM_MB", "File_MB"]
json_results = [dict(zip(cols, row)) for row in results]

print(json.dumps(json_results, indent=2, ensure_ascii=False))

# A missing measurement is stored as None. Test for None explicitly so that a
# legitimate 0.0 timing is not discarded by a truthiness check.
gpu_candidates = [d for d in json_results if d["Inf_GPU_ms"] is not None]
cpu_candidates = [d for d in json_results if d["Inf_CPU_ms"] is not None]

# Pick the format with the lowest latency per device, or "—" when nothing was measured.
best_gpu = min(gpu_candidates, key=lambda d: d["Inf_GPU_ms"])["Format"] if gpu_candidates else "—"
best_cpu = min(cpu_candidates, key=lambda d: d["Inf_CPU_ms"])["Format"] if cpu_candidates else "—"

print(f"Самый быстрый GPU‑вариант: {best_gpu}")
print(f"Самый быстрый CPU‑вариант: {best_cpu}")


[
  {
    "Format": "PyTorch-CPU",
    "Inf_CPU_ms": 23.090620040893555,
    "Inf_GPU_ms": null,
    "RAM_MB": 7664.28515625,
    "VRAM_MB": null,
    "File_MB": "n/a"
  },
  {
    "Format": "PyTorch-GPU",
    "Inf_CPU_ms": null,
    "Inf_GPU_ms": 3.9195919036865234,
    "RAM_MB": null,
    "VRAM_MB": 119.12890625,
    "File_MB": "n/a"
  },
  {
    "Format": "TorchScript-CPU",
    "Inf_CPU_ms": 23.486652374267578,
    "Inf_GPU_ms": null,
    "RAM_MB": 7683.74609375,
    "VRAM_MB": null,
    "File_MB": 98.06046485900879
  },
  {
    "Format": "TorchScript-GPU",
    "Inf_CPU_ms": null,
    "Inf_GPU_ms": 2.9832839965820312,
    "RAM_MB": null,
    "VRAM_MB": 216.85693359375,
    "File_MB": 98.06046485900879
  },
  {
    "Format": "ONNX-CPU",
    "Inf_CPU_ms": 8.958115577697754,
    "Inf_GPU_ms": null,
    "RAM_MB": 7869.72265625,
    "VRAM_MB": null,
    "File_MB": 97.41437149047852
  },
  {
    "Format": "ONNX-GPU",
    "Inf_CPU_ms": null,
    "Inf_GPU_ms": 8.751931190490723,
    "RAM_MB