In [1]:
# Some standard imports
import numpy as np

from torch import nn
import torch.utils.model_zoo as model_zoo
import torch.onnx

In [2]:
from DIPnet import DIPNet

In [3]:
torch_model = DIPNet()

In [25]:
pretrained_model = "/home/anansupercuteeeee/Music/sr/super-resolution-lw/trained_model/student_dipnet_distilled.pth"
batch_size = 4    # just a random number

# Initialize model with the pretrained weights
map_location = lambda storage, loc: storage
if torch.cuda.is_available():
    map_location = None
torch_model.load_state_dict(torch.load(pretrained_model, map_location=map_location))

# set the model to inference mode
torch_model.eval()

DIPNet(
  (conv_1): Conv2d(3, 44, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (block_1): RRFB(
    (c1_r): Conv2d(44, 38, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (c2_r): Conv2d(38, 38, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (c3_r): Conv2d(38, 44, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (c5): Conv2d(44, 44, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (esa): ESA(
      (conv1): Conv2d(44, 16, kernel_size=(1, 1), stride=(1, 1))
      (conv_f): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1))
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2))
      (conv3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv4): Conv2d(16, 44, kernel_size=(1, 1), stride=(1, 1))
      (sigmoid): Sigmoid()
      (relu): ReLU(inplace=True)
    )
    (act): LeakyReLU(negative_slope=0.05, inplace=True)
  )
  (block_2): RRFB(
    (c1_r): Conv2d(44, 38, kernel_size=

In [26]:
x = torch.randn(batch_size, 3, 224, 224, requires_grad=True)
torch_out = torch_model(x)

In [27]:
torch.onnx.export(torch_model,
                  x,  # dummy input
                  "super_resolution.onnx",
                  export_params=True,
                  opset_version=11,
                  do_constant_folding=True,
                  input_names=['input'],
                  output_names=['output'],
                  dynamic_axes={
                      'input': {0: 'batch_size', 2: 'height', 3: 'width'},
                      'output': {0: 'batch_size', 2: 'height_up', 3: 'width_up'}
                  })


In [28]:
import onnx

onnx_model = onnx.load("super_resolution.onnx")
onnx.checker.check_model(onnx_model)

In [29]:
import onnxruntime

ort_session = onnxruntime.InferenceSession("super_resolution.onnx", providers=["CPUExecutionProvider"])

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# compute ONNX Runtime output prediction
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
ort_outs = ort_session.run(None, ort_inputs)

# compare ONNX Runtime and PyTorch results
np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)

print("Exported model has been tested with ONNXRuntime, and the result looks good!")

Exported model has been tested with ONNXRuntime, and the result looks good!


In [30]:
def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# Export ONNX
input_tensor = torch.randn(1, 3, 64, 64)  # hoặc input bạn đang dùng
torch_out = torch_model(input_tensor)

# Chạy ONNX
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(input_tensor)}
ort_outs = ort_session.run(None, ort_inputs)


In [43]:
import time

x = torch.randn(1, 3, 512, 720, requires_grad=True)

start = time.time()
torch_out = torch_model(x)
end = time.time()
print(f"Inference of Pytorch model used {end - start} seconds")

ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
start = time.time()
ort_outs = ort_session.run(None, ort_inputs)
end = time.time()
print(f"Inference of ONNX model used {end - start} seconds")

Inference of Pytorch model used 2.2329940795898438 seconds
Inference of ONNX model used 1.4303443431854248 seconds


In [33]:
print("Execution Providers:", ort_session.get_providers())
print("Used Execution Provider:", ort_session.get_providers()[0])


Execution Providers: ['CPUExecutionProvider']
Used Execution Provider: CPUExecutionProvider


In [12]:
print(torch.cuda.is_available())  # Phải là True
print(x.device)                   # Phải là "cuda:0" nếu muốn so sánh đúng tốc độ


True
cpu


In [13]:
import torch

# Giả sử model đã được load và set .eval()
model = torch_model.eval().to("cuda")

# Dummy input giống khi inference
dummy_input = torch.randn(1, 3, 512, 512).to("cuda")

# Export
torch.onnx.export(
    model, 
    dummy_input,
    "super_resolution.onnx",
    export_params=True,
    opset_version=17,                # dùng opset mới
    do_constant_folding=True,       # tối ưu biểu thức hằng
    input_names=['input'], 
    output_names=['output'],
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}  # cho phép batch size thay đổi
)
print("✅ Export ONNX thành công!")


✅ Export ONNX thành công!


In [14]:
import onnx
import onnxruntime as ort

onnx_model = onnx.load("super_resolution.onnx")
onnx.checker.check_model(onnx_model)  # kiểm tra cấu trúc

ort_session = ort.InferenceSession("super_resolution.onnx", providers=["CUDAExecutionProvider"])


[0;93m2025-05-03 23:14:36.668860216 [W:onnxruntime:, session_state.cc:1263 VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.[m
[0;93m2025-05-03 23:14:36.668878562 [W:onnxruntime:, session_state.cc:1265 VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments.[m


In [4]:
import tensorrt

In [5]:
print(tensorrt.__version__)

10.10.0.31


In [1]:
import onnxruntime
ort_session = onnxruntime.InferenceSession("super_resolution.onnx")
print(ort_session.get_inputs()[0].name)


input


In [4]:
%trtexec --version



UsageError: Line magic function `%trtexec` not found.
