-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Description
Environment: Windows 10, CUDA 11.1, model imported from MXNet. The CPU result is correct, but the GPU result is wrong.
CPU:
device = 'x86.cpu'
ctx = tvm.cpu(0)
GPU:
device = 'x86.cuda'
ctx=tvm.gpu(0)
The whole .py script:
import mxnet as mx
import tvm
import tvm.relay as relay
import numpy as np
from tvm.contrib import util
import os
# ---- Script configuration ------------------------------------------------
dtype = 'float32'
use_arm64 = False
use_android = False
network = 'IrisAttackCCL'

# Backend under test. To reproduce the CPU run, use instead:
#   device = 'x86.cpu'
#   ctx = tvm.cpu(0)
# (The original used `//` here, which is not a comment marker in Python.)
device = 'x86.cuda'
ctx = tvm.gpu(0)

# Prefix used both for loading the checkpoint and for naming the exported files.
model_path = './'
path = model_path + network

# Set the input layer name and the input shape (batch_size, 1, 240, 320).
input_layer = 'data'
batch_size = 1
image_shape = (1, 240, 320)
input_shape = (batch_size,) + image_shape
######################################################################
# Pick the compilation target and the host target from `device`.
# 'cpu' and 'gpu' select Android ARM / Mali OpenCL targets; the 'x86.*'
# values select the local LLVM or CUDA backends.
if device == 'cpu':
    if use_arm64:
        target = tvm.target.create('llvm -device=arm_cpu -target=arm64-linux-android -mattr=+neon')
    else:
        target = tvm.target.create('llvm -device=arm_cpu -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft')
    target_host = None
elif device == 'gpu':
    target = tvm.target.create('opencl -device=mali')
    if use_arm64:
        target_host = 'llvm -target=arm64-linux-android -mattr=+neon'
    else:
        target_host = 'llvm -target=arm-linux-androideabi -mattr=+neon -mfloat-abi=soft'
elif device == 'x86.cpu':
    target = 'llvm'
    target_host = None
elif device == 'x86.cuda':
    # Alternative kept from the original script, using vendor libraries:
    #   tvm.target.cuda(model='1080ti', options="-libs=cudnn, cublas")
    # The original also assigned target = 'cuda' first; that value was
    # overwritten immediately, so the dead assignment is dropped.
    target = tvm.target.cuda(model='3060ti')
    target_host = 'llvm'
else:
    target = tvm.target.create('llvm -target=arm64-linux-android')
    target_host = None
######################################################################
# input the mxnet model (checkpoint prefix `path`, epoch 0)
mx_sym, args, auxs = mx.model.load_checkpoint(path, 0)
# NOTE: the original script had `import pdb; pdb.set_trace()` here. The
# leftover breakpoint stops every run until the user types `c`, so it is removed.
######################################################################
# Convert the MXNet symbol and its weights to Relay for the fixed input shape.
shape_dict = {'data': input_shape}
func, params = relay.frontend.from_mxnet(mx_sym, shape_dict, dtype, args, auxs)
######################################################################
# now compile the graph
with relay.build_config(opt_level=3):
    # target_host is chosen per device above but was never handed to the
    # compiler; pass it so the host-side code is built for the selected host.
    graph, lib, params = relay.build(func, target, target_host=target_host, params=params)
######################################################################
print("Compile...")
######################################################################
# Save the compiled Relay model: shared library, graph JSON and parameters.
temp = util.tempdir()
path_lib = temp.relpath("%s.%s.dll" % (path, device))
if not use_android:
    # Local build: write the library next to the model files.
    lib.export_library("%s.%s.dll" % (path, device))
else:
    # Android build: link with the NDK toolchain into the temp directory.
    from tvm.contrib import ndk
    if use_arm64:
        lib.export_library(path_lib, ndk.create_shared)
    else:
        lib.export_library(
            path_lib,
            ndk.create_shared,
            options=["-shared", "-fPIC", "-mfloat-abi=softfp", "-mfpu=neon"],
        )

# Graph definition (JSON text) and serialized parameters (binary).
with open("%s.%s.json" % (path, device), "w") as fo:
    fo.write(graph)
with open("%s.%s.params" % (path, device), "wb") as fo:
    fo.write(relay.save_param_dict(params))
print("------convert done!!!------")
import numpy as np
import time
from tvm.contrib import graph_runtime

# Dummy all-ones input, NCHW (batch_size, 1, 240, 320).
img = np.ones(input_shape)
x = np.array(img)
######################################################################
dtype = 'float32'
m = graph_runtime.create(graph, lib, ctx)
m.set_input('data', tvm.nd.array(x.astype(dtype)))
m.set_input(**params)

# Wall-clock time averaged over `count` runs.
# NOTE(review): with count = 1 this measures the very first run, which
# presumably includes one-time device initialization -- compare with the
# time_evaluator figure printed afterwards.
count = 1
start = time.time()
for _ in range(count):
    m.run()
end = (time.time() - start) / count
print ("the cost time is ", end)
# Benchmark the "run" function: 10 repeats of 1 call each.
print("Evaluate inference time cost...")
ftimer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
mean_ms = np.mean(prof_res)
std_ms = np.std(prof_res)
print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (mean_ms, std_ms))

# Report the shape of the first output of the last run.
tvm_output0 = m.get_output(0)
print('- tvm_output 0 shape : ', tvm_output0.shape)
# test images
# The image preprocessing reuses the model's input shape.
data_shape = input_shape
import cv2
def preprocess_img_single(img_path, data_shape):
    """Load one image as grayscale and shape it into a (1, 1, H, W) array.

    Args:
        img_path: Path of the image file; read with ``cv2.imread(img_path, 0)``.
        data_shape: Indexable shape where ``data_shape[2]`` is the target
            height and ``data_shape[3]`` the target width.

    Returns:
        Array of shape ``(1, 1, data_shape[2], data_shape[3])`` holding the
        resized image with 128.0 subtracted from every pixel.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read the image.
    """
    img = cv2.imread(img_path, 0)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the path rather than inside cv2.resize.
        raise FileNotFoundError("cannot read image: %s" % img_path)
    height, width = data_shape[2], data_shape[3]
    # cv2.resize takes (width, height); the result has shape (height, width).
    img = cv2.resize(img, (width, height)) - 128.0
    # Single channel: (height, width, 1).
    img = np.reshape(img, (height, width, 1))
    # Channel first: (1, height, width).
    img_data = np.transpose(np.array(img), (2, 0, 1))
    # Add the batch axis: (1, 1, height, width).
    img_data = np.expand_dims(img_data, axis=0)
    return img_data
# Set inputs
img_path = model_path + "./00001.jpg"
img_data = preprocess_img_single(img_path, data_shape)

# Fresh runtime module for the real image.
m = graph_runtime.create(graph, lib, ctx)
m.set_input('data', tvm.nd.array(img_data.astype(dtype)))
m.set_input(**params)
m.run()

tvm_output = m.get_output(0).asnumpy()
result = tvm_output[0, :]
# Keep only the rows whose first column is not -1.
# NOTE(review): presumably -1 marks invalid/suppressed detections; the GPU
# output reported with this script contains extra rows that pass this
# filter -- confirm against the CPU output.
resultfinal = result[result[:, 0] != -1].tolist()
print(resultfinal)
CPU result:
------convert done!!!------
the cost time is 0.008001565933227539
Evaluate inference time cost...
Mean inference time (std dev): 8.99 ms (0.39 ms)
- tvm_output 0 shape : (1, 5228, 6)
resultfinal:[[3.0, 0.9999833106994629, 0.266605406999588, 0.13155204057693481, 0.7422218322753906, 0.7783907651901245], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351758480072, 0.590116560459137, 0.5846942663192749]]
GPU result:
------convert done!!!------
the cost time is 0.6151375770568848
Evaluate inference time cost...
Mean inference time (std dev): 6.12 ms (0.42 ms)
- tvm_output 0 shape : (1, 5228, 6)
resultfinal:[[3.495429754257202, 8.788818359375, 0.696427583694458, 3.863145589828491, 2.051990509033203, 9.19469928741455], [1.7574224472045898, 4.362786769866943, 1.1608469486236572, -0.23158644139766693, 1.5107040405273438, 2.060492515563965], [0.342197448015213, 2.6218209266662598, -2.7281951904296875, -1.9947190284729004, -1.5453457832336426, -0.49175599217414856], [-1.8521333932876587, 1.7014127969741821, -0.8565990924835205, -2.5641982555389404, -0.38735270500183105, 3.0310404300689697], [0.07879126071929932, 1.0, 0.0, 0.8949449062347412, 0.04564562812447548, 1.0], [3.0, 0.9999833106994629, 0.2666054368019104, 0.1315521001815796, 0.7422217726707458, 0.7783908247947693], [0.0, 0.9999711513519287, 0.424368679523468, 0.3192351460456848, 0.590116560459137, 0.5846942067146301]]
The last two rows of the GPU result are equal to the CPU result.