Skip to content

Commit

Permalink
speed improvement attempts for CUDA postprocessing
Browse files Browse the repository at this point in the history
  • Loading branch information
IanButterworth committed Sep 20, 2019
1 parent 2a9217b commit c8a26b9
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 17 deletions.
26 changes: 14 additions & 12 deletions src/postprocess.jl
Expand Up @@ -7,32 +7,32 @@ Post processing function.
Confidence score threshold to select correct predictions. Recommended: 0.3
IoU threshold to remove unnecessary predictions. Recommended: 0.3
"""
function postprocess(yolomat::xtype,settings::Settings; conf_thresh::T = 0.3, iou_thresh::T = 0.3) where {T<:AbstractFloat}
function postprocess(yolomat::Array{Float32},settings::Settings; conf_thresh::T = 0.3, iou_thresh::T = 0.3) where {T<:AbstractFloat}
im_w = settings.image_shape[1]
im_h = settings.image_shape[2]
num_images = size(yolomat,4)
all_detections = map(x->PredictLabel[],1:num_images)
RATE = 32
@views for i in 1:num_images
for i in 1:num_images
for cy in 1:13
for cx in 1:13
for b in 1:5
@views for b in 1:5
channel = (b-1)*(settings.num_classes + 5)
tx = yolomat[cy,cx,channel+1,i]
ty = yolomat[cy,cx,channel+2,i]
tw = yolomat[cy,cx,channel+3,i]
th = yolomat[cy,cx,channel+4,i]
tc = yolomat[cy,cx,channel+5,i]
x = Float32((sigmoid(tx) + cx-1) * RATE)
y = Float32((sigmoid(ty) + cy-1) * RATE)
w = Float32(exp(tw) * (settings.anchors[b][1]) * RATE)
h = Float32(exp(th) * (settings.anchors[b][2]) * RATE)
conf = Float32(sigmoid(tc))
classScores = Array{Float32}(yolomat[cy,cx,channel+6:channel+25,i])
x = (sigmoid(tx) + cx-1) * RATE
y = (sigmoid(ty) + cy-1) * RATE
w = exp(tw) * settings.anchors[b][1] * RATE
h = exp(th) * settings.anchors[b][2] * RATE
conf = sigmoid(tc)
classScores = yolomat[cy,cx,channel+6:channel+25,i]
classScores = softmax(classScores)
classNo = argmax(classScores)
bestScore = classScores[classNo]
classConfidenceScore = conf*bestScore
classConfidenceScore = conf * bestScore
if classConfidenceScore > conf_thresh
bbox = BBOX(
x = max(0.0,x-w/2)/im_w,
Expand All @@ -53,7 +53,9 @@ function postprocess(yolomat::xtype,settings::Settings; conf_thresh::T = 0.3, io
return all_detections
end
end

# `KnetArray` entry point for post processing.
# Converts `yolomat` to a plain `Array{Float32}` and forwards it, together with
# `settings` and both thresholds unchanged, to the `Array{Float32}` method.
function postprocess(yolomat::KnetArray{Float32}, settings::Settings; conf_thresh::T = 0.3, iou_thresh::T = 0.3) where {T<:AbstractFloat}
    converted = Array{Float32}(yolomat)
    return postprocess(converted, settings; conf_thresh = conf_thresh, iou_thresh = iou_thresh)
end
"""
nonMaxSupression!(detections::Vector{PredictLabel}, iou_thresh::Float32)
Expand All @@ -63,7 +65,7 @@ function nonMaxSupression!(detections::Vector{PredictLabel}, iou_thresh::T) wher
sort!(detections, by = x -> x.conf, rev = true)
for i = 1:length(detections)
k = i + 1
while k <= length(detections)
@views while k <= length(detections)
iou = ioumatch(detections[i].bbox, detections[k].bbox)
if iou > iou_thresh && (detections[i].class == detections[k].class)
deleteat!(detections, k)
Expand Down
21 changes: 16 additions & 5 deletions test/runtests.jl
Expand Up @@ -93,14 +93,25 @@ end
inference_rate = 1 / inference_time
@test inference_time < 1.0 #seconds

predictions = YOLO.postprocess(res, settings, conf_thresh = 0.1, iou_thresh = 0.3)
predictions = YOLO.postprocess(res, settings, conf_thresh = 0.3, iou_thresh = 0.3)

if false #disabled because Makie can't be tested on headless CI
scene = YOLO.renderResult(vocloaded.imstack_mat[:,:,:,1], predictions[1], settings, save_file = "test.png")
@test isfile("test.png")
rm("test.png", force=true)
t = @elapsed for i in 1:10
YOLO.postprocess(res, settings, conf_thresh = 0.3, iou_thresh = 0.3)
end

postprocess_time = (t / 10) / num_images
postprocess_rate = 1 / postprocess_time
@test postprocess_time < 1.0 #seconds

## Makie Tests
#disabled because Makie can't be tested on headless CI
# scene = YOLO.renderResult(vocloaded.imstack_mat[:,:,:,1], predictions[1], settings, save_file = "test.png")
# @test isfile("test.png")
# rm("test.png", force=true)

enable_info()
@info "YOLO_v2_tiny inference time per image: $(round(inference_time, digits=2)) seconds ($(round(inference_rate, digits=2)) fps)"
@info "YOLO_v2_tiny postprocess time per image: $(round(postprocess_time, digits=4)) seconds ($(round(postprocess_rate, digits=2)) fps)"
@info "Total time per image: $(round(inference_time + postprocess_time, digits=2)) seconds ($(round(1/(inference_time + postprocess_time), digits=2)) fps)"

end

0 comments on commit c8a26b9

Please sign in to comment.