In [1]:
using BenchmarkTools
using Distances
using NearestNeighborDescent
using NearestNeighborDescent: brute_knn, brute_search
using PyCall
@pyimport pynndescent

┌ Info: Recompiling stale cache file /home/dillon/.julia/compiled/v1.0/NearestNeighborDescent/dBdpT.ji for NearestNeighborDescent [1dd1f7d0-b91d-11e8-266b-d34208f3077b]
└ @ Base loading.jl:1190


In [2]:
# Load the shared benchmark helpers.
# NOTE(review): presumably this is where `recall` and the datasets used below
# (FMNIST_*, MNIST_*, rand_*, ham_*) are defined — confirm in benchutils.jl.
include("./benchutils.jl");

In [3]:
# Name under which pynndescent knows each supported metric.
# Dispatching on the metric's type (instead of comparing against a
# default-constructed instance) also accepts instances that were built with
# non-default arguments, should the metric type have any.
_pynn_metric_name(::Euclidean) = "euclidean"
_pynn_metric_name(::CosineDist) = "cosine"
_pynn_metric_name(::Hamming) = "hamming"
function _pynn_metric_name(metric)
    throw(ArgumentError(
        "unsupported metric $(metric); expected Euclidean, CosineDist or Hamming",
    ))
end

# Stack a collection of equal-length points into a matrix with one point per row.
# `reduce(hcat, ...)` avoids splatting an entire dataset into varargs.
_as_rows(points) = permutedims(reduce(hcat, points))

# bmark(name, data, queries, metric)
#
# Print a side-by-side benchmark of NearestNeighborDescent (Julia) and
# pynndescent (Python, via PyCall) under the heading `name`.
#
# Arguments
# - `name`:    label printed before the results.
# - `data`:    collection of points the kNN graphs are built from; the elements are
#              concatenated as matrix columns, so they are presumably equal-length
#              vectors (TODO confirm against benchutils.jl).
# - `queries`: collection of query points in the same layout; `length(queries)` is
#              used as the number of queries.
# - `metric`:  `Euclidean()`, `CosineDist()` or `Hamming()`.
#
# For each library this prints the graph construction time (`@btime`), the recall
# of the first neighbor of every graph entry against the brute-force graph, and
# the recall of a 10-neighbor query search against brute-force search. For the
# Julia implementation it also prints the measured queries per second.
#
# Returns `nothing`. Throws `ArgumentError` for an unsupported `metric`.
function bmark(name, data, queries, metric)
    # Resolve pynndescent's metric name up front so an unsupported metric fails
    # immediately rather than after the Julia benchmarks have already run.
    pynn_metric = _pynn_metric_name(metric)

    println(name)

    # --- NearestNeighborDescent (Julia) ---
    print("Graph construction: ")
    knn_graph = @btime DescentGraph($data, 10, $metric)
    # Element 1 of every graph entry is compared against the same element of the
    # brute-force graph.
    # NOTE(review): presumably entries are (index, distance) pairs — confirm.
    nn = getindex.(knn_graph.graph, 1)
    brute_graph = brute_knn(data, metric, 10)
    true_nn = getindex.(brute_graph, 1)
    # The expression text inside each `@show` is part of the printed output, so
    # these variable names are kept stable on purpose.
    @show recall(nn, true_nn)

    true_idx, _ = brute_search(data, queries, 10, metric)
    idx, _ = search(knn_graph, queries, 10, 70)
    @show recall(idx, true_idx)
    q_per_sec = length(queries) / (@belapsed search($knn_graph, $queries, 10, 70))
    @show q_per_sec

    # --- pynndescent (Python) ---
    print("pynn descent: ")
    matrix_data = _as_rows(data)
    pynn_graph = @btime pynndescent.NNDescent(
        $matrix_data, n_neighbors=10, metric=$pynn_metric
    )
    # `.+ 1` shifts Python's 0-based indices to Julia's 1-based ones; the result is
    # transposed before being handed to `recall`.
    # NOTE(review): `_neighbor_graph[1]` is presumably the index array — confirm.
    pynn_nn = permutedims(pynn_graph[:_neighbor_graph][1]) .+ 1
    @show recall(pynn_nn, true_nn)

    matrix_queries = _as_rows(queries)
    pynn_idx, _ = pynn_graph[:query](matrix_queries, k=10)
    pynn_idx = permutedims(pynn_idx) .+ 1
    @show recall(pynn_idx, true_idx)
    return nothing
end

bmark (generic function with 1 method)

In [4]:
# Euclidean-distance benchmark on the Fashion MNIST data and queries.
bmark("Fashion MNIST", FMNIST_data, FMNIST_queries, Euclidean())

Fashion MNIST
Graph construction:   1.140 s (10439774 allocations: 506.60 MiB)
recall(nn, true_nn) = 0.9972200000000001
recall(idx, true_idx) = 0.9103999999999992
q_per_sec = 296.9891890577498
pynn descent:   2.293 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.8977400000000271
recall(pynn_idx, true_idx) = 0.9987999999999999


In [5]:
# Euclidean-distance benchmark on the MNIST data and queries.
bmark("MNIST", MNIST_data, MNIST_queries, Euclidean())

MNIST
Graph construction:   1.124 s (9972703 allocations: 506.55 MiB)
recall(nn, true_nn) = 0.9880600000000003
recall(idx, true_idx) = 0.9875999999999997
q_per_sec = 279.25088327836283
pynn descent:   2.202 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.89204000000003
recall(pynn_idx, true_idx) = 0.9905999999999994


In [6]:
# Cosine-distance benchmark on `rand_data` / `rand_queries`.
bmark("Cosine Random", rand_data, rand_queries, CosineDist())

Cosine Random
Graph construction:   1.704 s (21483978 allocations: 796.81 MiB)
recall(nn, true_nn) = 0.7490000000000093
recall(idx, true_idx) = 0.9237999999999983
q_per_sec = 272.41876357917977
pynn descent:   1.290 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.4840200000000018
recall(pynn_idx, true_idx) = 0.7649999999999996


In [7]:
# Hamming-distance benchmark on `ham_data` / `ham_queries`.
bmark("Hamming Random", ham_data, ham_queries, Hamming())

Hamming Random
Graph construction:   926.440 ms (6911597 allocations: 455.76 MiB)
recall(nn, true_nn) = 0.13180000000000455
recall(idx, true_idx) = 0.8383999999999984
q_per_sec = 194.33606844455386
pynn descent:   5.926 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.10416000000000263
recall(pynn_idx, true_idx) = 0.17880000000000001
