In [1]:
using BenchmarkTools
using Distances
using NearestNeighborDescent
using NearestNeighborDescent: brute_knn, brute_search
using PyCall
@pyimport pynndescent

In [2]:
include("./benchutils.jl");

In [3]:
function bmark(name, data, queries, metric)
    print(name, "\n")
    print("Graph construction: ")
    knn_graph = @btime DescentGraph($data, 10, $metric)
    nn = getindex.(knn_graph.graph, 1)
    brute_graph = brute_knn(data, metric, 10)
    true_nn = getindex.(brute_graph, 1)
    @show recall(nn, true_nn)
    true_idx, true_dist = brute_search(data, queries, 10, metric)
    idx, dist = search(knn_graph, queries, 10, 5)
    @show recall(idx, true_idx)
    q_per_sec = length(queries)/(@belapsed search($knn_graph, $queries, 10, 10))
    @show q_per_sec
    if metric == Euclidean()
        pynn_metric = "euclidean"
    elseif metric == CosineDist()
        pynn_metric = "cosine"
    elseif metric == Hamming()
        pynn_metric = "hamming"
    end
    print("pynn descent: ")
    matrix_data = permutedims(hcat(data...))
    pynn_graph = @btime pynndescent.NNDescent($matrix_data, n_neighbors=10, metric=$pynn_metric)
    pynn_nn = permutedims(pynn_graph[:_neighbor_graph][1]) .+ 1
    @show recall(pynn_nn, true_nn)
    matrix_queries = permutedims(hcat(queries...))
    pynn_idx, pynn_dists = pynn_graph[:query](matrix_queries, k=10)
    pynn_idx = permutedims(pynn_idx) .+ 1
    @show recall(pynn_idx, true_idx)
    pynn_q_per_sec = length(queries) / (@belapsed $(pynn_graph)[:query]($matrix_queries, k=10))
    @show pynn_q_per_sec
    return
end

bmark (generic function with 1 method)

In [4]:
bmark("Fashion MNIST", FMNIST_data, FMNIST_queries, Euclidean())

Fashion MNIST
Graph construction:   1.133 s (10389727 allocations: 505.43 MiB)
recall(nn, true_nn) = 0.9970799999999997
recall(idx, true_idx) = 0.6959999999999997
q_per_sec = 2022.47412892555
pynn descent:   2.277 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.897660000000027
recall(pynn_idx, true_idx) = 0.9986
pynn_q_per_sec = 5222.769830799597


In [5]:
bmark("MNIST", MNIST_data, MNIST_queries, Euclidean())

MNIST
Graph construction:   1.111 s (9989009 allocations: 506.91 MiB)
recall(nn, true_nn) = 0.9871200000000001
recall(idx, true_idx) = 0.6648
q_per_sec = 2018.416907823082
pynn descent:   2.243 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.8927200000000295
recall(pynn_idx, true_idx) = 0.9933999999999994
pynn_q_per_sec = 5160.41064236028


In [6]:
# Cosine Tests 
bmark("Cosine Random", rand_data, rand_queries, CosineDist())

Cosine Random
Graph construction:   1.690 s (21178989 allocations: 790.00 MiB)
recall(nn, true_nn) = 0.7514400000000074
recall(idx, true_idx) = 0.5402000000000001
q_per_sec = 1877.495224008874
pynn descent:   1.339 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.4788200000000019
recall(pynn_idx, true_idx) = 0.7752
pynn_q_per_sec = 9763.76454517768


In [8]:
# Hamming Tests
bmark("Hamming Random", ham_data, ham_queries, Hamming())

Hamming Random
Graph construction:   920.150 ms (6863274 allocations: 454.60 MiB)
recall(nn, true_nn) = 0.12552000000000418
recall(idx, true_idx) = 0.1520000000000003
q_per_sec = 1192.6602637825788
pynn descent:   5.949 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.10358000000000243
recall(pynn_idx, true_idx) = 0.17339999999999986
pynn_q_per_sec = 1059.547378524562
