In [1]:
using BenchmarkTools
using Distances
using NearestNeighborDescent
using NearestNeighborDescent: brute_knn, brute_search
using PyCall
@pyimport pynndescent

In [2]:
include("./benchutils.jl");

In [3]:
function bmark(name, data, queries, metric)
    print(name, "\n")
    print("Graph construction: ")
    knn_graph = @btime DescentGraph($data, 10, $metric)
    nn = knn_graph.indices
    brute_graph = brute_knn(data, metric, 10)
    true_nn = getindex.(brute_graph, 1)
    @show recall(nn, true_nn)
    true_idx, true_dist = brute_search(data, queries, 10, metric)
    idx, dist = search(knn_graph, queries, 10, 5)
    @show recall(idx, true_idx)
    q_per_sec = length(queries)/(@belapsed search($knn_graph, $queries, 10, 10))
    @show q_per_sec
    if metric == Euclidean()
        pynn_metric = "euclidean"
    elseif metric == CosineDist()
        pynn_metric = "cosine"
    elseif metric == Hamming()
        pynn_metric = "hamming"
    end
    print("pynn descent: ")
    matrix_data = permutedims(hcat(data...))
    pynn_graph = @btime pynndescent.NNDescent($matrix_data, n_neighbors=10, metric=$pynn_metric)
    pynn_nn = permutedims(pynn_graph[:_neighbor_graph][1]) .+ 1
    @show recall(pynn_nn, true_nn)
    matrix_queries = permutedims(hcat(queries...))
    pynn_idx, pynn_dists = pynn_graph[:query](matrix_queries, k=10)
    pynn_idx = permutedims(pynn_idx) .+ 1
    @show recall(pynn_idx, true_idx)
    pynn_q_per_sec = length(queries) / (@belapsed $(pynn_graph)[:query]($matrix_queries, k=10))
    @show pynn_q_per_sec
    return
end

bmark (generic function with 1 method)

In [4]:
bmark("Fashion MNIST", FMNIST_data, FMNIST_queries, Euclidean())

Fashion MNIST
Graph construction:   813.979 ms (9802387 allocations: 297.69 MiB)
recall(nn, true_nn) = 0.9968200000000003
recall(idx, true_idx) = 0.6689999999999997
q_per_sec = 2686.769534868554
pynn descent:   2.176 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.8976800000000273
recall(pynn_idx, true_idx) = 0.9984
pynn_q_per_sec = 5188.155784836761


In [5]:
bmark("MNIST", MNIST_data, MNIST_queries, Euclidean())

MNIST
Graph construction:   785.094 ms (9385471 allocations: 298.79 MiB)
recall(nn, true_nn) = 0.9874000000000002
recall(idx, true_idx) = 0.6895999999999997
q_per_sec = 2655.151268534629
pynn descent:   2.118 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.89118000000003
recall(pynn_idx, true_idx) = 0.9903999999999996
pynn_q_per_sec = 5228.639025313086


In [6]:
# Cosine Tests 
bmark("Cosine Random", rand_data, rand_queries, CosineDist())

Cosine Random
Graph construction:   1.302 s (19815160 allocations: 565.24 MiB)
recall(nn, true_nn) = 0.7393000000000075
recall(idx, true_idx) = 0.5287999999999999
q_per_sec = 2715.4288019700284
pynn descent:   1.233 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.48460000000000125
recall(pynn_idx, true_idx) = 0.7529999999999999
pynn_q_per_sec = 10193.210885280976


In [7]:
# Hamming Tests
bmark("Hamming Random", ham_data, ham_queries, Hamming())

Hamming Random
Graph construction:   628.799 ms (6492427 allocations: 252.12 MiB)
recall(nn, true_nn) = 0.127320000000004
recall(idx, true_idx) = 0.1564000000000003
q_per_sec = 1375.0560844125148
pynn descent:   5.878 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.10702000000000282
recall(pynn_idx, true_idx) = 0.186
pynn_q_per_sec = 1063.9135896702196
