In [1]:
using BenchmarkTools
using Distances
using NearestNeighborDescent
using NearestNeighborDescent: brute_knn, brute_search
using PyCall
@pyimport pynndescent

In [2]:
include("./benchutils.jl");

In [3]:
function bmark(name, data, queries, metric)
    print(name, "\n")
    print("Graph construction: ")
    knn_graph = @btime DescentGraph($data, 10, $metric)
    nn = knn_graph.indices
    brute_graph = brute_knn(data, metric, 10)
    true_nn = getindex.(brute_graph, 1)
    @show recall(nn, true_nn)
    true_idx, true_dist = brute_search(data, queries, 10, metric)
    idx, dist = search(knn_graph, queries, 10, 5)
    @show recall(idx, true_idx)
    q_per_sec = length(queries)/(@belapsed search($knn_graph, $queries, 10, 10))
    @show q_per_sec
    if metric == Euclidean()
        pynn_metric = "euclidean"
    elseif metric == CosineDist()
        pynn_metric = "cosine"
    elseif metric == Hamming()
        pynn_metric = "hamming"
    end
    print("pynn descent: ")
    matrix_data = permutedims(hcat(data...))
    pynn_graph = @btime pynndescent.NNDescent($matrix_data, n_neighbors=10, metric=$pynn_metric)
    pynn_nn = permutedims(pynn_graph[:_neighbor_graph][1]) .+ 1
    @show recall(pynn_nn, true_nn)
    matrix_queries = permutedims(hcat(queries...))
    pynn_idx, pynn_dists = pynn_graph[:query](matrix_queries, k=10)
    pynn_idx = permutedims(pynn_idx) .+ 1
    @show recall(pynn_idx, true_idx)
    pynn_q_per_sec = length(queries) / (@belapsed $(pynn_graph)[:query]($matrix_queries, k=10))
    @show pynn_q_per_sec
    return
end

bmark (generic function with 1 method)

In [4]:
bmark("Fashion MNIST", FMNIST_data, FMNIST_queries, Euclidean())

Fashion MNIST
Graph construction:   1.210 s (19493689 allocations: 957.44 MiB)
recall(nn, true_nn) = 0.9968600000000006
recall(idx, true_idx) = 0.6827999999999997
q_per_sec = 2168.874502831067
pynn descent:   2.223 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.897680000000027
recall(pynn_idx, true_idx) = 0.9996
pynn_q_per_sec = 5140.270106595734


In [5]:
bmark("MNIST", MNIST_data, MNIST_queries, Euclidean())

MNIST
Graph construction:   1.184 s (18707990 allocations: 924.29 MiB)
recall(nn, true_nn) = 0.9882799999999999
recall(idx, true_idx) = 0.6645999999999995
q_per_sec = 2092.2045333628394
pynn descent:   2.149 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.8905400000000308
recall(pynn_idx, true_idx) = 0.9913999999999995
pynn_q_per_sec = 5281.019525745857


In [6]:
# Cosine Tests 
bmark("Cosine Random", rand_data, rand_queries, CosineDist())

Cosine Random
Graph construction:   2.029 s (35294082 allocations: 1.77 GiB)
recall(nn, true_nn) = 0.7418400000000052
recall(idx, true_idx) = 0.5147999999999997
q_per_sec = 1941.8670560840255
pynn descent:   1.260 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.480500000000001
recall(pynn_idx, true_idx) = 0.7534000000000001
pynn_q_per_sec = 9930.452078865665


In [7]:
# Hamming Tests
bmark("Hamming Random", ham_data, ham_queries, Hamming())

Hamming Random
Graph construction:   889.122 ms (12423877 allocations: 637.92 MiB)
recall(nn, true_nn) = 0.1297200000000037
recall(idx, true_idx) = 0.14760000000000034
q_per_sec = 1089.710352460188
pynn descent:   5.852 s (48 allocations: 2.39 KiB)
recall(pynn_nn, true_nn) = 0.1057400000000022
recall(pynn_idx, true_idx) = 0.18080000000000002
pynn_q_per_sec = 1056.3824773215756
