# Part 1: Ensure PQLinearscann with a shared quantizer works

# Read SIFT 1 million 

In [1]:
using HDF5
using BenchmarkTools
using Distances
using LoopVectorization
using SIMD
using Clustering
using ProgressMeter
using StaticArrays
using DataFrames
using Plots

In [2]:
profile_flag = false
Sys.cpu_info()[1].model

"Apple M1 Pro"

In [3]:
path = joinpath(homedir(), "Datasets", "SIFT1M",  "sift-128-euclidean.hdf5")

"/Users/dbuchaca/Datasets/SIFT1M/sift-128-euclidean.hdf5"

In [4]:
f = h5open(path, "r")

X_tr_vecs = read(f["train"])
X_te_vecs = read(f["test"]);
true_neighbors = read(f["neighbors"])
true_distances = read(f["distances"])

true_neighbors .= true_neighbors .+ 1;

@show size(X_tr_vecs)
@show size(X_te_vecs)
@show size(true_neighbors)
@show size(true_distances)

size(X_tr_vecs) = (128, 1000000)
size(X_te_vecs) = (128, 10000)
size(true_neighbors) = (100, 10000)
size(true_distances) = (100, 10000)


(100, 10000)

## Clustering.jl

Create the pqcodes for the training points

In [5]:
n_features = size(X_tr_vecs,1)
n_clusters = 32
R_per_feature = []

@showprogress for j in  1:n_features
    R = kmeans(X_tr_vecs[[j],:], n_clusters; maxiter=200)
    push!(R_per_feature, R)
end

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:02:39[39m


In [76]:
P = hcat([r.centers[:] for r in R_per_feature]...);
println(P[:,1])
println(size(P))

Float32[123.9532, 2.4602568, 46.46598, 76.42616, 9.959858, 31.422665, 58.96036, 49.980793, 128.88298, 0.22927776, 93.36796, 19.457067, 84.87763, 109.06494, 15.44153, 36.935448, 150.0481, 69.41852, 26.458672, 52.49358, 139.60612, 115.08359, 119.503876, 4.47508, 54.980495, 133.79137, 41.93572, 63.950676, 6.9644637, 101.41437, 12.493844, 22.975471]
(32, 128)


In [77]:
P = hcat([r.centers[:] for r in R_per_feature]...);
println(P[:,1])
println(size(P))

Float32[123.9532, 2.4602568, 46.46598, 76.42616, 9.959858, 31.422665, 58.96036, 49.980793, 128.88298, 0.22927776, 93.36796, 19.457067, 84.87763, 109.06494, 15.44153, 36.935448, 150.0481, 69.41852, 26.458672, 52.49358, 139.60612, 115.08359, 119.503876, 4.47508, 54.980495, 133.79137, 41.93572, 63.950676, 6.9644637, 101.41437, 12.493844, 22.975471]
(32, 128)


In [73]:
if profile_flag
    @benchmark P = hcat([r.centers[:] for r in R_per_feature]...)
end

Another way to construct the matrix would be

In [74]:
function build_prototype_matrix(R, n_clusters, n_features)
    
    prototypes = Array{Float32}(undef, n_clusters, n_features);
    for j in 1:n_features
        prototypes[:,j] .= vec(R_per_feature[j].centers)
    end
    return prototypes
end

if profile_flag
    @benchmark P = build_prototype_matrix(R_per_feature, n_clusters, n_features)
end

In [75]:
P2 = build_prototype_matrix(R_per_feature, n_clusters, n_features);
isapprox(P, P2)

false

We can generate the PQcodes for the data as follows:

In [11]:
PQcodes = hcat([Int32.(r.assignments[:]) for r in R_per_feature]...)';
size(PQcodes)

(128, 1000000)

Here each vector prototype is in fact a single scalar (because subvectors have a single coordinate)

In [12]:
size(P)

(32, 128)

We need a method to, given a vector and a collection of prototypes per feature, find which are the closest

In [13]:

@inline function seuclidean(x::T,y::T) where {T}
    aux::T = (x - y)
    return aux * aux
end

@inline function euclidean(x::T,y::T) where {T}
    aux::T = (x - y)
    return sqrt(aux * aux)
end

function find_closest_coordinate(dist, 
                                 value::T,
                                 vector) where T
    best_coordinate = 1
    min_distance::T = typemax(T)
    for (j,x) in enumerate(vector)
        current_dist = dist(x, value)
        if current_dist < min_distance
            best_coordinate = j
            min_distance = current_dist
        end
    end
    return best_coordinate
end

query = X_te_vecs[:,1];
closest_prototypes = Array{UInt8}(undef, n_features, 1);

if profile_flag
    @benchmark find_closest_coordinate(euclidean, Float32(48.), query)
end

In [14]:
@assert 3 == find_closest_coordinate(euclidean, 2., [3.,5,2,1])

@assert 4 == find_closest_coordinate(euclidean, 1., [3.,5,2,1])

In [15]:
function encode(dist, vector, prototypes)
    closest_prototypes = Array{Int8}(undef, n_features, 1);
    @inbounds for (j,x) in enumerate(vector)
        closest_prototypes[j] = find_closest_coordinate(dist, x, prototypes[:,j])
    end
    return closest_prototypes
end

if profile_flag
    @benchmark encode(euclidean, query, P)
end

In [16]:
n_features, n_examples_test = size(X_te_vecs)

PQcodes_test = Array{Int16}(undef, n_features, n_examples_test);

for j in 1:n_examples_test
    PQcodes_test[:,j] = encode(euclidean, X_te_vecs[:,j] , P)  
end

In [17]:
function encode_fast(dist, vector::Array{T}, prototypes::Array{T}) where T
    
    n_clusters, n_features = size(prototypes)
    closest_prototypes = Array{Int8}(undef, n_features, 1);
    
    @inbounds for (j,x) in enumerate(vector)
        best_coordinate = 1
        min_distance::T = typemax(T)
        for k in 1:n_clusters
           current_dist = dist(prototypes[k,j], x)
           if current_dist < min_distance
               best_coordinate = k
               min_distance = current_dist
           end
           #println(k, ' ', j, ' ', best_coordinate, ' ',min_distance )
        end            
        closest_prototypes[j] = best_coordinate
    end
    return closest_prototypes
end

@assert isapprox(encode(euclidean, query, P),
                 encode_fast(euclidean, query, P))

if profile_flag
    @benchmark encode_fast(euclidean, query, P)
end

In [18]:
println(find_closest_coordinate(euclidean, query[1], P[:,1]))
println(find_closest_coordinate(euclidean, query[2], P[:,2]))
println(find_closest_coordinate(euclidean, query[3], P[:,3]))

10
12
20


In [19]:
function SEuclidean0(x, query)
    @assert length(x) == length(query)
    res = zero(eltype(x))
    @inbounds   for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return res
end

function Euclidean0(x, query)
    @assert length(x) == length(query)
    res = zero(eltype(x))
    @inbounds   for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

function compute_ADC(query, prototypes, dist)
    n_clusters, n_features = size(prototypes)
    
    ADC_table = Array{Float32}(undef, n_clusters, n_features)
    
    for j in 1:n_features
        for p in 1:n_clusters
            ADC_table[p,j] = dist(query[j], prototypes[p,j] )
        end
    end
    return  ADC_table
end

adc_table = compute_ADC(query, P, euclidean)

if profile_flag
    @benchmark compute_ADC(query, P, euclidean)
end

In [20]:
function adc_dist(x_code,  adc_table)
    res = zero(eltype(adc_table))
    @inbounds @fastmath for j in eachindex(x_code)
        res+= adc_table[x_code[j], j]
    end
    return res
end

query_code = encode_fast(euclidean, query, P)
x_code = PQcodes[:,1];
adc_table = compute_ADC(query, P, euclidean);
print(adc_dist(x_code,  adc_table))

if profile_flag
    @benchmark adc_dist(x_code,  adc_table)
end

5188.176

In [21]:
function linear_scann(query, PQcodes, adc_table, P)
    
    query_code = encode_fast(sqeuclidean, query, P)
    n_features, n_examples = size(PQcodes)
    distances = Array{eltype(query)}(undef, n_examples)
    
    @inbounds @fastmath for j in 1:n_examples
        distances[j] = adc_dist( view(PQcodes,:,j) ,  adc_table)    
    end
    return distances
end

if profile_flag
    @benchmark distances = linear_scann($query, $PQcodes, $adc_table, $P)
end

### Compute Recall PQLinearScann 

In [22]:
function recall(predicted, relevant, eval_at)
    """
    fraction of the relevant documents that are successfully retrieved
    """
    if eval_at == 0
        return 0.0
    end
    
    predicted_at_k = predicted[1:eval_at]
    n_predicted_and_relevant =  length(intersect( Set(predicted_at_k), Set(relevant))) 
    return n_predicted_and_relevant/ length(relevant)
end

query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_fast(euclidean, query, P)
x_code = PQcodes[:,1];
adc_table = compute_ADC(query, P, euclidean);
pq_distances = linear_scann(query, PQcodes, adc_table, P);
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, 100);

recall(top_k_pq, query_true_neighbors, 100) = 0.68


### Decoding vectors from pqcode

In [23]:
PQcodes = hcat([Int32.(r.assignments[:]) for r in R_per_feature]...)';

In [24]:
function get_top_k_from_distances(X, distances, top_k)
    top_k_pq = sortperm(distances)[1:top_k];
    X_top = X[:,top_k_pq]
    return X_top
end

function decode(x_pqcode, P)
    n_features = length(x_pqcode)
    x_decoded = zeros(eltype(P), n_features)
    
    for j in eachindex(x_pqcode)
        x_decoded[j] = P[x_pqcode[j], j]
    end
    return x_decoded
end

x = X_te_vecs[:,1]
x_pqcode = vec(encode(Euclidean0, x, P));
x_decoded = decode(x_pqcode, P);

In [25]:
query = X_te_vecs[:,1]
adc_table = compute_ADC(query, P, Euclidean0)
pq_distances = linear_scann(query, PQcodes_test, adc_table, P);
x_closest = vec(get_top_k_from_distances(X_te_vecs, pq_distances, 1));
df_x = DataFrame(Dict(:x => x, 
                      :x_decoded => x_decoded,
                      :x_pqcode => x_pqcode,
                      :x_closest => x_closest
                      ))
first(df_x, 5)

Unnamed: 0_level_0,x,x_closest,x_decoded,x_pqcode
Unnamed: 0_level_1,Float32,Float32,Float32,Int8
1,1.0,1.0,0.229278,10
2,3.0,3.0,2.45516,12
3,11.0,11.0,10.9369,20
4,110.0,110.0,103.646,12
5,62.0,62.0,64.4257,26


##  PQLinearscann Sharing prototypes across features

https://groups.google.com/g/julia-users/c/xBcQRebyi_o



In [26]:
n_clusters = 32
#R_shared = kmeans(Matrix(vec(X_tr_vecs)'), n_clusters; maxiter=200)
#R_shared = kmeans(X_tr_vecs[[1],:], n_clusters; maxiter=200)
R_shared = R_per_feature[1]

KmeansResult{Matrix{Float32}, Float32, Int64}(Float32[123.9532 2.4602568 … 12.493844 22.975471], [10, 15, 10, 31, 10, 3, 10, 15, 29, 12  …  16, 5, 5, 6, 16, 7, 2, 6, 10, 22], Float32[0.05256829, 2.078003, 0.05256829, 0.24386597, 0.5940128, 2.3530273, 0.05256829, 0.31188965, 1.0723343, 2.3806763  …  4.262451, 0.0016021729, 0.9213257, 2.488037, 3.7458496, 0.0014648438, 0.2913227, 2.0239258, 0.05256829, 1.1738281], [11602, 66741, 21634, 22320, 52464, 47229, 20309, 14787, 9357, 202536  …  9550, 52006, 13227, 6351, 29853, 18206, 63822, 13575, 30781, 30496], [11602, 66741, 21634, 22320, 52464, 47229, 20309, 14787, 9357, 202536  …  9550, 52006, 13227, 6351, 29853, 18206, 63822, 13575, 30781, 30496], 1.3850728f6, 6, true)

In [27]:
function encode_shared(dist, vector::Array{T}, shared_prototypes::Array{T}) where T
    n_clusters = length(shared_prototypes)
    closest_prototypes = Array{Int8}(undef, n_features, 1);
    
    @inbounds for (j,x) in enumerate(vector)
        best_coordinate = 1
        min_distance::T = typemax(T)
        for k in 1:n_clusters
           current_dist = dist(shared_prototypes[k], x)
           if current_dist < min_distance
               best_coordinate = k
               min_distance = current_dist
           end
           #println(k, ' ', j, ' ', best_coordinate, ' ',min_distance )
        end            
        closest_prototypes[j] = best_coordinate
    end
    return closest_prototypes
end

"""
function encode_fast_shared(dist, vector::Array{T}, prototypes::Array{T}) where T
    
    n_features = length(vector)
    n_clusters = length(prototypes)
    closest_prototypes = Array{Int32}(undef, n_features, 1);
    
    @inbounds @fastmath for (j,x) in enumerate(vector)
        best_coordinate = 1
        min_distance::T = typemax(T)
        for k in 1:n_clusters
           current_dist = dist(prototypes[k], x)
           if current_dist < min_distance
               best_coordinate = k
               min_distance = current_dist
           end
        end            
        closest_prototypes[j] = best_coordinate
    end
    return closest_prototypes
end

if profile_flag
    @benchmark encode_fast_shared(euclidean, query, P_shared)
end
"""

P_shared = vec(R_shared.centers)

j = 3
x = X_tr_vecs[:,j]
x_pqcode_shared = encode_shared(sqeuclidean, x, P_shared);


P_shared_copied = hcat( [P_shared for i in 1:128]...);
@assert P_shared_copied[:,1] == P_shared
x_pqcode = encode(sqeuclidean, x, P_shared_copied);
x_decoded = decode(x_pqcode, P_shared_copied);

@assert x_pqcode_shared == x_pqcode

df_x = DataFrame(Dict(:x => x, 
                      :x_pqcode => vec(x_pqcode),
                      :x_pqcode_shared => vec(x_pqcode_shared),
                      :x_decoded => x_decoded,
                      #:centroid => vec(P_shared[x_pqcode])
                      ))

first(df_x, 5)

Unnamed: 0_level_0,x,x_decoded,x_pqcode,x_pqcode_shared
Unnamed: 0_level_1,Float32,Float32,Int8,Int8
1,0.0,0.229278,10,10
2,1.0,0.229278,10,10
3,5.0,4.47508,24,24
4,3.0,2.46026,2,2
5,44.0,41.9357,27,27


In [28]:
n_features, n_examples = size(X_tr_vecs)

PQcodes_shared = Array{Int32}(undef, n_features, n_examples);

for j in 1:n_examples
    PQcodes_shared[:,j] = encode_shared(euclidean, X_tr_vecs[:,j], P_shared)  
    #PQcodes_shared[:,j] = encode(euclidean, X_tr_vecs[:,j] ,P_shared_copied)  
end

@show maximum(PQcodes), maximum(PQcodes_shared)

(maximum(PQcodes), maximum(PQcodes_shared)) = (32, 32)


(32, 32)

Because we have created the shared quantizer from the quantizer of the PQquantizer of the first feature the following assertion must hold.

In other words, all encoded elements from the first feature (and there are 1 million items) must be mapped to the same integer

In [29]:
@assert PQcodes_shared[1,:] == PQcodes[1,:]

Now we need a new encoding function that uses a single vector of prototypes

In [30]:
function encode_fast_shared(dist, vector::Array{T}, prototypes::Array{T}) where T
    
    n_features = length(vector)
    n_clusters = length(prototypes)
    closest_prototypes = Array{Int32}(undef, n_features, 1);
    
    @inbounds @fastmath for (j,x) in enumerate(vector)
        best_coordinate = 1
        min_distance::T = typemax(T)
        for k in 1:n_clusters
           current_dist = dist(prototypes[k], x)
           if current_dist < min_distance
               best_coordinate = k
               min_distance = current_dist
           end
        end            
        closest_prototypes[j] = best_coordinate
    end
    return closest_prototypes
end

if profile_flag
    @benchmark encode_fast_shared(euclidean, query, P_shared)
end

@assert encode_fast_shared(euclidean, x ,vec(P_shared)) ==  encode(euclidean, x ,P_shared_copied);
@assert encode_fast_shared(euclidean, x ,vec(P_shared)) ==  encode(sqeuclidean, x ,P_shared_copied);

Now we will ensure that `compute_ADC_shared` provides the same ADC table computation as `compute_ADC`
in the case where `compute_ADC` is called with all prototypes being copies of column vector used in `compute_ADC_shared` 

In [31]:
function compute_ADC_shared(query, prototypes, dist)
    """
    Computes the distance between each query[k] and prototype[k]
    
    Arguments:
    
    - y (Array{T}): vector of n_features components.
    - prototypes (Array{T}): vector of n_cluster components.
    - dist (function): distance to be used to compare prototypes and query.
    
    """
    #@assert ndims(prototypes) ==1
    
    n_features = length(query)
    n_clusters = length(prototypes)
    ADC_table = Array{Float32}(undef, n_clusters, n_features)
    
    for j in 1:n_features       # 128
        for p in 1:n_clusters   # 32
            ADC_table[p,j] = dist(query[j], prototypes[p] )
        end
    end
    return  ADC_table
end

function compute_ADC(query, prototypes, dist)
    n_clusters, n_features = size(prototypes)
    
    ADC_table = Array{Float32}(undef, n_clusters, n_features)
    
    for j in 1:n_features       # 128
        for p in 1:n_clusters   # 32
            ADC_table[p,j] = dist(query[j], prototypes[p,j] )
        end
    end
    return  ADC_table
end

query = X_tr_vecs[:,1]
adc_table_shared = compute_ADC_shared(query, P_shared, euclidean);
adc_table = compute_ADC(query, P_shared_copied, euclidean);

@assert isapprox(adc_table, adc_table_shared)

Now we need to verify that the ADC distance `adc_dist_shared` is equal to `adc_dist`

In [32]:
function adc_dist_shared(x_code,  adc_table::Vector) 
    """
    This adc distance is the proposed one for BWcodes
    """
    res = zero(eltype(adc_table))
    @inbounds @simd for j in eachindex(x_code)
        res+= adc_table[x_code[j]]
    end
    return res
end

function adc_dist_shared(x_code, adc_table::Matrix)
    res = zero(eltype(adc_table))
    @inbounds @simd for j in eachindex(x_code)
        res+= adc_table[x_code[j],j]
    end
    return res
end

query = X_tr_vecs[:,1]
adc_table_shared = compute_ADC_shared(query, P_shared, euclidean);

query_code = encode_fast_shared(sqeuclidean, query, P_shared)
distance_adc = adc_dist( view(PQcodes,:,j) ,  adc_table_shared)  
distance_adc_shared = adc_dist_shared(  view(PQcodes,:,j) ,  adc_table_shared)  

@assert distance_adc == distance_adc_shared

Now we need to verify that scanning over the database we get the same results with `linear_scann_shared` and with `linear_scann`

In [33]:

function linear_scann_shared(query, PQcodes, adc_table_shared, P_shared)
    
    query_code = encode_fast_shared(sqeuclidean, query, P_shared)
    n_features, n_examples = size(PQcodes)
    distances = Array{eltype(query)}(undef, n_examples)
    
    @inbounds @fastmath for j in 1:n_examples
        distances[j] = adc_dist_shared(  view(PQcodes,:,j) ,  adc_table_shared)    
    end
    return distances
end

if profile_flag
    @benchmark distances = linear_scann($query, $PQcodes, $adc_table_shared, $P_shared)
end


distances_shared = linear_scann_shared(query, PQcodes, adc_table_shared, P_shared);
distances = linear_scann(query, PQcodes, adc_table, P_shared_copied);

@assert isapprox(distances, distances_shared)


In [34]:
j=1
@show adc_dist(  view(PQcodes,:,j) ,  adc_table_shared)
@show adc_dist_shared(view(PQcodes,:,j) ,  adc_table_shared);

adc_dist(view(PQcodes, :, j), adc_table_shared) = 7167.8647f0
adc_dist_shared(view(PQcodes, :, j), adc_table_shared) = 7167.8647f0


It is important to recall

- `PQcodes` will not be the same as `PQcodes_shared`:

    - `PQcodes` is constructed form a `n_features` codebooks.
    - `PQcodes_shared` is constructed form a single codebook.
    

In [35]:
pq_distances = linear_scann(query, PQcodes_shared, adc_table, P);
#pq_distances = linear_scann_shared(query, PQcodes_shared, adc_table_shared, P_shared);

x_closest = vec(get_top_k_from_distances(X_tr_vecs, pq_distances, 1));

df_x = DataFrame(Dict(:x => x, 
                      :x_pqcode => vec(x_pqcode),
                      :x_closest => vec(x_closest)
                        
                      ))

first(df_x, 5)

Unnamed: 0_level_0,x,x_closest,x_pqcode
Unnamed: 0_level_1,Float32,Float32,Int8
1,0.0,0.0,10
2,1.0,16.0,10
3,5.0,35.0,24
4,3.0,5.0,2
5,44.0,32.0,27


### Recall PQlinearscann Sharing a copies Quantizer

Here we just verify that if we copy a quantizer many times we still get similar quality

In [36]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

x_code = PQcodes_shared[:,query_id];
adc_table = compute_ADC(query, P_shared_copied, euclidean);
pq_distances = linear_scann(query, PQcodes_shared, adc_table, P_shared_copied);
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, 100);

recall(top_k_pq, query_true_neighbors, 100) = 0.67


In [37]:

function compute_mean_recall(X, PQcodes_shared, true_neighbors,
                             P_shared,P_shared_copied, top_k::Int, dist::Function)

    n_features, n_examples = size(X)

    average = 0.
    for query_id in 1:n_examples
        query = X[:, query_id]
        adc_table_shared = compute_ADC(query, P_shared_copied, dist);
        pq_distances = linear_scann(query, PQcodes_shared, adc_table_shared, P_shared_copied);
        top_k_pq = sortperm(pq_distances)[1:top_k];
        average += recall(top_k_pq, true_neighbors[:,query_id], 100);
    end
    return average / n_examples

end

avg_recall = compute_mean_recall(X_te_vecs[:, 1:10], PQcodes_shared, true_neighbors,
                                 P_shared, P_shared_copied, top_k, euclidean)

0.6400000000000001

### Recall PQlinearscann Sharing a Quantizer

We don´t want to have copies of a quantizer, but have specific methods that use a single vector across features

In [38]:
function build_PQ_code_shared_copied(X_tr_vecs, P_shared_copied)
    n_features, n_examples = size(X_tr_vecs)

    PQcodes_shared = Array{Int32}(undef, n_features, n_examples);

    for j in 1:n_examples
        PQcodes_shared[:,j] = encode(euclidean, view(X_tr_vecs,:,j), P_shared_copied)  
    end

    @show maximum(PQcodes), maximum(PQcodes_shared)
    return PQcodes_shared
end

PQcodes_shared = build_PQ_code_shared_copied(X_tr_vecs, P_shared_copied);

(maximum(PQcodes), maximum(PQcodes_shared)) = (32, 32)


In [39]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

adc_table = compute_ADC(query, P_shared_copied, euclidean);
pq_distances = linear_scann_shared(query, PQcodes_shared, adc_table, P_shared);
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, 100);

recall(top_k_pq, query_true_neighbors, 100) = 0.67


we can see that we can use `compute_ADC_shared` insead of `compute_ADC` and we get the same results

In [40]:
query_id = 3
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

adc_table_shared = compute_ADC_shared(query, P_shared, euclidean);
pq_distances = linear_scann_shared(query, PQcodes_shared, adc_table_shared, P_shared);
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, 100);

recall(top_k_pq, query_true_neighbors, 100) = 0.57


In [41]:

function compute_mean_recall_shared(X, PQcodes_shared, true_neighbors,
                                    P_shared,P_shared_copied, top_k::Int, dist::Function)

    n_features, n_examples = size(X)

    average = 0.
    for query_id in 1:n_examples
        query = X[:, query_id]
        query_true_neighbors = true_neighbors[:,query_id]
        adc_table_shared = compute_ADC_shared(query, P_shared, dist);
        pq_distances = linear_scann_shared(query, PQcodes_shared, adc_table_shared, P_shared)
        top_k_pq = sortperm(pq_distances)[1:top_k];
        r = recall(top_k_pq, query_true_neighbors, 100);
        average +=r
    end
    return average / n_examples

end

avg_recall = compute_mean_recall_shared(X_te_vecs[:, 1:10], PQcodes_shared, true_neighbors,
                                        P_shared, P_shared_copied, top_k, euclidean)

0.6400000000000001

In [42]:
@btime linear_scann_shared(query, PQcodes_shared, adc_table_shared, P_shared);

  42.171 ms (3 allocations: 3.82 MiB)


### Inspect idea of computing distances without adc table: directly from pqcodes


In [43]:
function abs_dist(x,y)
    res = zero(eltype(x))
    @inbounds @fastmath   for j in eachindex(x)
        res += abs(x[j] - y[j])
    end
    return res
end

PQcodes_uint8 = UInt8.(PQcodes);

query_code = UInt8.(vec(query_code))
aux = UInt8.(PQcodes_shared[:,100])
@benchmark abs_dist(query_code, aux)

BenchmarkTools.Trial: 10000 samples with 993 evaluations.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m35.624 ns[22m[39m … [35m147.113 ns[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m36.043 ns               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m36.352 ns[22m[39m ± [32m  2.533 ns[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m█[39m█[34m▅[39m[32m [39m[39m▂[39m▃[39m▁[39m [39m [39m [39m [39m [39m [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂
  [39m█[39m█[34m█[39m[32

In [44]:
aux = PQcodes_uint8[:,1]
@benchmark Euclidean0(query_code, aux)

BenchmarkTools.Trial: 10000 samples with 985 evaluations.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m52.665 ns[22m[39m … [35m84.983 ns[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m55.668 ns              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m56.292 ns[22m[39m ± [32m 2.815 ns[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m [39m [39m▁[39m▁[39m [39m▄[39m▄[39m▇[39m█[34m▇[39m[32m▃[39m[39m▂[39m▃[39m▃[39m▃[39m▂[39m▃[39m▂[39m [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂
  [39m▆[39m▇[39m█[39m█[39m█[39m█

In [45]:
function abs_dist(y::Array{T}, X::Array{T}, j) where T
    res = zero(eltype(y))
    @inbounds @fastmath  for k in eachindex(y)
        res += abs(X[k, j] - y[j])
    end
    return res
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, X, j)    
    end
    return distances
end

linear_scann_exact (generic function with 1 method)

In [46]:
#@benchmark linear_scann_exact(abs_dist, query_code, Matrix(PQcodes))

This is so slow because we are not using Uint8 codes

In [47]:
PQcodes_uint8 = UInt8.(PQcodes);

In [48]:
@benchmark linear_scann_exact(abs_dist, query_code, PQcodes_uint8)

BenchmarkTools.Trial: 1617 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m2.551 ms[22m[39m … [35m43.810 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 93.01%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m2.990 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m3.081 ms[22m[39m ± [32m 1.673 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m2.31% ±  3.99%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m▅[39m [39m▁[39m [39m [39m▂[39m▅[34m▆[39m[39m▆[39m▄[39m▂[32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▁[39m▃[39m▂[39m▁[39m▁[39m▁[39m▁[39m

The problem is that quality is bad

In [49]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = UInt8.(vec(query_code))


pq_distances = linear_scann_exact(abs_dist, query_code, PQcodes_uint8)
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, 100);

recall(top_k_pq, query_true_neighbors, 100) = 0.0


Neverhteles...

In [50]:

@inline function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds @fastmath  for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, view(X,:,j))    
    end
    return distances
end

query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100


ex_distances = linear_scann_exact(Euclidean0, query, X_tr_vecs)
top_k_ex = sortperm(ex_distances)[1:top_k];

@show recall(top_k_ex, query_true_neighbors, 100);

recall(top_k_ex, query_true_neighbors, 100) = 1.0


In [78]:
R_shared.centers

1×32 Matrix{Float32}:
 123.953  2.46026  46.466  76.4262  …  6.96446  101.414  12.4938  22.9755

note that The quality might be bad using `abs_dist` bercuase centroids are not sorted

In [79]:
sort(R_shared.centers,dims=2)

1×32 Matrix{Float32}:
 0.229278  2.46026  4.47508  6.96446  …  128.883  133.791  139.606  150.048

In [226]:
@inline function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds @fastmath  for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, view(X,:,j))    
    end
    return distances
end

linear_scann_exact (generic function with 1 method)

In [227]:
@benchmark linear_scann_exact(Euclidean0, query_code, PQcodes)

BenchmarkTools.Trial: 18 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m290.210 ms[22m[39m … [35m296.476 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.18%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m291.611 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m292.129 ms[22m[39m ± [32m  1.728 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.01% ± 0.04%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▃[34m█[39m[39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▇[39m▁[39m▇[39m▇

In [228]:
function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds @fastmath  for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

Euclidean0 (generic function with 1 method)

In [229]:
aux  = X_tr_vecs[:,1]
@btime Euclidean0(query, aux)

  53.668 ns (1 allocation: 16 bytes)


368.97562f0

### Recall PQlinearscann Sharing a Quantizer


In [None]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

adc_table_shared = compute_ADC_shared(query, P_shared, euclidean);
pq_distances = linear_scann(query, PQcodes_shared, adc_table_shared, P_shared_copied);
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, 100);

In [None]:
function encode_fast(dist, vector::Array{T}, prototypes::Array{T}) where T
    
    n_clusters, n_features = size(prototypes)
    closest_prototypes = Array{Int8}(undef, n_features, 1);
    
    @inbounds for (j,x) in enumerate(vector)
        best_coordinate = 1
        min_distance::T = typemax(T)
        for k in 1:n_clusters
           current_dist = dist(prototypes[k,j], x)
           if current_dist < min_distance
               best_coordinate = k
               min_distance = current_dist
           end
           #println(k, ' ', j, ' ', best_coordinate, ' ',min_distance )
        end            
        closest_prototypes[j] = best_coordinate
    end
    return closest_prototypes
end



function adc_dist(x_code,  adc_table)
    res = zero(eltype(adc_table))
    @inbounds @fastmath for j in eachindex(x_code)
        res+= adc_table[x_code[j], j]
    end
    return res
end


function linear_scann(query, PQcodes, adc_table, P)
    
    query_code = encode_fast(sqeuclidean, query, P)
    n_features, n_examples = size(PQcodes)
    distances = Array{eltype(query)}(undef, n_examples)
    
    @inbounds @fastmath for j in 1:n_examples
        distances[j] = adc_dist( view(PQcodes,:,j) ,  adc_table)    
    end
    return distances
end


function compute_mean_recall(X, PQcodes_shared, true_neighbors,
                             P_shared,P_shared_copied, top_k::Int, dist::Function)

    n_features, n_examples = size(X)
    
    average = 0.
    for query_id in 1:n_examples
        query = X[:, query_id]
        adc_table_shared = compute_ADC(query, P_shared_copied, dist);
        pq_distances = linear_scann(query, PQcodes_shared, adc_table_shared, P_shared_copied);
        top_k_pq = sortperm(pq_distances)[1:top_k];
        average += recall(top_k_pq, true_neighbors[:,query_id], 100);
        println(rec," --> ", average)
    end
    return average / n_examples
end

avg_recall = compute_mean_recall(X_te_vecs[:, 1:5], PQcodes_shared, true_neighbors,
                                 P_shared, P_shared_copied, top_k, euclidean)

In [None]:
avg_recall = compute_mean_recall(X_te_vecs[:, 1:5], PQcodes_shared, true_neighbors,
                                 P_shared, P_shared_copied, top_k, euclidean)

# PART 2 -> Leverage BWcodes


## Improving distance between vectors with ADC_distance

In [None]:
function SEuclidean0(x, query)
    res = zero(eltype(x))
    @inbounds   for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return res
end

function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds   for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

if profile_flag
    x = X_tr_vecs[:,1]
    @benchmark Euclidean0($x, $query)
end

In [None]:
function adc_dist_shared(query_code, x_code,  adc_table)
    res = zero(eltype(adc_table))
    @inbounds @simd for j in eachindex(x_code)
        res+= adc_table[x_code[j]]
    end
    return res
end

if profile_flag
    @benchmark adc_dist_shared($query_code, $x_code, $adc_table_shared)
end

In [None]:
function adc_dist_shared_unrolled(query_code, x_code,  adc_table)
    res1 = zero(eltype(adc_table))
    res2 = zero(eltype(adc_table))
    res3 = zero(eltype(adc_table))
    res4 = zero(eltype(adc_table))
    
    @inbounds @fastmath for j in 1:4:length(x_code)
        res1 += adc_table[x_code[j]]
        res2 += adc_table[x_code[j+1]]
        res3 += adc_table[x_code[j+2]]
        res4 += adc_table[x_code[j+3]]
    end
    
    return res1 + res2 + res3 + res4
end

if profile_flag
    @benchmark adc_dist_shared_unrolled($query_code, $x_code,  $adc_table_shared)
end

In [None]:

@assert adc_dist_shared(query_code, x_code,  adc_table_shared) == adc_dist_shared_unrolled(query_code, x_code,  adc_table_shared)

Note that euclidean squared distance with `@fastmath` is still faster

In [None]:
function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds @fastmath  for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

if profile_flag
    @benchmark Euclidean0($x, $query)
end

## BWcode

When using a SharedQuantizer a value `c` in a pqcode found in different coordinates refers to the same real number. Note that the computation of an ADC distance  is a sum over  over ADC_table entries provided by the pqcode. This sum is done over the entire length of the pqcode. 

We propose to create a  `K`-dimensional vector, that we call BWcode, that compresses a pqcode into a vector containing the counts of the different assigments to each centroid prototype over the pqcode. For example if we had a pqcode `[1,2,4,1,1,3,3,4]` represented with 4 centroids, we would create a bw code `[3,1,2,2]`. Then the adcomputation over the pqcode is the same as the adc computation over a bwcode provided that we multiply each adc_table entry by the value in the bwcode.

To sum up, we can iterate over a `K`-dimensional vector that we call the BWcode and add the values in `adc_table_shared` weighted by the amount of times each prototype has been seen in the pqcode, making the computation faster as long as `K` is lower than the length of the pqcode.

In [None]:
function pq_code_to_bw_code(pqcode, n_clusters)
    bw_code = zeros(eltype(pqcode), n_clusters)
    for c in pqcode
        bw_code[c] += 1
    end
    return bw_code
end

In [None]:
n_examples = size(X_tr_vecs,2)
bw_code = pq_code_to_bw_code(x_code, n_clusters);

Now let´s verify that the distance between a query

In [None]:
function bw_adc_dist_shared(bw_code,  adc_table)
    res = zero(eltype(adc_table))
    @inbounds @simd for j in eachindex(bw_code)
        res+= adc_table[j] * bw_code[j]
    end
    return res
end

if profile_flag
    @benchmark bw_adc_dist_shared( $bw_code, $adc_table_shared)
end

In [None]:
@benchmark bw_adc_dist_shared($bw_code, $adc_table_shared)

In [None]:
@assert isapprox(bw_adc_dist_shared(query_code, bw_code, adc_table_shared),
                 adc_dist_shared(query_code, x_code, adc_table_shared))

## Linear scann study

### Exact version

In [None]:
function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds @fastmath  for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{eltype(query)}(undef, n_examples)
    
    @inbounds @fastmath  for j in 1:n_examples
        distances[j] = dist(query, X[:,j])    
    end
    return distances
end

if profile_flag
    @benchmark linear_scann_exact(Euclidean0, query, X_tr_vecs)
end

In [None]:
function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return res
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{eltype(query)}(undef, n_examples)
    
    @inbounds @fastmath Threads.@threads for j in 1:n_examples
        distances[j] = dist(query, X[:,j])    
    end
    return distances
end

if profile_flag
    @benchmark linear_scann_exact(Euclidean0, query, X_tr_vecs)
end

In [None]:
@inline function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds @fastmath  for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{eltype(query)}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, view(X,:,j))    
    end
    return distances
end

if profile_flag
    @benchmark linear_scann_exact(Euclidean0, query, X_tr_vecs)
end

In [None]:
@benchmark linear_scann_exact(Euclidean0, query, X_tr_vecs)

### ADC version

In [None]:
function linear_scann_shared(query, PQcodes, adc_table_shared, P_shared)
    
    query_code = encode_fast_shared(sqeuclidean, query, P_shared)
    n_features, n_examples = size(PQcodes)
    distances = Array{eltype(query)}(undef, n_examples)
    
    @inbounds @fastmath for j in 1:n_examples
        distances[j] = adc_dist_shared(query_code,  view(PQcodes,:,j) ,  adc_table_shared)    
    end
    return distances
end

if profile_flag
    @benchmark distances = linear_scann($query, $PQcodes, $adc_table_shared, $P_shared)
end

In [None]:
 @benchmark distances = linear_scann($query, $PQcodes, $adc_table_shared, $P_shared)

In [None]:
function linear_scann_shared_unrolled(query, PQcodes, adc_table_shared, P_shared)
    
    query_code = encode_fast_shared(sqeuclidean, query, P_shared)
    n_features, n_examples = size(PQcodes)
    distances = Array{eltype(query)}(undef, n_examples)
    
    @inbounds @fastmath Threads.@threads for j in 1:n_examples
        distances[j] = adc_dist_shared_unrolled(query_code,  PQcodes[:,j] ,  adc_table_shared)    
    end
    return distances
end

if profile_flag
    @benchmark distances = linear_scann_unrolled($query, $PQcodes, $adc_table_shared, $P_shared)
end

### ADC version with BW codes

Now we will build a Matrix containing BW codes and use it to compute the ADC distance

In [None]:
n_examples = size(X_tr_vecs,2)
BWcodes = Array{Int8}(undef, n_clusters, n_examples);
bw_code = pq_code_to_bw_code(x_code, n_clusters);

for j in 1:n_examples
   BWcodes[:,j] .= pq_code_to_bw_code(PQcodes_shared[:,j], n_clusters)
end

In [None]:
function bw_adc_dist_shared(query_code, bw_code,  adc_table)
    res = zero(eltype(adc_table))
    @inbounds @simd for j in eachindex(bw_code)
        res+= adc_table[j] * bw_code[j]
    end
    return res
end

function linear_scann_bw(query, BWcodes, adc_table_shared, P_shared)
    
    query_code = encode_fast_shared(sqeuclidean, query, P_shared)
    n_features, n_examples = size(BWcodes)
    distances = Array{eltype(query)}(undef, n_examples)
    n_clusters = size(BWcodes,1)
    #@show n_clusters
    @inbounds for i in 1:n_examples
        
        # Following code is equivalent to bw_adc_dist_shared(query_code, BWcodes[:,i],  adc_table_shared)
        res = zero(eltype(adc_table_shared))
        @simd for j in 1:n_clusters
            res+= adc_table_shared[j] * BWcodes[j,i]
        end        
        distances[i] = res  
    
    end
    return distances
end

if profile_flag
    @benchmark linear_scann_bw($query, $BWcodes, $adc_table_shared, $P_shared)
end

In [None]:
function bw_adc_dist_shared(query_code, bw_code,  adc_table)
    res = zero(eltype(adc_table))
    @inbounds @simd for j in eachindex(bw_code)
        res+= adc_table[j] * bw_code[j]
    end
    return res
end

function linear_scann_bw_multithreaded(query, BWcodes, adc_table_shared, P_shared)
    
    query_code = encode_fast_shared(sqeuclidean, query, P_shared)
    n_features, n_examples = size(BWcodes)
    distances = Array{eltype(query)}(undef, n_examples)
    n_clusters = size(BWcodes,1)
    #@show n_clusters
    @inbounds @fastmath Threads.@threads  for i in 1:n_examples
       distances[i] = bw_adc_dist_shared(query_code, view(BWcodes,:,i),  adc_table_shared)
    end    
    
    return distances
end

if profile_flag
    @benchmark distances = linear_scann_bw_multithreaded($query, $BWcodes, $adc_table_shared, $P_shared)
end

In [None]:
query = X_te_vecs[:,1]
adc_table = compute_ADC_shared(query, P_shared, euclidean)
#pq_distances = linear_scann(query, PQcodes_test, adc_table, P);
#closest_id = sortperm(pq_distances)[1]
#hcat(query, X_te_vecs[:, closest_id])[1:4,:]

In [None]:
query = X_te_vecs[:,1]
adc_table = compute_ADC(query, P, Euclidean0)
pq_distances = linear_scann(query, PQcodes_test, adc_table, P);
x_closest = vec(get_top_k_from_distances(X_te_vecs, pq_distances, 1));

df_x = DataFrame(Dict(:x => x, 
                      :x_decoded => x_decoded,
                      :x_pqcode => x_pqcode,
                      :x_closest => x_closest
                      ))

first(df_x, 5)

# Quality verification

Let's inspect the recall of pqlinearscann with respect to exact search:

In [None]:
function recall(predicted, relevant, eval_at)
    """
    fraction of the relevant documents that are successfully retrieved
    """
    if eval_at == 0
        return 0.0
    end
    
    predicted_at_k = predicted[1:eval_at]
    n_predicted_and_relevant =  length(intersect( Set(predicted_at_k), Set(relevant))) 
    return n_predicted_and_relevant/ length(relevant)
end

query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

n_examples = size(X_tr_vecs,2)
BWcodes = Array{Int8}(undef, n_clusters, n_examples);
bw_code = pq_code_to_bw_code(x_code, n_clusters);

for j in 1:n_examples
   BWcodes[:,j] .= pq_code_to_bw_code(PQcodes_shared[:,j], n_clusters)
end

adc_table_shared  = Float32.(compute_ADC_shared(query, P_shared, euclidean));
bw_distances = linear_scann_bw(query, BWcodes, adc_table_shared, P_shared);
top_k_bw = sortperm(bw_distances)[1:top_k];


In [None]:

pq_distances = linear_scann(query, PQcodes, adc_table, P);
top_k_pq = sortperm(pq_distances)[1:top_k];



In [None]:

pq_distances = linear_scann(query, PQcodes, adc_table, P);
top_k_pq = sortperm(pq_distances)[1:top_k];

ex_distances = linear_scann_exact(query, X_tr_vecs)
top_k_ex = sortperm(ex_distances)[1:top_k];

@show recall(top_k_ex, query_true_neighbors, 100)
@show recall(top_k_pq, query_true_neighbors, 100)
@show recall(top_k_bw, query_true_neighbors, 100)

df = DataFrame(Dict(:top_k_ex => top_k_ex,
                    :top_k_pq => top_k_pq,
                    :top_k_bw => top_k_bw))

In [None]:
bw_distances = linear_scann_bw(query, BWcodes, adc_table_shared, P_shared);
bw_distances

In [None]:
query

In [None]:
PQcodes[:,1]

In [None]:
plot(x, y)

In [None]:
X_tr_vecs[:,1]

In [None]:
n = 3
sqeuclidean(query, X_tr_vecs[:,n]) , distances[n], sqeuclidean(sqeuclidean(query, X_tr_vecs[:,n]), distances[n])

In [None]:
n = 4
sqeuclidean(distances[n], sqeuclidean(sqeuclidean(query, X_tr_vecs[:,n]), distances[n])

In [None]:
n = 1
sqeuclidean(query, X_tr_vecs[:,n]) , distances[n], sqeuclidean(sqeuclidean(query, X_tr_vecs[:,n]), distances[n])

## Euclidean  Squared Distance

In [None]:
function SEuclidean(X, query)
    d = (query .- X) .* (query .- X)
    return sum(d, dims=1)
end

In [None]:
function SEuclidean_2(X, query)
    n_features, n_examples = size(X)
    result = zeros(n_examples)
    for m in 1:n_examples
        res = zero(eltype(X))
        for j in 1:n_features
            aux = (query[j] .- X[j,m])
            res += aux * aux
        end
        result[m] = res
    end
    return result
end

In [None]:
query = X_te_vecs[:,1];

In [None]:
@benchmark SEuclidean(X_te_vecs, query)

In [None]:
@benchmark SEuclidean_2(X_te_vecs, query)

In [None]:
@benchmark SEuclidean(X_tr_vecs, query)

In [None]:
@benchmark SEuclidean_2(X_tr_vecs, query)

## Finding top k distances (and their ids)


The first naive thing we can do consist on computing all distances and then sorting them to get the top k closest vectors to the query vector

In [None]:
function top_k_ids(X, query)
    distances = SEuclidean_2(X, query)
    top_k_indices = sortperm(distances)
    return top_k_indices
end

In [None]:
@benchmark top_k_ids(X_te_vecs, query)[1:10]

A slightly better approach consist on using `partialsortperm` to simply sort a subset of the distances vector.

In [None]:
function top_k_ids_2(X, query, k)
    distances = SEuclidean_2(X, query)
    top_k_indices = partialsortperm(distances, 1:k)
    return top_k_indices
end

In [None]:
@benchmark top_k_ids_2(X_te_vecs, query, 10)

### Storing top k distances in a priority queue

A better alternative consists on using a priority queue. This. queue will keep only k distances in memory (no need to store all distances between the query point and all possible candidates).

In [None]:
sort!([1,54,3,24,10])

In [None]:
a = [1,2,3,4,5]

In [None]:
function SEuclidean_3(X, query, top_k)
    result = sort(SEuclidean_2(X[:, 1:top_k], query))
    n_features, n_examples = size(X)
    
    for m in top_k:n_examples
        res = zero(eltype(X))
        for j in 1:n_features
            aux = (query[j] .- X[j,m])
            res += aux * aux
        end
        dist = res/n_features
        
        # see if current mse is in the top pile
        if dist < result[end]
            j = top_k 
            #revisamos la lista resultado de izq a derecha
            while dist < result[j-1]
                j = j-1
                if j == 1
                    break
                end
            end
            
            result[j+1:end] .= result[j:end-1]
            result[j] = dist
         end

    end
    return result
end

In [None]:
@benchmark SEuclidean_3(X_tr_vecs, query, 10) 

In [None]:
@benchmark top_k_ids_2(X_tr_vecs, query, 10)

In [None]:
function SEuclidean_4(X, query, top_k)
    
    result = sort(SEuclidean_2(X[:, 1:top_k], query))
    n_features, n_examples = size(X)
    
    @inbounds @fastmath for m in top_k:n_examples
        res = zero(eltype(X))
        @simd for j in 1:n_features
            aux = (query[j] .- X[j,m])
            res += aux * aux
        end
        dist = res
        
        # see if current mse is in the top pile
        if dist < result[end]
            j = top_k 
            #revisamos la lista resultado de izq a derecha
            while dist < result[j-1]
                j = j-1
                if j == 1
                    break
                end
            end            
            result[j+1:end] .= result[j:end-1]
            result[j] = dist
         end
    end
    return result
end


In [None]:
@benchmark SEuclidean_4(X_tr_vecs, query, 10) 

In [None]:
X_tr_200k = X_tr_vecs[:,1:200_000]
@benchmark SEuclidean_4(X_tr_200k, query, 10) 

In [None]:
SEuclidean_4(X_tr_vecs, query, 10)

In [None]:
function SEuclidean_5(X, query, top_k)
    
    result = sort(SEuclidean_2(X[:, 1:top_k], query))
    n_features, n_examples = size(X)
    
    for m in top_k:n_examples
        res = zero(eltype(X))
        @turbo  for j in 1:n_features
            aux = (query[j] - X[j,m])
            res += aux * aux
        end
        dist = res
        
        # see if current mse is in the top pile
        if dist < result[end]
            j = top_k 
            #revisamos la lista resultado de izq a derecha
            while dist < result[j-1]
                j = j-1
                if j == 1
                    break
                end
            end            
            result[j+1:end] .= result[j:end-1]
            result[j] = dist
         end
    end
    return result
end

In [None]:
@benchmark SEuclidean_5(X_tr_200k, query, 10) 

# Comparison with Distances.jl


Distances.jl provides the method 'pairwise' to compute pairwise distances between two matrices.

In [None]:
@show typeof(query)
@show size(query)
@show typeof(X_tr_vecs)
@show size(X_tr_vecs)

In [None]:
v1 = [1,2,3]
v2 = [0,0,1]

y = [0,0,2]
X = [v1 v2]

In [None]:
function SEuclidean_5(X, query, top_k)
    
    result = sort(SEuclidean_5(X[:, 1:top_k], query))
    n_features, n_examples = size(X)
    
    for m in top_k:n_examples
        dist = zero(eltype(X))
        @turbo  for j in 1:n_features
            dist = (query[j] - X[j,m])
            dist += aux * aux
        end
        
        # see if current mse is in the top pile
        if dist < result[end]
            j = top_k 
            #revisamos la lista resultado de izq a derecha
            while dist < result[j-1]
                j = j-1
                if j == 1
                    break
                end
            end            
            result[j+1:end] .= result[j:end-1]
            result[j] = dist
         end
    end
    return result
end

In [None]:
function SEuclidean_2(X, query)
    n_features, n_examples = size(X)
    result = zeros(eltype(X), n_examples)
    for m in 1:n_examples
        res = zero(eltype(X))
        for j in 1:n_features
            aux = (query[j] .- X[j,m])
            res += aux * aux
        end
        result[m] = res
    end
    return result
end

In [None]:
y_colvec = [y y][:,[1]]
query_mat = [query query][:,[1]];

In [None]:
SEuclidean_2(X, y)

In [None]:
pairwise(SqEuclidean(), y_colvec, X)

In [None]:
@benchmark pairwise(SqEuclidean(), query_mat, X_tr_vecs)

In [None]:
@benchmark SEuclidean_2(X_tr_vecs, query)

In [None]:
@benchmark SEuclidean_4(X_tr_vecs, query, 10)

We can verify that the sorted distances by `SEuclidean_4` are the same as the results provided with `pairwise` from `Distances.jl`

In [None]:
res2 = SEuclidean_4(X_tr_vecs, query, 10);
res = pairwise(SqEuclidean(), query_mat, X_tr_vecs)
res = sort(res, dims=2)[1:10]
res == res2

## Parallel implementation

# SIMD tests

In [None]:

function find_val_in_array_simd(x::Array{T}, val::T) where {T}
    n_simd = 64
    last_pos_simd_chunk = length(x)-n_simd
    @inbounds for i in 1:n_simd:last_pos_simd_chunk
        vec_i = vload(Vec{n_simd, T}, x, i)
        sum_equality = sum(vec_i == val)
        if sum_equality >0
            return true
        end
    end

    @inbounds for i in last_pos_simd_chunk:length(x)
        if x[i] == val
            return true
        end
    end

    return false
end

In [None]:
function jdotavx(a, b)
    s = zero(eltype(a))
    @turbo for i ∈ eachindex(a, b)
        s += a[i] * b[i]
    end
    s
end