# Part 1: Ensure PQLinearscann with a shared quantizer works

# Read SIFT 1 million 

In [1]:
using HDF5
using BenchmarkTools
using Distances
using LoopVectorization
using SIMD
using Clustering
using ProgressMeter
using StaticArrays
using DataFrames
using Plots

In [2]:
profile_flag = false
Sys.cpu_info()[1].model

"Apple M1 Pro"

In [3]:
path = joinpath(homedir(), "Datasets", "SIFT1M",  "sift-128-euclidean.hdf5")

"/Users/dbuchaca/Datasets/SIFT1M/sift-128-euclidean.hdf5"

In [4]:
f = h5open(path, "r")

X_tr_vecs = read(f["train"])
X_te_vecs = read(f["test"]);
true_neighbors = read(f["neighbors"])
true_distances = read(f["distances"])

true_neighbors .= true_neighbors .+ 1;

@show size(X_tr_vecs)
@show size(X_te_vecs)
@show size(true_neighbors)
@show size(true_distances)

size(X_tr_vecs) = (128, 1000000)
size(X_te_vecs) = (128, 10000)
size(true_neighbors) = (100, 10000)
size(true_distances) = (100, 10000)


(100, 10000)

## Clustering.jl

Create the pqcodes for the training points

In [5]:
n_features = size(X_tr_vecs,1)
n_clusters = 32
R_per_feature = []

@showprogress for j in  1:n_features
    R = kmeans(X_tr_vecs[[j],:], n_clusters; maxiter=200)
    push!(R_per_feature, R)
end

[32mProgress: 100%|█████████████████████████████████████████| Time: 0:02:43[39m


In [6]:
P = hcat([r.centers[:] for r in R_per_feature]...);
println(P[:,1])
println(size(P))

Float32[0.49666122, 80.90828, 32.94114, 43.903812, 119.045685, 21.973886, 8.950528, 145.65755, 37.948563, 134.67995, 63.98707, 74.986145, 14.96987, 18.451147, 94.42536, 70.46136, 50.41857, 60.48038, 28.983364, 126.29258, 112.17338, 3.9273367, 11.48651, 87.93994, 24.492191, 26.49079, 105.47092, 66.97982, 55.959335, 6.483061, 99.9554, 13.0]
(32, 128)


In [7]:
P = hcat([r.centers[:] for r in R_per_feature]...);
println(P[:,1])
println(size(P))

Float32[0.49666122, 80.90828, 32.94114, 43.903812, 119.045685, 21.973886, 8.950528, 145.65755, 37.948563, 134.67995, 63.98707, 74.986145, 14.96987, 18.451147, 94.42536, 70.46136, 50.41857, 60.48038, 28.983364, 126.29258, 112.17338, 3.9273367, 11.48651, 87.93994, 24.492191, 26.49079, 105.47092, 66.97982, 55.959335, 6.483061, 99.9554, 13.0]
(32, 128)


In [8]:
if profile_flag
    @benchmark P = hcat([r.centers[:] for r in R_per_feature]...)
end

Another way to construct the matrix would be

In [9]:
function build_prototype_matrix(R, n_clusters, n_features)
    
    prototypes = Array{Float32}(undef, n_clusters, n_features);
    for j in 1:n_features
        prototypes[:,j] .= vec(R_per_feature[j].centers)
    end
    return prototypes
end

if profile_flag
    @benchmark P = build_prototype_matrix(R_per_feature, n_clusters, n_features)
end

In [10]:
P2 = build_prototype_matrix(R_per_feature, n_clusters, n_features);
isapprox(P, P2)

true

We can generate the PQcodes for the data as follows:

In [11]:
PQcodes = hcat([Int32.(r.assignments[:]) for r in R_per_feature]...)';
size(PQcodes)

(128, 1000000)

Here each vector prototype is in fact a single scalar (because subvectors have a single coordinate)

In [12]:
size(P)

(32, 128)

We need a method to, given a vector and a collection of prototypes per feature, find which are the closest

In [13]:

@inline function seuclidean(x::T,y::T) where {T}
    aux::T = (x - y)
    return aux * aux
end

@inline function euclidean(x::T,y::T) where {T}
    aux::T = (x - y)
    return sqrt(aux * aux)
end

function find_closest_coordinate(dist, 
                                 value::T,
                                 vector) where T
    best_coordinate = 1
    min_distance::T = typemax(T)
    for (j,x) in enumerate(vector)
        current_dist = dist(x, value)
        if current_dist < min_distance
            best_coordinate = j
            min_distance = current_dist
        end
    end
    return best_coordinate
end

query = X_te_vecs[:,1];
closest_prototypes = Array{UInt8}(undef, n_features, 1);

if profile_flag
    @benchmark find_closest_coordinate(euclidean, Float32(48.), query)
end

In [14]:
@assert 3 == find_closest_coordinate(euclidean, 2., [3.,5,2,1])

@assert 4 == find_closest_coordinate(euclidean, 1., [3.,5,2,1])

In [15]:
function encode(dist, vector, prototypes)
    closest_prototypes = Array{Int8}(undef, n_features, 1);
    @inbounds for (j,x) in enumerate(vector)
        closest_prototypes[j] = find_closest_coordinate(dist, x, prototypes[:,j])
    end
    return closest_prototypes
end

if profile_flag
    @benchmark encode(euclidean, query, P)
end

In [16]:
n_features, n_examples_test = size(X_te_vecs)

PQcodes_test = Array{Int16}(undef, n_features, n_examples_test);

for j in 1:n_examples_test
    PQcodes_test[:,j] = encode(euclidean, X_te_vecs[:,j] , P)  
end

In [17]:
function encode_fast(dist, vector::Array{T}, prototypes::Array{T}) where T
    
    n_clusters, n_features = size(prototypes)
    closest_prototypes = Array{Int8}(undef, n_features, 1);
    
    @inbounds for (j,x) in enumerate(vector)
        best_coordinate = 1
        min_distance::T = typemax(T)
        for k in 1:n_clusters
           current_dist = dist(prototypes[k,j], x)
           if current_dist < min_distance
               best_coordinate = k
               min_distance = current_dist
           end
           #println(k, ' ', j, ' ', best_coordinate, ' ',min_distance )
        end            
        closest_prototypes[j] = best_coordinate
    end
    return closest_prototypes
end

@assert isapprox(encode(euclidean, query, P),
                 encode_fast(euclidean, query, P))

if profile_flag
    @benchmark encode_fast(euclidean, query, P)
end

In [18]:
println(find_closest_coordinate(euclidean, query[1], P[:,1]))
println(find_closest_coordinate(euclidean, query[2], P[:,2]))
println(find_closest_coordinate(euclidean, query[3], P[:,3]))

1
28
29


In [19]:
function SEuclidean0(x, query)
    @assert length(x) == length(query)
    res = zero(eltype(x))
    @inbounds   for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return res
end

function Euclidean0(x, query)
    @assert length(x) == length(query)
    res = zero(eltype(x))
    @inbounds   for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

function compute_ADC(query, prototypes, dist)
    n_clusters, n_features = size(prototypes)
    
    ADC_table = Array{Float32}(undef, n_clusters, n_features)
    
    for j in 1:n_features
        for p in 1:n_clusters
            ADC_table[p,j] = dist(query[j], prototypes[p,j] )
        end
    end
    return  ADC_table
end

adc_table = compute_ADC(query, P, euclidean)

if profile_flag
    @benchmark compute_ADC(query, P, euclidean)
end

In [20]:
function adc_dist(x_code,  adc_table)
    res = zero(eltype(adc_table))
    @inbounds @fastmath for j in eachindex(x_code)
        res+= adc_table[x_code[j], j]
    end
    return res
end

query_code = encode_fast(euclidean, query, P)
x_code = PQcodes[:,1];
adc_table = compute_ADC(query, P, euclidean);
print(adc_dist(x_code,  adc_table))

if profile_flag
    @benchmark adc_dist(query_code, x_code,  adc_table)
end

5177.5645

In [21]:
function linear_scann(query, PQcodes, adc_table, P)
    
    query_code = encode_fast(sqeuclidean, query, P)
    n_features, n_examples = size(PQcodes)
    distances = Array{eltype(query)}(undef, n_examples)
    
    @inbounds @fastmath for j in 1:n_examples
        distances[j] = adc_dist( view(PQcodes,:,j) ,  adc_table)    
    end
    return distances
end

if profile_flag
    @benchmark distances = linear_scann($query, $PQcodes, $adc_table, $P)
end

### Compute Recall PQLinearScann 

In [22]:
function recall(predicted, relevant, eval_at)
    """
    fraction of the relevant documents that are successfully retrieved
    """
    if eval_at == 0
        return 0.0
    end
    
    predicted_at_k = predicted[1:eval_at]
    n_predicted_and_relevant =  length(intersect( Set(predicted_at_k), Set(relevant))) 
    return n_predicted_and_relevant/ length(relevant)
end

query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_fast(euclidean, query, P)
x_code = PQcodes[:,1];
adc_table = compute_ADC(query, P, euclidean);
pq_distances = linear_scann(query, PQcodes, adc_table, P);
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, 100);

recall(top_k_pq, query_true_neighbors, 100) = 0.69


### Decoding vectors from pqcode

In [23]:
PQcodes = hcat([Int32.(r.assignments[:]) for r in R_per_feature]...)';

In [24]:
function get_top_k_from_distances(X, distances, top_k)
    top_k_pq = sortperm(distances)[1:top_k];
    X_top = X[:,top_k_pq]
    return X_top
end

function decode(x_pqcode, P)
    n_features = length(x_pqcode)
    x_decoded = zeros(eltype(P), n_features)
    
    for j in eachindex(x_pqcode)
        x_decoded[j] = P[x_pqcode[j], j]
    end
    return x_decoded
end

x = X_te_vecs[:,1]
x_pqcode = vec(encode(Euclidean0, x, P));
x_decoded = decode(x_pqcode, P);

In [25]:
query = X_te_vecs[:,1]
adc_table = compute_ADC(query, P, Euclidean0)
pq_distances = linear_scann(query, PQcodes_test, adc_table, P);
x_closest = vec(get_top_k_from_distances(X_te_vecs, pq_distances, 1));
df_x = DataFrame(Dict(:x => x, 
                      :x_decoded => x_decoded,
                      :x_pqcode => x_pqcode,
                      :x_closest => x_closest
                      ))
first(df_x, 5)

Unnamed: 0_level_0,x,x_closest,x_decoded,x_pqcode
Unnamed: 0_level_1,Float32,Float32,Float32,Int8
1,1.0,1.0,0.496661,1
2,3.0,3.0,3.0,28
3,11.0,11.0,10.4715,29
4,110.0,110.0,110.286,20
5,62.0,62.0,60.3883,3


##  PQLinearscann Sharing prototypes across features

https://groups.google.com/g/julia-users/c/xBcQRebyi_o



In [26]:
n_clusters = 32
#R_shared = kmeans(Matrix(vec(X_tr_vecs)'), n_clusters; maxiter=200)
#R_shared = kmeans(X_tr_vecs[[1],:], n_clusters; maxiter=200)
R_shared = R_per_feature[1]

KmeansResult{Matrix{Float32}, Float32, Int64}(Float32[0.49666122 80.90828 … 99.9554 13.0], [1, 13, 1, 23, 1, 17, 1, 13, 7, 6  …  9, 7, 7, 3, 3, 18, 22, 19, 1, 21], Float32[0.24667236, 0.9406433, 0.24667236, 0.26367188, 0.25334996, 5.8496094, 0.24667236, 1.0611572, 0.9035034, 0.9484253  …  1.1054688, 1.1013947, 0.0024414062, 0.0034179688, 4.2390137, 2.1914062, 0.8599529, 1.0335693, 0.24667236, 3.3378906], [238559, 17880, 37868, 39963, 16548, 31708, 56274, 2742, 32797, 11123  …  32024, 15651, 19275, 18295, 9764, 10207, 21615, 43568, 8654, 15201], [238559, 17880, 37868, 39963, 16548, 31708, 56274, 2742, 32797, 11123  …  32024, 15651, 19275, 18295, 9764, 10207, 21615, 43568, 8654, 15201], 1.4650838f6, 6, true)

In [27]:
function encode_shared(dist, vector::Array{T}, shared_prototypes::Array{T}) where T
    n_clusters = length(shared_prototypes)
    closest_prototypes = Array{Int8}(undef, n_features, 1);
    
    @inbounds for (j,x) in enumerate(vector)
        best_coordinate = 1
        min_distance::T = typemax(T)
        for k in 1:n_clusters
           current_dist = dist(shared_prototypes[k], x)
           if current_dist < min_distance
               best_coordinate = k
               min_distance = current_dist
           end
           #println(k, ' ', j, ' ', best_coordinate, ' ',min_distance )
        end            
        closest_prototypes[j] = best_coordinate
    end
    return closest_prototypes
end

"""
function encode_fast_shared(dist, vector::Array{T}, prototypes::Array{T}) where T
    
    n_features = length(vector)
    n_clusters = length(prototypes)
    closest_prototypes = Array{Int32}(undef, n_features, 1);
    
    @inbounds @fastmath for (j,x) in enumerate(vector)
        best_coordinate = 1
        min_distance::T = typemax(T)
        for k in 1:n_clusters
           current_dist = dist(prototypes[k], x)
           if current_dist < min_distance
               best_coordinate = k
               min_distance = current_dist
           end
        end            
        closest_prototypes[j] = best_coordinate
    end
    return closest_prototypes
end

if profile_flag
    @benchmark encode_fast_shared(euclidean, query, P_shared)
end
"""

P_shared = vec(R_shared.centers)

j = 3
x = X_tr_vecs[:,j]
x_pqcode_shared = encode_shared(sqeuclidean, x, P_shared);


P_shared_copied = hcat( [P_shared for i in 1:128]...);
@assert P_shared_copied[:,1] == P_shared
x_pqcode = encode(sqeuclidean, x, P_shared_copied);
x_decoded = decode(x_pqcode, P_shared_copied);

@assert x_pqcode_shared == x_pqcode

df_x = DataFrame(Dict(:x => x, 
                      :x_pqcode => vec(x_pqcode),
                      :x_pqcode_shared => vec(x_pqcode_shared),
                      :x_decoded => x_decoded,
                      #:centroid => vec(P_shared[x_pqcode])
                      ))

first(df_x, 5)

Unnamed: 0_level_0,x,x_decoded,x_pqcode,x_pqcode_shared
Unnamed: 0_level_1,Float32,Float32,Int8,Int8
1,0.0,0.496661,1,1
2,1.0,0.496661,1,1
3,5.0,3.92734,22,22
4,3.0,3.92734,22,22
5,44.0,43.9038,4,4


In [28]:
n_features, n_examples = size(X_tr_vecs)

PQcodes_shared = Array{Int32}(undef, n_features, n_examples);

for j in 1:n_examples
    PQcodes_shared[:,j] = encode_shared(euclidean, X_tr_vecs[:,j], P_shared)  
    #PQcodes_shared[:,j] = encode(euclidean, X_tr_vecs[:,j] ,P_shared_copied)  
end

@show maximum(PQcodes), maximum(PQcodes_shared)

(maximum(PQcodes), maximum(PQcodes_shared)) = (32, 32)


(32, 32)

Because we have created the shared quantizer from the quantizer of the PQquantizer of the first feature the following assertion must hold.

In other words, all encoded elements from the first feature (and there are 1 million items) must be mapped to the same integer

In [29]:
@assert PQcodes_shared[1,:] == PQcodes[1,:]

Now we need a new encoding function that uses a single vector of prototypes

In [30]:
function encode_fast_shared(dist, vector::Array{T}, prototypes::Array{T}) where T
    
    n_features = length(vector)
    n_clusters = length(prototypes)
    closest_prototypes = Array{Int32}(undef, n_features, 1);
    
    @inbounds @fastmath for (j,x) in enumerate(vector)
        best_coordinate = 1
        min_distance::T = typemax(T)
        for k in 1:n_clusters
           current_dist = dist(prototypes[k], x)
           if current_dist < min_distance
               best_coordinate = k
               min_distance = current_dist
           end
        end            
        closest_prototypes[j] = best_coordinate
    end
    return closest_prototypes
end

if profile_flag
    @benchmark encode_fast_shared(euclidean, query, P_shared)
end

@assert encode_fast_shared(euclidean, x ,vec(P_shared)) ==  encode(euclidean, x ,P_shared_copied);
@assert encode_fast_shared(euclidean, x ,vec(P_shared)) ==  encode(sqeuclidean, x ,P_shared_copied);

Now we will ensure that `compute_ADC_shared` provides the same ADC table computation as `compute_ADC`
in the case where `compute_ADC` is called with all prototypes being copies of column vector used in `compute_ADC_shared` 

In [31]:
function compute_ADC_shared(query, prototypes, dist)
    """
    Computes the distance between each query[k] and prototype[k]
    
    Arguments:
    
    - y (Array{T}): vector of n_features components.
    - prototypes (Array{T}): vector of n_cluster components.
    - dist (function): distance to be used to compare prototypes and query.
    
    """
    #@assert ndims(prototypes) ==1
    
    n_features = length(query)
    n_clusters = length(prototypes)
    ADC_table = Array{Float32}(undef, n_clusters, n_features)
    
    for j in 1:n_features       # 128
        for p in 1:n_clusters   # 32
            ADC_table[p,j] = dist(query[j], prototypes[p] )
        end
    end
    @assert ndims(ADC_table)==2
    return  ADC_table
end

query = X_tr_vecs[:,1]
adc_table_shared = compute_ADC_shared(query, P_shared, euclidean);
adc_table = compute_ADC(query, P_shared_copied, euclidean);

@assert isapprox(adc_table, adc_table_shared)

Now we need to verify that the ADC distance `adc_dist_shared` is equal to `adc_dist`

In [114]:
function adc_dist_shared(x_code,  adc_table::Vector) 
    res = zero(eltype(adc_table))
    @inbounds @simd for j in eachindex(x_code)
        res+= adc_table[x_code[j]]
    end
    return res
end


function adc_dist_shared( x_code,  adc_table::Matrix)
    res = zero(eltype(adc_table))
    @inbounds @simd for j in eachindex(x_code)
        res+= adc_table[x_code[j], j]
    end
    return res
end

query = X_tr_vecs[:,1]
adc_table_shared = compute_ADC_shared(query, P_shared, euclidean);

#query_code = encode_fast_shared(sqeuclidean, query, P_shared)
distance_adc = adc_dist(view(PQcodes,:,j) ,  adc_table_shared)  
distance_adc_shared = adc_dist_shared(view(PQcodes,:,j) ,  adc_table_shared)  

@assert distance_adc == distance_adc_shared

Now we need to verify that scanning over the database we get the same results with `linear_scann_shared` and with `linear_scann`

In [33]:

function linear_scann_shared(query, PQcodes, adc_table_shared, P_shared)
    
    
    query_code = encode_fast_shared(sqeuclidean, query, P_shared)
    n_features, n_examples = size(PQcodes)
    distances = Array{eltype(query)}(undef, n_examples)
    
    @inbounds @fastmath for j in 1:n_examples
        distances[j] = adc_dist_shared( view(PQcodes,:,j) ,  adc_table_shared)    
    end
    return distances
end

if profile_flag
    @benchmark distances = linear_scann($query, $PQcodes, $adc_table_shared, $P_shared)
end

distances = linear_scann(query, PQcodes, adc_table, P_shared_copied);
distances_shared = linear_scann_shared(query, PQcodes, adc_table_shared, P_shared);

@assert isapprox(distances, distances_shared)

In [34]:
j=1
@show adc_dist(  view(PQcodes,:,j) ,  adc_table_shared)
@show adc_dist_shared( view(PQcodes,:,j) ,  adc_table_shared);

adc_dist(view(PQcodes, :, j), adc_table_shared) = 6531.5146f0
adc_dist_shared(view(PQcodes, :, j), adc_table_shared) = 6531.5146f0


It is important to recall

- `PQcodes` will not be the same as `PQcodes_shared`:

    - `PQcodes` is constructed form a `n_features` codebooks.
    - `PQcodes_shared` is constructed form a single codebook.
    

In [35]:
pq_distances = linear_scann(query, PQcodes_shared, adc_table, P);
#pq_distances = linear_scann_shared(query, PQcodes_shared, adc_table_shared, P_shared);

x_closest = vec(get_top_k_from_distances(X_tr_vecs, pq_distances, 1));

df_x = DataFrame(Dict(:x => x, 
                      :x_pqcode => vec(x_pqcode),
                      :x_closest => vec(x_closest)
                        
                      ))

first(df_x, 5)

Unnamed: 0_level_0,x,x_closest,x_pqcode
Unnamed: 0_level_1,Float32,Float32,Int8
1,0.0,0.0,1
2,1.0,16.0,1
3,5.0,35.0,22
4,3.0,5.0,22
5,44.0,32.0,4


### Recall PQlinearscann Sharing a copies Quantizer

Here we just verify that if we copy a quantizer many times we still get similar quality

In [36]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

x_code = PQcodes_shared[:,query_id];
adc_table = compute_ADC(query, P_shared_copied, euclidean);
pq_distances = linear_scann(query, PQcodes_shared, adc_table, P_shared_copied);
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, 100);

recall(top_k_pq, query_true_neighbors, 100) = 0.67


In [67]:

function compute_mean_recall(X, PQcodes_shared, true_neighbors,
                             P_shared,P_shared_copied, top_k::Int, dist::Function)

    n_features, n_examples = size(X)

    average = 0.
    for query_id in 1:n_examples
        query = X[:, query_id]
        adc_table_shared = compute_ADC(query, P_shared_copied, dist);
        pq_distances = linear_scann(query, PQcodes_shared, adc_table_shared, P_shared_copied);
        top_k_pq = sortperm(pq_distances)[1:top_k];
        r = recall(top_k_pq, true_neighbors[:,query_id], 100)
        average += r;
    end
    return average / n_examples

end

avg_recall = compute_mean_recall(X_te_vecs[:, 1:10], PQcodes_shared, true_neighbors,
                                 P_shared, P_shared_copied, top_k, euclidean)

0.642

### Recall PQlinearscann Sharing a Quantizer

We don´t want to have copies of a quantizer, but have specific methods that use a single vector across features

In [38]:
function build_PQ_code_shared_copied(X_tr_vecs, P_shared_copied)
    n_features, n_examples = size(X_tr_vecs)

    PQcodes_shared = Array{Int32}(undef, n_features, n_examples);

    for j in 1:n_examples
        PQcodes_shared[:,j] = encode(euclidean, view(X_tr_vecs,:,j), P_shared_copied)  
    end

    @show maximum(PQcodes), maximum(PQcodes_shared)
    return PQcodes_shared
end

PQcodes_shared = build_PQ_code_shared_copied(X_tr_vecs, P_shared_copied);

(maximum(PQcodes), maximum(PQcodes_shared)) = (32, 32)


In [39]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

adc_table = compute_ADC(query, P_shared_copied, euclidean);
pq_distances = linear_scann_shared(query, PQcodes_shared, adc_table, P_shared);
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, 100);

recall(top_k_pq, query_true_neighbors, 100) = 0.67


we can see that we can use `compute_ADC_shared` insead of `compute_ADC` and we get the same results

In [40]:
query_id = 3
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

adc_table_shared = compute_ADC_shared(query, P_shared, euclidean);
pq_distances = linear_scann_shared(query, PQcodes_shared, adc_table_shared, P_shared);
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, 100);

recall(top_k_pq, query_true_neighbors, 100) = 0.6


## Recall `linear_scann_shared`

In [41]:
function compute_mean_recall_shared(X, PQcodes, true_neighbors, P_shared, top_k::Int, dist::Function)
    n_features, n_examples = size(X)
    average = 0.
    for j in 1:n_examples
        query = X[:, j]
        query_true_neighbors = true_neighbors[:,j]
        table = compute_ADC_shared(query, P_shared, dist);
        pq_distances = linear_scann_shared(query, PQcodes, table, P_shared)
        top_k_pq = sortperm(pq_distances)[1:top_k];
        r = recall(top_k_pq, query_true_neighbors, 100);
        average +=r
    end
    return average / n_examples
end

avg_recall = compute_mean_recall_shared(X_te_vecs[:, 1:100], PQcodes_shared, true_neighbors,
                                        P_shared, top_k, euclidean)

0.629

In [42]:
@benchmark linear_scann_shared($query, $PQcodes_shared, $adc_table_shared, $P_shared)

BenchmarkTools.Trial: 116 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m42.215 ms[22m[39m … [35m 47.720 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m43.066 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m43.128 ms[22m[39m ± [32m553.047 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m [39m [39m▁[39m▁[39m [39m▄[39m▃[39m▄[39m▃[39m [39m█[34m▃[39m[39m [32m [39m[39m▄[39m [39m [39m▁[39m [39m▁[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▄[39m▁[39m▁[39m▁[39m▁[

## Recall  `linear_scann_shared` with `adc_table_shared_vector`

In [89]:
function compute_mean_recall_shared(X, PQcodes, true_neighbors, P_shared, top_k::Int, dist::Function)
    n_features, n_examples = size(X)
    average = 0.
    for j in 1:n_examples
        query = X[:, j]
        query_true_neighbors = true_neighbors[:,j]
        table = compute_ADC_shared(query, P_shared, dist);
        pq_distances = linear_scann_shared(query, PQcodes, table, P_shared)
        top_k_pq = sortperm(pq_distances)[1:top_k];
        r = recall(top_k_pq, query_true_neighbors, 100);
        average +=r
    end
    return average / n_examples
end

avg_recall = compute_mean_recall_shared(X_te_vecs[:, 1:100], PQcodes_shared, true_neighbors,
                                        P_shared, top_k, euclidean)

0.629

In [113]:
function adc_dist_shared(x_code,  adc_table::Vector) 
    res = zero(eltype(adc_table))
    @inbounds @simd for j in eachindex(x_code)
        res+= adc_table[x_code[j]]
    end
    return res
end


adc_dist_shared (generic function with 2 methods)

# Transposing  `linear_scann_shared` 

Note that the implementation of adc requires acesses to row and col changing values in `adc_table[x_code[j], j]`.

```julia
function adc_dist_shared( x_code,  adc_table::Matrix)
    res = zero(eltype(adc_table))
    @inbounds @simd for j in eachindex(x_code)
        res+= adc_table[x_code[j], j]              #### j is a different at every loop interation
    end
    return res
end
```

we could process data differently, dealing with `n_example` points for each of the `n_clusters` coordinates.
This means: perform a for loop over `n_cluster` coordinates and for each coordinate process `n_example` code values.

In [135]:
@benchmark linear_scann_shared($query, $PQcodes_shared, $adc_table_shared, $P_shared)

BenchmarkTools.Trial: 119 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m41.648 ms[22m[39m … [35m53.329 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m42.008 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m42.339 ms[22m[39m ± [32m 1.519 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.11% ± 0.39%

  [39m█[39m [39m▂[34m [39m[39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m▇[39m█[34m▇[39m[39m▄[32m▅

In [354]:

function linear_scann_shared_transposed(query, PQcodes_trans, adc_table_shared, P_shared)
    
    n_examples, n_features = size(PQcodes_trans)
    distances = zeros(eltype(query), n_examples)
    @inbounds @fastmath for j in 1:n_features
        adc_table_j = adc_table_shared[:,j]
         for n in 1:n_examples
            distances[n] += adc_table_j[PQcodes_trans[n, j]]
        end
    end
    return distances
end

linear_scann_shared_transposed (generic function with 1 method)

In [356]:
PQcodes_trans = Matrix(PQcodes')

distances_linear_scann_trans = linear_scann_shared_transposed(query, PQcodes_trans, 
                                                              adc_table_shared, P_shared);

distances_linear_scann = linear_scann_shared(query, PQcodes, adc_table_shared, P_shared)
distances_linear_scann_fast = linear_scann_shared_fast(query, PQcodes, adc_table_shared, P_shared);
distances_linear_scann_trans = linear_scann_shared_transposed(query, PQcodes_trans, 
                                                              adc_table_shared, P_shared);

@assert isapprox(distances_linear_scann ,distances_linear_scann_fast)
@assert isapprox(distances_linear_scann ,distances_linear_scann_trans)

Turns out this version is slower ?

In [359]:
@benchmark linear_scann_shared_transposed(query, PQcodes_trans, adc_table_shared, P_shared)

BenchmarkTools.Trial: 78 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m63.867 ms[22m[39m … [35m 67.648 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m64.495 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m64.658 ms[22m[39m ± [32m579.565 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.12% ± 0.42%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m▄[39m▁[39m [39m█[39m▁[34m▅[39m[39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▃[39m▁[39m▅[39m▃[39m▃[3

using StaticArrays

In [382]:

using StaticArrays

function linear_scann_shared_transposed(query, PQcodes_trans, adc_table_shared, P_shared)
    
    n_examples, n_features = size(PQcodes_trans)
    distances = zeros(eltype(query), n_examples)
    @inbounds for j in 1:n_features
        #adc_table_j = SVector{32,Float32}([adc_table_shared[:,1]...])
        @fastmath for n in 1:n_examples
            #@show j,n, PQcodes_trans[n, j]
            distances[n] += adc_table_j[PQcodes_trans[n,j]]
            
        end
    end
    return distances
end

linear_scann_shared_transposed (generic function with 1 method)

In [383]:
adc_table_j = SVector{32,Float32}([adc_table_shared[:,1]...]);

In [384]:
@benchmark linear_scann_shared_transposed($query, $PQcodes_trans, $adc_table_shared, $P_shared)

BenchmarkTools.Trial: 69 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m70.776 ms[22m[39m … [35m81.577 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m72.139 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m73.343 ms[22m[39m ± [32m 2.552 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.15% ± 0.59%

  [39m [39m [39m [39m▃[39m [39m▃[39m▂[34m█[39m[39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▅[39m▅[39m█[39m█[39m▇[39m█[39m█

In [387]:

adc_table_j

32-element SVector{32, Float32} with indices SOneTo(32):
   0.49666122
  80.90828
  32.94114
  43.903812
 119.045685
  21.973886
   8.950528
 145.65755
  37.948563
 134.67995
  63.98707
  74.986145
  14.96987
   ⋮
 112.17338
   3.9273367
  11.48651
  87.93994
  24.492191
  26.49079
 105.47092
  66.97982
  55.959335
   6.483061
  99.9554
  13.0