# Replacing ADC table by L1 distance between PQ codes

In [1]:
using HDF5
using BenchmarkTools
using Distances
using LoopVectorization
using SIMD
using Clustering
using ProgressMeter
using StaticArrays
using DataFrames
using Plots
using NPZ

In [2]:
profile_flag = false
Sys.cpu_info()[1].model

"Intel(R) Core(TM) i7-10750H CPU @ 2.60GHz"

## SIFT1M dataset

In [3]:
path = joinpath(homedir(), "TFM", "ann-benchmarks",  "sift-128-euclidean.hdf5")
#path = joinpath(homedir(), "Datasets", "SIFT1M", "sift-128-euclidean.hdf5")

"/home/adegaray/TFM/ann-benchmarks/sift-128-euclidean.hdf5"

In [4]:
f = h5open(path, "r")

X_tr_vecs = read(f["train"])
X_te_vecs = read(f["test"]);
true_neighbors = read(f["neighbors"])
true_distances = read(f["distances"])

true_neighbors .= true_neighbors .+ 1;

@show size(X_tr_vecs)
@show size(X_te_vecs)
@show size(true_neighbors)
@show size(true_distances)

n_features, n_examples = size(X_tr_vecs)

size(X_tr_vecs) = (128, 1000000)
size(X_te_vecs) = (128, 10000)
size(true_neighbors) = (100, 10000)
size(true_distances) = (100, 10000)


(128, 1000000)

### Compute Recall PQLinearScann 

In [3]:
function recall(predicted, relevant, eval_at)
    """
    fraction of the relevant documents that are successfully retrieved
    """
    if eval_at == 0
        return 0.0
    end
    
    predicted_at_k = predicted[1:eval_at]
    n_predicted_and_relevant =  length(intersect( Set(predicted_at_k), Set(relevant))) 
    return n_predicted_and_relevant/ length(relevant)
end

recall (generic function with 1 method)

###  PQLinearscann Sharing prototypes across features

https://groups.google.com/g/julia-users/c/xBcQRebyi_o



In [4]:

function encode_shared(dist, vector::Array{T}, shared_prototypes::Array{T}) where T
    n_clusters = length(shared_prototypes)
    n_features = size(vector)[1]
    closest_prototypes = Array{Int32}(undef, n_features, 1);
    
    @inbounds for (j,x) in enumerate(vector)
        best_coordinate = 1
        min_distance::T = typemax(T)
        for k in 1:n_clusters
           current_dist = dist(shared_prototypes[k], x)
           if current_dist < min_distance
               best_coordinate = k
               min_distance = current_dist
           end
           #println(k, ' ', j, ' ', best_coordinate, ' ',min_distance )
        end            
        closest_prototypes[j] = best_coordinate
    end
    return closest_prototypes
end

encode_shared (generic function with 1 method)

We load the K=32 centroids resulting of performing 1d-kmeans over the first feature of the train dataset. Notice that these centroids are sorted, resulting in a sorted codification. We will take advantage of shared quantization.

In [7]:
P_shared = vec(Float32.(npzread("1dkmeans_prototypes/1dkmeans_shared_prototypes_SIFT1M.npy")))

32-element Vector{Float32}:
   0.22927776
   2.4602568
   4.935903
   7.958296
  10.961814
  13.964964
  16.974878
  19.983488
  23.455843
  27.45342
   ⋮
  94.42536
 100.457985
 106.51546
 112.64925
 118.534
 124.41393
 130.70255
 138.0879
 148.98564

In [8]:
PQcodes_shared = Array{Int8}(undef, n_features, n_examples);

for j in 1:n_examples
    PQcodes_shared[:,j] = encode_shared(euclidean, X_tr_vecs[:,j], P_shared)  
end

### Inspect idea of computing distances without adc table: directly from pqcodes


In [95]:
function abs_dist(y::Array{T}, X::Array{T}, j) where T
    # Here I use a bigger Int type than 8 due to avoid
    # res beeing overflowed
    res = Int16(0)
    @inbounds @fastmath  for k in eachindex(y)
        res += abs(X[k, j] - y[k])
    end
    return res
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, X, j)    
    end
    return distances
end

linear_scann_exact (generic function with 1 method)

In [10]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = Int8.(vec(query_code))

PQcodes_int8 = Int8.(PQcodes_shared);

pq_distances = linear_scann_exact(abs_dist, query_code, PQcodes_int8)
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, top_k);

recall(top_k_pq, query_true_neighbors, top_k) = 0.58


In [11]:
@benchmark linear_scann_exact($abs_dist, $query_code, $PQcodes_int8)

BenchmarkTools.Trial: 368 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m11.142 ms[22m[39m … [35m65.473 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m12.716 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m13.581 ms[22m[39m ± [32m 3.922 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.13% ± 1.17%

  [39m▃[39m▅[39m▇[39m█[39m▆[34m▆[39m[39m▆[39m▃[32m▃[39m[39m▃[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m█[39m█[39m█[39m█[34m█[39m

### Top_k_expansion + finetunning

An interesting idea would be to compute a candidate space of nearest neighbours and "finetunit" on using exact distances.

Here he have an `extra_factor` that can be used to tune the top_k expansion (the higher the better quality at the expense of time).

In [62]:
function linear_scann_exact_pq(dist, query, X, top_k, extra_factor)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, X, j)    
    end

    top_result_pos = sortperm(distances)[1:top_k*extra_factor];
    
    return top_result_pos
end

linear_scann_exact_pq (generic function with 1 method)

In [64]:
function euclidean_mat2(y, X, j) where T
    # Here I use a bigger Int type than 8 due to avoid
    # res beeing overflowed
    res = zero(eltype(y))
    @inbounds @fastmath  for k in eachindex(y)
        partial = X[k, j] - y[k]
        res += partial * partial
    end
    return res
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, X, j)    
    end
    return distances
end

linear_scann_exact (generic function with 1 method)

We could compute exact distances within a subset of examples

In [14]:

query_id = 5
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = Int8.(vec(query_code))

PQcodes_int8 = Int8.(PQcodes_shared);
best_ids = linear_scann_exact_pq(abs_dist, query_code, PQcodes_int8, top_k, 10);


#linear_scann_exact(euclidean_mat, query, view(X_tr_vecs,:,best_ids))
distances_candidates_expanded = linear_scann_exact(euclidean_mat2, query, X_tr_vecs[:,best_ids]);
permutation_expanded = sortperm(distances_candidates_expanded)[1:top_k];
@show recall(best_ids[permutation_expanded], query_true_neighbors, top_k);

recall(best_ids[permutation_expanded], query_true_neighbors, top_k) = 0.99


In [15]:
function compute_mean_recall_extra(X, PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k::Int, dist::Function)

    n_features, n_examples = size(X)
    PQcodes_int8 = Int8.(PQcodes_shared);
    average = 0.
    for query_id in 1:n_examples
        query = X[:, query_id]
        query_code = encode_shared(euclidean, query, P_shared)
        query_code = Int8.(vec(query_code))
        best_ids = linear_scann_exact_pq(dist, query_code, PQcodes_int8, top_k, extra_factor);
        distances_candidates_expanded = linear_scann_exact(euclidean_mat2, query, X_tr_vecs[:,best_ids]);
        permutation_expanded = sortperm(distances_candidates_expanded)[1:top_k]
        r = recall(best_ids[permutation_expanded], true_neighbors[:,query_id], top_k);
        average += r;
        end
    return average / n_examples
    end

extra_factor = 10  
avg_recall = compute_mean_recall_extra(X_te_vecs[:, 1:10], PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k, abs_dist) 

0.982

Here the problem is that we assume X_tr_vecs is "on memory" and we want to avoid this as much as possible,
because it can be potentially quite big. We have to investigate how to store the 'exact values from X_tr_vecs' on disk, 
using a memmap array like storage. Also study the overhead of doing this.

In [16]:
@benchmark distances_candidates_expanded = linear_scann_exact(euclidean_mat2, query, X_tr_vecs[:,best_ids])

BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m135.800 μs[22m[39m … [35m  8.055 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 93.42%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m157.500 μs               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m215.101 μs[22m[39m ± [32m206.442 μs[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m2.37% ±  2.98%

  [39m█[39m▇[34m▆[39m[39m▅[39m▄[39m▄[39m▃[39m▃[39m▂[32m▁[39m[39m▁[39m▄[39m▃[39m▃[39m▂[39m▂[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[39m [39m [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▂
  [39m█[39m█[34m█

Other test with hamming



In [17]:

@inline function hamming(y::Array{T}, X::Array{T}, j) where T
    # Here I use a bigger Int type than 8 due to avoid
    # res beeing overflowed
    res = UInt8(0)
    @inbounds @simd  for k in eachindex(y)
        res += X[k, j] != y[k]
    end
    return res
end

function linear_scann_exact_hamming( query, X)

    n_features, n_examples = size(X)
    distances = Array{UInt8}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = hamming(query, X, j)    
    end
    return distances
end

linear_scann_exact_hamming (generic function with 1 method)

In [18]:
query_id = 20
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = UInt8.(vec(query_code))

PQcodes_int8 = UInt8.(PQcodes_shared);

pq_distances = linear_scann_exact(hamming, query_code, PQcodes_int8)
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, top_k);

recall(top_k_pq, query_true_neighbors, top_k) = 0.01


In [19]:
@benchmark linear_scann_exact_hamming($query_code, $PQcodes_int8)

BenchmarkTools.Trial: 410 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m 9.740 ms[22m[39m … [35m40.730 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m10.965 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m12.177 ms[22m[39m ± [32m 3.851 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m▅[39m█[39m▇[39m█[34m▆[39m[39m▅[39m▃[39m▃[32m▃[39m[39m▂[39m▂[39m▃[39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m█[39m█[39m█[34m█[39m[39m█

### Benchmark times exact vs linearscann

#### Type UInt8

In [20]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = UInt8.(vec(query_code))

PQcodes = UInt8.(PQcodes_shared);

In [21]:
@inline function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds @fastmath  for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, view(X,:,j))    
    end
    return distances
end

@benchmark linear_scann_exact($Euclidean0, $query, $X_tr_vecs)


BenchmarkTools.Trial: 120 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m37.413 ms[22m[39m … [35m55.200 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m41.318 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m41.876 ms[22m[39m ± [32m 3.198 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m [39m▂[39m [39m [39m▂[39m [39m▅[39m [39m [39m▅[39m [39m [39m [39m [39m█[34m▅[39m[39m [32m [39m[39m [39m [39m [39m [39m▂[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▇[39m█[39m█[39m▄[39m█[39m█[39m

In [22]:
function abs_dist(y::Array{T}, X::Array{T}, j) where T
    # Here I use a bigger Int type than 8 due to avoid
    # res beeing overflowed
    res = Int16(0)
    @inbounds @fastmath  for k in eachindex(y)
        res += abs(X[k, j] - y[k])
    end
    return res
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, X, j)    
    end
    return distances
end

@benchmark linear_scann_exact($abs_dist, $query_code, $PQcodes_int8)

BenchmarkTools.Trial: 341 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m10.975 ms[22m[39m … [35m67.363 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m13.037 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m14.643 ms[22m[39m ± [32m 5.554 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.07% ± 0.72%

  [39m [39m▂[39m▃[39m█[34m▅[39m[39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▅[39m█[39m█[39m█[34m█[39m[39m█

## GIST dataset

In [23]:
path = joinpath(homedir(), "TFM", "ann-benchmarks", "gist-960-euclidean.hdf5")
#path = joinpath(homedir(), "Datasets", "GIST", "gist-960-euclidean.hdf5")

"/home/adegaray/TFM/ann-benchmarks/gist-960-euclidean.hdf5"

In [24]:
f = h5open(path, "r")

X_tr_vecs = read(f["train"])
X_te_vecs = read(f["test"]);
true_neighbors = read(f["neighbors"])
true_distances = read(f["distances"])

true_neighbors .= true_neighbors .+ 1;

@show size(X_tr_vecs)
@show size(X_te_vecs)
@show size(true_neighbors)
@show size(true_distances)

n_features, n_examples = size(X_tr_vecs)

size(X_tr_vecs) = (960, 1000000)
size(X_te_vecs) = (960, 1000)
size(true_neighbors) = (100, 1000)
size(true_distances) = (100, 1000)


(960, 1000000)

In [25]:
P_shared = vec(Float32.(npzread("1dkmeans_prototypes/1dkmeans_shared_prototypes_GIST.npy")))

32-element Vector{Float32}:
 0.008980796
 0.01590017
 0.022200761
 0.028304834
 0.03437995
 0.040527266
 0.046768155
 0.053139895
 0.05975771
 0.06654963
 ⋮
 0.21748106
 0.23899144
 0.2642882
 0.2944512
 0.33110675
 0.38038924
 0.4493068
 0.5439606
 0.7076147

In [26]:
PQcodes_shared = Array{Int8}(undef, n_features, n_examples);

for j in 1:n_examples
    PQcodes_shared[:,j] = encode_shared(euclidean, X_tr_vecs[:,j], P_shared)  
end

In [27]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = Int8.(vec(query_code))

PQcodes_int8 = Int8.(PQcodes_shared);

pq_distances = linear_scann_exact(abs_dist, query_code, PQcodes_int8)
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, top_k);

recall(top_k_pq, query_true_neighbors, top_k) = 0.64


### Top_k_expansion + finetunning

In [28]:
query_id = 5
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = Int8.(vec(query_code))

PQcodes_int8 = Int8.(PQcodes_shared);
best_ids = linear_scann_exact_pq(abs_dist, query_code, PQcodes_int8, top_k, 10);

#linear_scann_exact(euclidean_mat, query, view(X_tr_vecs,:,best_ids))
distances_candidates_expanded = linear_scann_exact(euclidean_mat2, query, X_tr_vecs[:,best_ids]);
permutation_expanded = sortperm(distances_candidates_expanded)[1:top_k];
@show recall(best_ids[permutation_expanded], query_true_neighbors, top_k);

recall(best_ids[permutation_expanded], query_true_neighbors, top_k) = 0.87


In [29]:
function compute_mean_recall_extra(X, PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k::Int, dist::Function)

    n_features, n_examples = size(X)
    PQcodes_int8 = Int8.(PQcodes_shared);
    average = 0.
    for query_id in 1:n_examples
        query = X[:, query_id]
        query_code = encode_shared(euclidean, query, P_shared)
        query_code = Int8.(vec(query_code))
        best_ids = linear_scann_exact_pq(dist, query_code, PQcodes_int8, top_k, extra_factor);
        distances_candidates_expanded = linear_scann_exact(euclidean_mat2, query, X_tr_vecs[:,best_ids]);
        permutation_expanded = sortperm(distances_candidates_expanded)[1:top_k]
        r = recall(best_ids[permutation_expanded], true_neighbors[:,query_id], top_k);
        average += r;
        end
    return average / n_examples
    end

extra_factor = 10  
avg_recall = compute_mean_recall_extra(X_te_vecs[:, 1:10], PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k, abs_dist) 

0.959

### Benchmark times exact vs linearscann

In [30]:
@inline function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds @fastmath  for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, view(X,:,j))    
    end
    return distances
end

@benchmark linear_scann_exact($Euclidean0, $query, $X_tr_vecs)

BenchmarkTools.Trial: 15 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m297.356 ms[22m[39m … [35m581.097 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m311.755 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m345.507 ms[22m[39m ± [32m 78.207 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m▄[39m [39m█[34m▁[39m[39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m▁[39m█[34m█

In [33]:
function abs_dist(y::Array{T}, X::Array{T}, j) where T
    # Here I use a bigger Int type than 8 due to avoid
    # res beeing overflowed
    res = Int16(0)
    @inbounds @fastmath  for k in eachindex(y)
        res += abs(X[k, j] - y[k])
    end
    return res
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, X, j)    
    end
    return distances
end

@benchmark linear_scann_exact($abs_dist, $query_code, $PQcodes_int8)

linear_scann_exact (generic function with 1 method)

## DEEP1B

In [7]:
path = joinpath(homedir(), "TFM", "ann-benchmarks", "deep-image-96-angular.hdf5")
#path = joinpath(homedir(), "Datasets", "DEEP1B", "deep-image-96-angular.hdf5")

"/home/adegaray/TFM/ann-benchmarks/deep-image-96-angular.hdf5"

In [8]:
f = h5open(path, "r")

X_tr_vecs = read(f["train"])
X_te_vecs = read(f["test"]);
true_neighbors = read(f["neighbors"])
true_distances = read(f["distances"])

true_neighbors .= true_neighbors .+ 1;

@show size(X_tr_vecs)
@show size(X_te_vecs)
@show size(true_neighbors)
@show size(true_distances)

size(X_tr_vecs) = (96, 9990000)
size(X_te_vecs) = (96, 10000)
size(true_neighbors) = (100, 10000)
size(true_distances) = (100, 10000)


(100, 10000)

In [9]:
n_features, n_examples = size(X_tr_vecs)

(96, 9990000)

In [10]:
P_shared = vec(Float32.(npzread("1dkmeans_prototypes/1dkmeans_shared_prototypes_DEEP1B.npy")))

32-element Vector{Float32}:
 -0.27250472
 -0.2139312
 -0.17616773
 -0.14658304
 -0.12195256
 -0.100370415
 -0.08112478
 -0.0633399
 -0.04658539
 -0.030714953
  ⋮
  0.16274813
  0.17819086
  0.19457616
  0.21223503
  0.23164739
  0.25349933
  0.2793261
  0.31243566
  0.36135882

In [11]:
PQcodes_shared = Array{Int8}(undef, n_features, n_examples);

for j in 1:n_examples
    PQcodes_shared[:,j] = encode_shared(euclidean, X_tr_vecs[:,j], P_shared)  
end

In [12]:
size(PQcodes_shared)

(96, 9990000)

In [13]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = Int8.(vec(query_code))

PQcodes_int8 = Int8.(PQcodes_shared);

pq_distances = linear_scann_exact(abs_dist, query_code, PQcodes_int8)
top_k_pq = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq, query_true_neighbors, top_k);

recall(top_k_pq, query_true_neighbors, top_k) = 0.64


### Top_k_expansion + finetunning

In [14]:
query_id = 5
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = Int8.(vec(query_code))

PQcodes_int8 = Int8.(PQcodes_shared);
best_ids = linear_scann_exact_pq(abs_dist, query_code, PQcodes_int8, top_k, 10);

#linear_scann_exact(euclidean_mat, query, view(X_tr_vecs,:,best_ids))
distances_candidates_expanded = linear_scann_exact(euclidean_mat2, query, X_tr_vecs[:,best_ids]);
permutation_expanded = sortperm(distances_candidates_expanded)[1:top_k];
@show recall(best_ids[permutation_expanded], query_true_neighbors, top_k);

recall(best_ids[permutation_expanded], query_true_neighbors, top_k) = 1.0


In [13]:
function compute_mean_recall_extra(X, PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k::Int, dist::Function)

    n_features, n_examples = size(X)
    PQcodes_int8 = Int8.(PQcodes_shared);
    average = 0.
    println("Computing mean recall")
    for query_id in 1:n_examples
        println("Query id:",query_id)
        query = X[:, query_id]
        query_code = encode_shared(euclidean, query, P_shared)
        query_code = Int8.(vec(query_code))
        best_ids = linear_scann_exact_pq(dist, query_code, PQcodes_int8, top_k, extra_factor);
        distances_candidates_expanded = linear_scann_exact(euclidean_mat2, query, X_tr_vecs[:,best_ids]);
        permutation_expanded = sortperm(distances_candidates_expanded)[1:top_k]
        r = recall(best_ids[permutation_expanded], true_neighbors[:,query_id], top_k);
        println("Recall:",r)
        average += r;
        end
    return average / n_examples
    end

top_k = 100
extra_factor = 10
avg_recall = compute_mean_recall_extra(X_te_vecs[:, 1:10], PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k, abs_dist) 

Computing mean recall
Query id:1
Recall:

1.0
Query id:2
Recall:

1.0
Query id:3
Recall:

1.0
Query id:4
Recall:

1.0
Query id:5
Recall:

1.0
Query id:6
Recall:

1.0
Query id:7
Recall:

1.0
Query id:8
Recall:

1.0
Query id:9
Recall:

1.0
Query id:10
Recall:

1.0


1.0

We can reduce the extra_factor to 5 and still get perfect quality:

In [15]:
function compute_mean_recall_extra(X, PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k::Int, dist::Function)

    n_features, n_examples = size(X)
    PQcodes_int8 = Int8.(PQcodes_shared);
    average = 0.
    println("Computing mean recall")
    for query_id in 1:n_examples
        println("Query id:",query_id)
        query = X[:, query_id]
        query_code = encode_shared(euclidean, query, P_shared)
        query_code = Int8.(vec(query_code))
        best_ids = linear_scann_exact_pq(dist, query_code, PQcodes_int8, top_k, extra_factor);
        distances_candidates_expanded = linear_scann_exact(euclidean_mat2, query, X_tr_vecs[:,best_ids]);
        permutation_expanded = sortperm(distances_candidates_expanded)[1:top_k]
        r = recall(best_ids[permutation_expanded], true_neighbors[:,query_id], top_k);
        println("Recall:",r)
        average += r;
        end
    return average / n_examples
    end

top_k = 100
extra_factor = 5
avg_recall = compute_mean_recall_extra(X_te_vecs[:, 1:10], PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k, abs_dist) 

Computing mean recall
Query id:1
Recall:

0.99
Query id:2
Recall:

1.0
Query id:3
Recall:

1.0
Query id:4
Recall:

1.0
Query id:5
Recall:

0.99
Query id:6
Recall:

1.0
Query id:7
Recall:

1.0
Query id:8
Recall:

0.98
Query id:9
Recall:

0.99
Query id:10
Recall:

1.0


0.9950000000000001

### Benchmark times exact vs linearscann

In [15]:
@inline function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds @fastmath  for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, view(X,:,j))    
    end
    return distances
end

query_id = 5
query = X_te_vecs[:,query_id];

@benchmark linear_scann_exact($Euclidean0, $query, $X_tr_vecs)

BenchmarkTools.Trial: 12 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m348.424 ms[22m[39m … [35m680.322 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m436.118 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m456.203 ms[22m[39m ± [32m109.172 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m▁[39m▁[39m [39m [39m▁[39m [39m█[39m [39m [39m [39m [39m [39m [39m [34m▁[39m[39m [39m [39m [39m▁[39m [32m [39m[39m▁[39m▁[39m [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m [39m [39m [39m▁[39m [39m 
  [39m█[39m█[39m▁[39m▁

In [17]:
function abs_dist(y::Array{T}, X::Array{T}, j) where T
    # Here I use a bigger Int type than 8 due to avoid
    # res beeing overflowed
    res = Int16(0)
    @inbounds @fastmath  for k in eachindex(y)
        res += abs(X[k, j] - y[k])
    end
    return res
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, X, j)    
    end
    return distances
end

query_code = encode_shared(euclidean, query, P_shared)
query_code = Int8.(vec(query_code))
PQcodes_int8 = Int8.(PQcodes_shared);

@benchmark linear_scann_exact($abs_dist, $query_code, $PQcodes_int8)

BenchmarkTools.Trial: 15 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m339.066 ms[22m[39m … [35m364.929 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m350.704 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m350.688 ms[22m[39m ± [32m  7.357 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.00% ± 0.00%

  [39m█[39m [39m [39m [39m [39m█[39m [39m [39m [39m [39m [39m [39m [39m█[39m█[39m█[39m [39m█[39m [39m [39m [39m [39m [34m█[39m[39m [39m [39m [32m█[39m[39m [39m [39m [39m [39m█[39m█[39m [39m█[39m [39m█[39m█[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m [39m [39m█[39m [39m 
  [39m█[39m▁[39m▁[39m▁

## GloVe

In [5]:
path = joinpath(homedir(), "TFM", "ann-benchmarks", "glove-100-angular.hdf5")

"/home/adegaray/TFM/ann-benchmarks/glove-100-angular.hdf5"

In [6]:
f = h5open(path, "r")

X_tr_vecs = read(f["train"])
X_te_vecs = read(f["test"]);
true_neighbors = read(f["neighbors"])
true_distances = read(f["distances"])

true_neighbors .= true_neighbors .+ 1;

@show size(X_tr_vecs)
@show size(X_te_vecs)
@show size(true_neighbors)
@show size(true_distances)

size(X_tr_vecs) = (100, 1183514)
size(X_te_vecs) = (100, 10000)
size(true_neighbors) = (100, 10000)
size(true_distances) = (100, 10000)


(100, 10000)

In [7]:
n_features, n_examples = size(X_tr_vecs)

(100, 1183514)

We use a shared quantizer with one prototypes 32-vector resulting of concatenating 5 random features and applying 1dkemans to the concatenated array.

In [48]:
P_shared = vec(Float32.(npzread("1dkmeans_prototypes/1dkmeans_shared_prototypes_GloVe_5feat.npy")))

32-element Vector{Float32}:
 -2.485003
 -1.9322503
 -1.6038234
 -1.3700459
 -1.1872454
 -1.0356696
 -0.9052867
 -0.78944314
 -0.68417054
 -0.5869177
  ⋮
  0.5732735
  0.6769144
  0.7904752
  0.9176359
  1.0637804
  1.2394978
  1.4628069
  1.7744759
  2.3054712

In [50]:
PQcodes_shared = Array{Int8}(undef, n_features, n_examples);

for j in 1:n_examples
    PQcodes_shared[:,j] = encode_shared(euclidean, X_tr_vecs[:,j], P_shared) 
end

### Cosine distance

In [51]:
using LSHFunctions, LinearAlgebra

In [52]:
function cos_dist(y::Array{T}, X::Array{T}, j) where T
    dist = cossim(y,X[:,j])
    return dist
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, X, j)
        #println(distances[j])   
    end
    return distances
end

linear_scann_exact (generic function with 1 method)

In [None]:
#ALGO ASÍ PARA DETERMINAR EL TYPE DEL OUTPUT EN VEZ DE MODIFICAR LOS TYPES DE LOS INPUTS?
#function cos_dist(y, X, j)::Int32
#    dist = cossim(y,X[:,j])
#    return dist
#end

Again, we encounter problems for Int8 and Int16 query types

In [53]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = Int32.(vec(query_code))

PQcodes_int8 = Int32.(PQcodes_shared);

pq_distances = linear_scann_exact(cos_dist, query_code, PQcodes_int8)
top_k_pq_cos = sortperm(pq_distances)[length(pq_distances)-top_k+1:length(pq_distances)];

@show recall(top_k_pq_cos, query_true_neighbors, top_k);

recall(top_k_pq_cos, query_true_neighbors, top_k) = 0.32


#### Top_k_expansion + finetunning

In [54]:
function linear_scann_exact_pq_cos(dist, query, X, top_k, extra_factor)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, X, j)    
    end

    top_result_pos = sortperm(distances)[length(distances)-top_k+1:length(distances)];
    
    return top_result_pos
end

linear_scann_exact_pq_cos (generic function with 1 method)

In [55]:
query_id = 5
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = Int32.(vec(query_code))

PQcodes_int8 = Int32.(PQcodes_shared);
best_ids = linear_scann_exact_pq_cos(cos_dist, query_code, PQcodes_int8, top_k, 10);

#linear_scann_exact(euclidean_mat, query, view(X_tr_vecs,:,best_ids))
distances_candidates_expanded = linear_scann_exact(cos_dist, query, X_tr_vecs[:,best_ids]);
permutation_expanded = sortperm(distances_candidates_expanded)[length(distances_candidates_expanded)-top_k+1:length(distances_candidates_expanded)];
@show recall(best_ids[permutation_expanded], query_true_neighbors, top_k);

recall(best_ids[permutation_expanded], query_true_neighbors, top_k) = 0.37


(Augments from 0.33 to 0.37 when building the shared prototypes out of 5 feats.)

In [56]:
function compute_mean_recall_extra(X, PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k::Int, dist::Function)

    n_features, n_examples = size(X)
    PQcodes_int8 = Int32.(PQcodes_shared);
    average = 0.
    for query_id in 1:n_examples
        query = X[:, query_id]
        query_code = encode_shared(euclidean, query, P_shared)
        query_code = Int32.(vec(query_code))
        best_ids = linear_scann_exact_pq_cos(dist, query_code, PQcodes_int8, top_k, extra_factor);
        distances_candidates_expanded = linear_scann_exact(cos_dist, query, X_tr_vecs[:,best_ids]);
        permutation_expanded = sortperm(distances_candidates_expanded)[length(distances_candidates_expanded)-top_k+1:length(distances_candidates_expanded)];
        r = recall(best_ids[permutation_expanded], true_neighbors[:,query_id], top_k);
        average += r;
        end
    return average / n_examples
    end

extra_factor = 10
top_k = 100
avg_recall = compute_mean_recall_extra(X_te_vecs[:, 1:100], PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k, cos_dist) 

0.4882999999999999

(Augments from 0.466 to 0.488 when building the shared prototypes out of 5 feats.)

### L1 distance

In [58]:
query_id = 1
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = Int8.(vec(query_code))

PQcodes_int8 = Int8.(PQcodes_shared);

pq_distances = linear_scann_exact(abs_dist, query_code, PQcodes_int8)
top_k_pq_l1 = sortperm(pq_distances)[1:top_k];

@show recall(top_k_pq_l1, query_true_neighbors, top_k);

recall(top_k_pq_l1, query_true_neighbors, top_k) = 0.35


(Augments from 0.32 to 0.35 when building the shared prototypes out of 5 feats.)

#### Top_k_pq_cos vs. top_k_pq_l1

In [59]:
inter_top = intersect(top_k_pq_cos, top_k_pq_l1)

55-element Vector{Int64}:
  834700
  660542
  809930
  340498
  455464
  493890
  547335
 1051873
  870694
  392022
       ⋮
  908320
  229952
  995276
  402217
 1133490
  989331
   97479
   93439
  660282

55 common indices in top100

In [60]:
intersect(inter_top, query_true_neighbors)

28-element Vector{Int64}:
  834700
  340498
  547335
  817758
   51822
  265227
  647844
  186282
  859960
  368656
       ⋮
  242798
  908320
  995276
  402217
 1133490
  989331
   97479
   93439
  660282

28 elements out of the total 35/37 that l1/cos dists get right are common.

#### Top_k_expansion + finetunning

In [65]:
query_id = 5
query = X_te_vecs[:,query_id];
query_true_neighbors = true_neighbors[:,query_id]
top_k = 100

query_code = encode_shared(euclidean, query, P_shared)
query_code = Int8.(vec(query_code))

PQcodes_int8 = Int8.(PQcodes_shared);
best_ids = linear_scann_exact_pq(abs_dist, query_code, PQcodes_int8, top_k, 10);

#linear_scann_exact(euclidean_mat, query, view(X_tr_vecs,:,best_ids))
distances_candidates_expanded = linear_scann_exact(euclidean_mat2, query, X_tr_vecs[:,best_ids]);
permutation_expanded = sortperm(distances_candidates_expanded)[1:top_k];
@show recall(best_ids[permutation_expanded], query_true_neighbors, top_k);

recall(best_ids[permutation_expanded], query_true_neighbors, top_k) = 0.37


In [66]:
function compute_mean_recall_extra(X, PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k::Int, dist::Function)

    n_features, n_examples = size(X)
    PQcodes_int8 = Int8.(PQcodes_shared);
    average = 0.
    for query_id in 1:n_examples
        query = X[:, query_id]
        query_code = encode_shared(euclidean, query, P_shared)
        query_code = Int8.(vec(query_code))
        best_ids = linear_scann_exact_pq(dist, query_code, PQcodes_int8, top_k, extra_factor);
        distances_candidates_expanded = linear_scann_exact(euclidean_mat2, query, X_tr_vecs[:,best_ids]);
        permutation_expanded = sortperm(distances_candidates_expanded)[1:top_k]
        r = recall(best_ids[permutation_expanded], true_neighbors[:,query_id], top_k);
        average += r;
        end
    return average / n_examples
    end

extra_factor = 10
top_k = 100
avg_recall = compute_mean_recall_extra(X_te_vecs[:, 1:100], PQcodes_shared, true_neighbors, P_shared, extra_factor, top_k, abs_dist) 

0.49119999999999986

The final recall is similar including the finetunning.

### Benchmark times exact vs linearscann

In [26]:
@inline function Euclidean0(x, query)
    res = zero(eltype(x))
    @inbounds @fastmath  for j in eachindex(x)
        aux = (query[j] - x[j])
        res += aux * aux
    end
    return sqrt(res)
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, view(X,:,j))    
    end
    return distances
end

@benchmark linear_scann_exact($Euclidean0, $query, $X_tr_vecs)

BenchmarkTools.Trial: 121 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m36.832 ms[22m[39m … [35m75.165 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m40.168 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m41.402 ms[22m[39m ± [32m 5.122 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.03% ± 0.30%

  [39m [39m [39m▁[39m [39m [39m▃[39m▁[39m█[39m [34m [39m[39m▃[39m▄[32m▃[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m▄[39m█[39m▅[39m▆[39m█[39m

In [27]:
function abs_dist(y::Array{T}, X::Array{T}, j) where T
    # Here I use a bigger Int type than 8 due to avoid
    # res beeing overflowed
    res = Int16(0)
    @inbounds @fastmath  for k in eachindex(y)
        res += abs(X[k, j] - y[k])
    end
    return res
end

function linear_scann_exact(dist, query, X)

    n_features, n_examples = size(X)
    distances = Array{Float32}(undef, n_examples)
    
    @inbounds for j in 1:n_examples
        distances[j] = dist(query, X, j)    
    end
    return distances
end

@benchmark linear_scann_exact($abs_dist, $query_code, $PQcodes_int8)

BenchmarkTools.Trial: 112 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m38.634 ms[22m[39m … [35m54.199 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m0.00% … 0.00%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m44.842 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m44.967 ms[22m[39m ± [32m 2.898 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m0.03% ± 0.29%

  [39m [39m [39m [39m [39m [39m [39m [39m [39m▂[39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m▂[39m▂[39m [39m▂[39m [39m▂[39m [39m▂[34m [39m[32m [39m[39m▂[39m▄[39m [39m▂[39m [39m▄[39m▂[39m [39m▄[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▄[39m▁[39m▁[39m▁[39m▄[39m▄[39m