Modified version of https://github.com/genkuroki/public/blob/main/0016/apricot/julia_translation_of_python_reimpl.ipynb

In [1]:
#using Seaborn
using ScikitLearn: @sk_import
@sk_import datasets: fetch_covtype
#using Random
#using StatsBase: sample

PyObject <function fetch_covtype at 0x000000006A6EAC10>

In [2]:
# NOTE: despite the `digits_` prefix, this loads scikit-learn's forest cover-type
# dataset (`fetch_covtype`), returned as a Dict with "data", "target", etc.
digits_data = fetch_covtype()

Dict{Any, Any} with 6 entries:
  "feature_names" => ["Elevation", "Aspect", "Slope", "Horizontal_Distance_To_H…
  "frame"         => nothing
  "target_names"  => ["Cover_Type"]
  "data"          => [2596.0 51.0 … 0.0 0.0; 2590.0 56.0 … 0.0 0.0; … ; 2384.0 …
  "target"        => Int32[5, 5, 2, 2, 5, 2, 5, 5, 5, 5  …  3, 3, 3, 3, 3, 3, 3…
  "DESCR"         => ".. _covtype_dataset:\n\nForest covertypes\n--------------…

In [3]:
# `abs.` makes every entry non-negative, so the `sqrt` calls in the gain
# computation never see a negative argument; `permutedims` swaps the two axes so
# that the `d, n = size(X)` convention of the fit_* functions (n = number of
# selectable columns) applies.
X_digits = permutedims(abs.(digits_data["data"]))
# Show the element type and dimensions of the prepared matrix.
summary(X_digits)

"54×581012 Matrix{Float64}"

In [4]:
"""`calculate_gains!(X, gains, current_values, idxs, current_concave_values_sum)` mutates `gains` only"""
function calculate_gains!(X, gains, current_values, idxs, current_concave_values_sum)
    feature_rows = axes(X, 1)
    # Each slot is written by exactly one iteration, so the slots can be filled
    # in parallel without synchronisation.
    Threads.@threads for slot in eachindex(idxs)
        @inbounds begin
            col = idxs[slot]
            # Objective value if column `col` were added to the running totals.
            gains[slot] = sum(
                row -> sqrt(current_values[row] + X[row, col]),
                feature_rows,
            )
        end
    end
    # Convert objective values into marginal gains. The broadcast touches every
    # entry of `gains`, including slots past `length(idxs)` that the loop above
    # did not refresh.
    gains .-= current_concave_values_sum
    return gains
end

@doc calculate_gains!

`calculate_gains!(X, gains, current_values, idxs, current_concave_values_sum)` mutates `gains` only


In [5]:
"""
The revised version of using popat!(idxs, idx) without mask
"""
function fit_popat(X, k; calculate_gains! = calculate_gains!)
    # Greedy selection of columns of `X` under a budget `k`.
    #
    # Arguments
    #   X                 matrix whose columns are the selectable items
    #   k                 budget; each pick costs 1.0
    #   calculate_gains!  callback `(X, gains, current_values, idxs, base)` that
    #                     writes the marginal gain of candidate `idxs[i]` into
    #                     `gains[i]`
    #
    # Returns `(ranking, total_gains)`: the selected column indices in pick
    # order and the gain recorded for each pick. If the budget exceeds the
    # number of columns, selection stops once every column has been picked.
    #
    # Candidate bookkeeping: `idxs` lists the columns still available and the
    # winner is removed in place with `popat!`, so no mask is needed.
    d, n = size(X)

    cost = 0.0
    curr_cost = 1.0  # uniform cost per pick

    ranking = Int[]
    total_gains = Float64[]

    current_values = zeros(d)
    current_concave_values_sum = sum(sqrt, current_values)

    idxs = collect(1:n)

    gains = zeros(n)
    while cost < k
        # Stop before the expensive gain pass when the next pick would exceed
        # the budget, or when no candidate is left (k > n, or X has no columns).
        (cost + curr_cost > k || isempty(idxs)) && break

        calculate_gains!(X, gains, current_values, idxs, current_concave_values_sum)

        # Only the first `length(idxs)` slots of `gains` belong to live
        # candidates; slots beyond that hold leftovers from earlier rounds and
        # must not take part in the argmax.
        idx = argmax(view(gains, eachindex(idxs)))
        best_idx = idxs[idx]

        cost += curr_cost
        gain = gains[idx] * curr_cost

        # Fold the winning column into the running totals.
        current_values .+= @view X[:, best_idx]
        current_concave_values_sum = sum(sqrt, current_values)

        push!(ranking, best_idx)
        push!(total_gains, gain)

        # Drop the winner from the candidate list.
        popat!(idxs, idx)
    end
    return ranking, total_gains
end

fit_popat

In [6]:
"""
The revised version of using mask::BitVector with findall(mask)
"""
function fit_bitvector(X, k; calculate_gains! = calculate_gains!)
    # Greedy selection of columns of `X` under a budget `k`.
    #
    # Arguments
    #   X                 matrix whose columns are the selectable items
    #   k                 budget; each pick costs 1.0
    #   calculate_gains!  callback `(X, gains, current_values, idxs, base)` that
    #                     writes the marginal gain of candidate `idxs[i]` into
    #                     `gains[i]`
    #
    # Returns `(ranking, total_gains)`: the selected column indices in pick
    # order and the gain recorded for each pick. If the budget exceeds the
    # number of columns, selection stops once every column has been picked.
    #
    # Candidate bookkeeping: a `BitVector` mask (`false` = already selected) is
    # updated after each pick and `idxs` is rebuilt from it with `findall(mask)`.
    d, n = size(X)

    cost = 0.0
    curr_cost = 1.0  # uniform cost per pick

    ranking = Int[]
    total_gains = Float64[]

    mask = trues(n) # `false` stands for "masked".
    current_values = zeros(d)
    current_concave_values_sum = sum(sqrt, current_values)

    idxs = collect(1:n)

    gains = zeros(n)
    while cost < k
        # Stop before the expensive gain pass when the next pick would exceed
        # the budget, or when no candidate is left (k > n, or X has no columns).
        (cost + curr_cost > k || isempty(idxs)) && break

        calculate_gains!(X, gains, current_values, idxs, current_concave_values_sum)

        # Only the first `length(idxs)` slots of `gains` belong to live
        # candidates; slots beyond that hold leftovers from earlier rounds and
        # must not take part in the argmax.
        idx = argmax(view(gains, eachindex(idxs)))
        best_idx = idxs[idx]

        cost += curr_cost
        gain = gains[idx] * curr_cost

        # Fold the winning column into the running totals.
        current_values .+= @view X[:, best_idx]
        current_concave_values_sum = sum(sqrt, current_values)

        push!(ranking, best_idx)
        push!(total_gains, gain)

        # Mask the winner and rebuild the candidate list from the mask.
        mask[best_idx] = false
        idxs = findall(mask)
    end
    return ranking, total_gains
end

fit_bitvector

In [7]:
"""
The revised version of using mask::Vector{Float64} with findall(mask .== 0)
"""
function fit_f64vector(X, k; calculate_gains! = calculate_gains!)
    # Greedy selection of columns of `X` under a budget `k`.
    #
    # Arguments
    #   X                 matrix whose columns are the selectable items
    #   k                 budget; each pick costs 1.0
    #   calculate_gains!  callback `(X, gains, current_values, idxs, base)` that
    #                     writes the marginal gain of candidate `idxs[i]` into
    #                     `gains[i]`
    #
    # Returns `(ranking, total_gains)`: the selected column indices in pick
    # order and the gain recorded for each pick. If the budget exceeds the
    # number of columns, selection stops once every column has been picked.
    #
    # Candidate bookkeeping: a `Vector{Float64}` mask (0 = available,
    # 1 = already selected) is updated after each pick and `idxs` is rebuilt
    # with `findall(mask .== 0)`.
    d, n = size(X)

    cost = 0.0
    curr_cost = 1.0  # uniform cost per pick

    ranking = Int[]
    total_gains = Float64[]

    mask = zeros(n)
    current_values = zeros(d)
    current_concave_values_sum = sum(sqrt, current_values)

    idxs = collect(1:n)

    gains = zeros(n)
    while cost < k
        # Stop before the expensive gain pass when the next pick would exceed
        # the budget, or when no candidate is left (k > n, or X has no columns).
        (cost + curr_cost > k || isempty(idxs)) && break

        calculate_gains!(X, gains, current_values, idxs, current_concave_values_sum)

        # Only the first `length(idxs)` slots of `gains` belong to live
        # candidates; slots beyond that hold leftovers from earlier rounds and
        # must not take part in the argmax.
        idx = argmax(view(gains, eachindex(idxs)))
        best_idx = idxs[idx]

        cost += curr_cost
        gain = gains[idx] * curr_cost

        # Fold the winning column into the running totals.
        current_values .+= @view X[:, best_idx]
        current_concave_values_sum = sum(sqrt, current_values)

        push!(ranking, best_idx)
        push!(total_gains, gain)

        # Mark the winner as taken and rebuild the candidate list from the mask.
        mask[best_idx] = 1
        idxs = findall(mask .== 0)
    end
    return ranking, total_gains
end

fit_f64vector

In [8]:
"""
The original version of using mask::Vector{Float64} with findall(==(0), mask)
"""
function fit_f64vector_org(X, k; calculate_gains! = calculate_gains!)
    # Greedy selection of columns of `X` under a budget `k`.
    #
    # Arguments
    #   X                 matrix whose columns are the selectable items
    #   k                 budget; each pick costs 1.0
    #   calculate_gains!  callback `(X, gains, current_values, idxs, base)` that
    #                     writes the marginal gain of candidate `idxs[i]` into
    #                     `gains[i]`
    #
    # Returns `(ranking, total_gains)`: the selected column indices in pick
    # order and the gain recorded for each pick. If the budget exceeds the
    # number of columns, selection stops once every column has been picked.
    #
    # Candidate bookkeeping: a `Vector{Float64}` mask (0 = available,
    # 1 = already selected) is updated after each pick and `idxs` is rebuilt
    # with the predicate form `findall(==(0), mask)`.
    d, n = size(X)

    cost = 0.0
    curr_cost = 1.0  # uniform cost per pick

    ranking = Int[]
    total_gains = Float64[]

    mask = zeros(n)
    current_values = zeros(d)
    current_concave_values_sum = sum(sqrt, current_values)

    idxs = collect(1:n)

    gains = zeros(n)
    while cost < k
        # Stop before the expensive gain pass when the next pick would exceed
        # the budget, or when no candidate is left (k > n, or X has no columns).
        (cost + curr_cost > k || isempty(idxs)) && break

        calculate_gains!(X, gains, current_values, idxs, current_concave_values_sum)

        # Only the first `length(idxs)` slots of `gains` belong to live
        # candidates; slots beyond that hold leftovers from earlier rounds and
        # must not take part in the argmax.
        idx = argmax(view(gains, eachindex(idxs)))
        best_idx = idxs[idx]

        cost += curr_cost
        gain = gains[idx] * curr_cost

        # Fold the winning column into the running totals.
        current_values .+= @view X[:, best_idx]
        current_concave_values_sum = sum(sqrt, current_values)

        push!(ranking, best_idx)
        push!(total_gains, gain)

        # Mark the winner as taken and rebuild the candidate list from the mask.
        mask[best_idx] = 1
        idxs = findall(==(0), mask)
    end
    return ranking, total_gains
end

fit_f64vector_org

In [9]:
# Selection budget passed to every fit_* variant below; each pick costs 1.0,
# so this is the number of columns each run selects.
k = 1000

1000

In [10]:
@time ranking0_pa, gains0_pa = fit_popat(X_digits, k; calculate_gains! = calculate_gains!);

 12.730749 seconds (1.10 M allocations: 74.981 MiB, 0.33% gc time, 2.08% compilation time)


In [11]:
@time ranking0_pa, gains0_pa = fit_popat(X_digits, k; calculate_gains! = calculate_gains!);

 12.635654 seconds (63.24 k allocations: 15.008 MiB)


In [12]:
@time ranking0_pa, gains0_pa = fit_popat(X_digits, k; calculate_gains! = calculate_gains!);

 12.465230 seconds (63.35 k allocations: 15.011 MiB)


In [13]:
@time ranking0_bv, gains0_bv = fit_bitvector(X_digits, k; calculate_gains! = calculate_gains!);

 13.541820 seconds (331.09 k allocations: 4.354 GiB, 2.28% gc time, 0.57% compilation time)


In [14]:
@time ranking0_bv, gains0_bv = fit_bitvector(X_digits, k; calculate_gains! = calculate_gains!);

 13.431195 seconds (65.39 k allocations: 4.340 GiB, 2.05% gc time)


In [15]:
@time ranking0_bv, gains0_bv = fit_bitvector(X_digits, k; calculate_gains! = calculate_gains!);

 13.421971 seconds (65.45 k allocations: 4.340 GiB, 2.06% gc time)


In [16]:
@time ranking0_f64v, gains0_f64v = fit_f64vector(X_digits, k; calculate_gains! = calculate_gains!);

 13.856082 seconds (843.87 k allocations: 4.456 GiB, 2.17% gc time, 1.16% compilation time)


In [17]:
@time ranking0_f64v, gains0_f64v = fit_f64vector(X_digits, k; calculate_gains! = calculate_gains!);

 13.681664 seconds (69.28 k allocations: 4.416 GiB, 2.03% gc time)


In [18]:
@time ranking0_f64v, gains0_f64v = fit_f64vector(X_digits, k; calculate_gains! = calculate_gains!);

 13.658897 seconds (69.29 k allocations: 4.416 GiB, 1.96% gc time)


In [19]:
@time ranking0_org, gains0_org = fit_f64vector_org(X_digits, k; calculate_gains! = calculate_gains!);

 18.766136 seconds (446.18 k allocations: 8.829 GiB, 2.04% gc time, 0.62% compilation time)


In [20]:
@time ranking0_org, gains0_org = fit_f64vector_org(X_digits, k; calculate_gains! = calculate_gains!);

 18.323356 seconds (83.94 k allocations: 8.809 GiB, 2.38% gc time)


In [21]:
@time ranking0_org, gains0_org = fit_f64vector_org(X_digits, k; calculate_gains! = calculate_gains!);

 18.245028 seconds (84.05 k allocations: 8.809 GiB, 1.82% gc time)


In [22]:
# Sanity check: the chained `==` is true only if all four variants produced
# exactly the same selection order and exactly the same per-pick gains.
@show ranking0_pa == ranking0_bv == ranking0_f64v == ranking0_org
@show gains0_pa == gains0_bv == gains0_f64v == gains0_org;

ranking0_pa == ranking0_bv == ranking0_f64v == ranking0_org = true
gains0_pa == gains0_bv == gains0_f64v == gains0_org = true
