Skip to content

Commit

Permalink
Use StatsBase to draw negative samples without allocations
Browse files Browse the repository at this point in the history
  • Loading branch information
dillondaudert committed Dec 13, 2019
1 parent 960c3f3 commit 3cc048f
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 7 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
NearestNeighborDescent = "dd2c4c9e-a32f-5b2f-b342-08c2f244fce8"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
Arpack = "0.4"
Distances = "0.8"
LsqFit = "0.6, 0.7, 0.8"
NearestNeighborDescent = "0.3"
StatsBase = "0.32"
julia = "1.3"

[extras]
Expand Down
1 change: 1 addition & 0 deletions src/UMAP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ using LinearAlgebra
using LsqFit: curve_fit
using NearestNeighborDescent
using SparseArrays
using StatsBase

include("utils.jl")
include("layouts.jl")
Expand Down
15 changes: 8 additions & 7 deletions src/layouts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,26 @@ function optimize_embedding!(embedding,
end

function _optimize_embedding!(embedding::Matrix{T}, graph, alpha::T, gamma::T, neg_sample_rate::Integer, a::T, b::T) where T <: Real
@inbounds for i in 1:size(graph, 2)
sqeucl = SqEuclidean()
neg_samples = Array{Int}(undef, neg_sample_rate)
for i in 1:size(graph, 2)
for ind in nzrange(graph, i)
j = rowvals(graph)[ind]
p = nonzeros(graph)[ind]
if rand() <= p
pdist = evaluate(SqEuclidean(), view(embedding, :, i), view(embedding, :, j))
pdist = evaluate(sqeucl, view(embedding, :, i), view(embedding, :, j))
delta = pos_grad_coef(pdist, a, b)
@simd for d in 1:size(embedding, 1)
@inbounds @simd for d in 1:size(embedding, 1)
grad = clamp(delta * (embedding[d,i] - embedding[d,j]), -4, 4)
embedding[d,i] += alpha * grad
embedding[d,j] -= alpha * grad
end

for _ in 1:neg_sample_rate
k = rand(1:size(graph, 2))
for k in sample!(1:size(graph, 2), neg_samples)
i != k || continue # don't evaluate if the same point
ndist = evaluate(SqEuclidean(), view(embedding, :, i), view(embedding, :, k))
ndist = evaluate(sqeucl, view(embedding, :, i), view(embedding, :, k))
delta = neg_grad_coef(ndist, gamma, a, b)
@simd for d in 1:size(embedding, 1)
@inbounds @simd for d in 1:size(embedding, 1)
if delta > 0
grad = clamp(delta * (embedding[d, i] - embedding[d, k]), -4, 4)
embedding[d, i] += alpha * grad
Expand Down

0 comments on commit 3cc048f

Please sign in to comment.