In [None]:
using DrWatson
@quickactivate projectdir()

In [None]:
include(srcdir("rdpg.jl"))
using Main.rdpg
using StatsBase, Pipe, Graphs, GraphIO, LightGraphs, DelimitedFiles, Random
using ProgressMeter, DataFrames
using Plots, Ripserer, PersistenceDiagrams, PersistenceDiagramsBase
using Distances, LinearAlgebra, UMAP, TSne
using SparseArrays

In [None]:
"""
    read_graph(; path, delim='\\t', labels=nothing)

Read a delimited edge list from `path` and return the adjacency matrix of the
corresponding undirected graph as a sparse integer matrix.

The first two columns of the file are the integer endpoints of each edge; any
further columns are ignored.

# Keywords
- `path`: location of the edge-list file.
- `delim`: column delimiter handed to `readdlm`.
- `labels`: optional matrix whose first column lists the node identifiers.
  When given, an identifier found in row `i` is mapped to node `i` and the
  result has one node per row of `labels`. When `nothing`, the identifiers in
  the file are used directly as node indices; ids that start at 0 are shifted
  to start at 1, and the node count is the largest resulting index.

# Returns
The 0/1 adjacency matrix after `LightGraphs.LinAlg.symmetrize`.

# Throws
- `ArgumentError` if the file has fewer than two columns, if (without
  `labels`) it has no edges or contains a negative id, or if (with `labels`)
  an endpoint does not occur in the first column of `labels`.
"""
function read_graph(; path, delim='\t', labels=nothing)
    raw = readdlm(path, delim)
    size(raw, 2) >= 2 || throw(ArgumentError(
        "edge list at $path must have at least two columns, got $(size(raw, 2))"
    ))
    edges = Int.(raw[:, 1:2])

    if labels !== nothing
        rownames = labels[:, 1]
        n = length(rownames)

        # One hash lookup per endpoint instead of scanning `rownames` for each.
        # `get!` keeps the first row when an identifier is repeated.
        position = Dict{Any,Int}()
        for (row, name) in enumerate(rownames)
            get!(position, name, row)
        end
        lookup(id) = get(position, id) do
            throw(ArgumentError(
                "node id $id from $path not found in the first column of labels"
            ))
        end

        rows = map(lookup, edges[:, 1])
        cols = map(lookup, edges[:, 2])
    else
        isempty(edges) && throw(ArgumentError(
            "edge list at $path is empty; cannot infer the number of nodes"
        ))
        lowest = minimum(edges)
        lowest >= 0 || throw(ArgumentError(
            "edge list at $path contains a negative node id ($lowest)"
        ))

        # Only 0-based ids need shifting; 1-based ids are already valid indices.
        offset = lowest == 0 ? 1 : 0
        rows = edges[:, 1] .+ offset
        cols = edges[:, 2] .+ offset

        # Size by the largest index so that gaps in the ids (nodes without any
        # edge) do not push entries outside the matrix.
        n = max(maximum(rows), maximum(cols))
    end

    # `max` as the combine function keeps repeated edges at weight 1 instead of
    # summing them.
    A = sparse(rows, cols, ones(Int, length(rows)), n, n, max)
    return LightGraphs.LinAlg.symmetrize(A)
end

In [None]:
# Experiment configuration.
dim = 100          # embedding dimension, passed as `d` to rdpg.spectralEmbed
n = 20000          # number of nodes kept when `subsample` is true
subsample = true   # NOTE: a later cell reassigns this flag before it is read

# Resolve the inputs through DrWatson's `datadir` (as done with `srcdir` and
# `plotsdir` elsewhere in this notebook) so that loading does not depend on the
# current working directory.
# NOTE(review): assumes the standard DrWatson layout with `data/` at the project
# root, i.e. the same files the relative "../data/tmpdata/..." paths pointed to.
path_to_graph = datadir("tmpdata", "large_twitch_edges.csv")
path_to_labels = datadir("tmpdata", "large_twitch_features.csv")

In [None]:
# Node feature table (one row per node) and its header row.
labels, cols = readdlm(path_to_labels, ','; header=true)

# Column 8 is used as the grouping label throughout the notebook.
# NOTE(review): presumably the language code of each account — confirm against `cols`.
langs = labels[:, 8]

Adjacency = read_graph(path=path_to_graph, delim=',', labels=nothing)

# Later cells index `Adjacency`, `langs` and `labels` with the same node
# indices, so a size mismatch would silently pair nodes with the wrong rows.
size(Adjacency, 1) == size(labels, 1) || throw(DimensionMismatch(
    "adjacency matrix has $(size(Adjacency, 1)) nodes but the feature table has " *
    "$(size(labels, 1)) rows"
))

Adjacency

In [None]:
# Run-time switches for the cells below (this overrides the earlier `subsample` setting).
# subsample:  when true, a random subset of `n` nodes is drawn before embedding.
# downsample: when true, the graph is restricted to the selected language groups
#             before embedding; when false, the full graph is embedded.
subsample = false
downsample = true

In [None]:
# Optionally restrict the graph, the grouping labels and the feature table to
# the same random subset of nodes.
if subsample
    # Sampling without replacement throws when more items are requested than
    # exist, so cap the request at the number of available nodes.
    subsample_indices = sample(eachindex(langs), min(n, length(langs)), replace=false)
    Adjacency = Adjacency[subsample_indices, subsample_indices]
    langs = langs[subsample_indices]
    labels = labels[subsample_indices, :]
end

In [None]:
# Nodes whose entry in `langs` is "FR", "RU" or "ZH". The selection is the same
# for both modes, so it is computed once; the tuple avoids allocating a fresh
# array for every element tested.
ind1 = findall(in(("FR", "RU", "ZH")), langs)

if !downsample
    # Embed the full graph. `indx` holds node indices of the full graph, so
    # `labs` keeps every row of the feature table.
    indx = sample(ind1, min(length(ind1), 2000), replace=false)
    labs = labels[:, :]
    A = copy(Adjacency)
else
    # Embed only the induced subgraph on the selected nodes. `indx` holds
    # positions within `ind1`, matching the rows of `labs` and of `A`.
    indx = sample(eachindex(ind1), min(length(ind1), 2000), replace=false)
    labs = labels[ind1, :]
    # Indexing already allocates a new matrix; no need to copy `Adjacency` first.
    A = Adjacency[ind1, ind1]
end

# At most 2000 nodes (`indx`) are kept for plotting; all of `A` is embedded.
Xhat, _ = rdpg.spectralEmbed(A, d=dim, scale=false);

In [None]:
# Embedding rows of the sampled nodes, taken from the unperturbed graph.
Xnh = Xhat[indx, :]

# The adjoints hand `umap` one column per node and bring the 2-D layout back
# to one row per node.
embedding_umap_x = umap(Xnh', 2; n_neighbors=25, metric=Euclidean())'

# Scatter plot of the layout, coloured by column 8 of the feature table.
# Axis limits are left at the Plots defaults.
plt_umap_x = scatter(
    rdpg._Matrix_to_ArrayOfTuples(embedding_umap_x);
    group=labs[indx, 8],
    title="ϵ = ∞",
    legend=:bottomleft,
    ms=3,
    size=(350, 300),
)

In [None]:
# Save the ϵ = ∞ panel. The plot object is passed explicitly so the right
# figure is written even when cells are run out of order, and the output
# directory is created if it does not exist yet.
mkpath(plotsdir("twitch"))
savefig(plt_umap_x, plotsdir("twitch/plt_umap_x.svg"))

In [None]:
# ϵ for this run grows like log(n), with n the number of nodes in A.
ϵ = 1.0 * log(size(A, 1))

# Perturb the graph with rdpg.edgeFlip, then centre by τ(ϵ) and divide by σ(ϵ)².
# NOTE(review): edgeFlip, τ and σ are defined in rdpg.jl (not shown here);
# presumably a randomised edge flip followed by its debiasing — confirm there.
B = let flipped = rdpg.edgeFlip(A, ϵ=ϵ), shift = rdpg.τ(ϵ), denominator = rdpg.σ(ϵ)^2
    (flipped .- shift) ./ denominator
end

# Embed the rescaled matrix and keep the rows of the sampled nodes.
Yhat, _ = rdpg.spectralEmbed(B, d=dim, scale=false)
Ynh1 = Yhat[indx, :];

In [None]:
# 2-D UMAP layout of the ϵ = log(n) embedding; the adjoints hand `umap` one
# column per node and bring the result back to one row per node.
embedding_umap_y1 = umap(Ynh1', 2; n_neighbors=25, metric=Euclidean())'

# Scatter plot of the layout, coloured by column 8 of the feature table.
# Axis limits are left at the Plots defaults.
plt_umap_y1 = scatter(
    rdpg._Matrix_to_ArrayOfTuples(embedding_umap_y1);
    group=labs[indx, 8],
    title="ϵ ≍ log(n)",
    legend=:bottomleft,
    ms=3,
    size=(350, 300),
)

In [None]:
# Save the ϵ ≍ log(n) panel. The plot object is passed explicitly so the right
# figure is written even when cells are run out of order, and the output
# directory is created if it does not exist yet.
mkpath(plotsdir("twitch"))
savefig(plt_umap_y1, plotsdir("twitch/plt_umap_y1.svg"))

In [None]:
# ϵ for this run grows like √log(n), with n the number of nodes in A.
ϵ = 2.0 * sqrt(log(size(A, 1)))

# Perturb the graph with rdpg.edgeFlip, then centre by τ(ϵ) and divide by σ(ϵ)².
# NOTE(review): edgeFlip, τ and σ are defined in rdpg.jl (not shown here);
# presumably a randomised edge flip followed by its debiasing — confirm there.
B = let flipped = rdpg.edgeFlip(A, ϵ=ϵ), shift = rdpg.τ(ϵ), denominator = rdpg.σ(ϵ)^2
    (flipped .- shift) ./ denominator
end

# Embed the rescaled matrix and keep the rows of the sampled nodes.
Yhat, _ = rdpg.spectralEmbed(B, d=dim, scale=false)
Ynh2 = Yhat[indx, :];

In [None]:
# 2-D UMAP layout of the ϵ = 2√log(n) embedding; the adjoints hand `umap` one
# column per node and bring the result back to one row per node.
embedding_umap_y2 = umap(Ynh2', 2; n_neighbors=25, metric=Euclidean())'

# Scatter plot of the layout, coloured by column 8 of the feature table.
# Axis limits are left at the Plots defaults.
plt_umap_y2 = scatter(
    rdpg._Matrix_to_ArrayOfTuples(embedding_umap_y2);
    group=labs[indx, 8],
    title="ϵ ≍ √(log(n))",
    legend=:bottomleft,
    ms=3,
    size=(350, 300),
)

In [None]:
# Save the ϵ ≍ √(log(n)) panel. The plot object is passed explicitly so the
# right figure is written even when cells are run out of order, and the output
# directory is created if it does not exist yet.
# NOTE(review): the file name follows the `embedding_umap_*` variable rather
# than the `plt_umap_*` pattern used for the first two panels; it is kept as is
# in case other material already refers to it.
mkpath(plotsdir("twitch"))
savefig(plt_umap_y2, plotsdir("twitch/embedding_umap_y2.svg"))

In [None]:
# ϵ for this run grows like log(log(n)), with n the number of nodes in A.
ϵ = 2.0 * log(log(size(A, 1)))

# Perturb the graph with rdpg.edgeFlip, then centre by τ(ϵ) and divide by σ(ϵ)².
# NOTE(review): edgeFlip, τ and σ are defined in rdpg.jl (not shown here);
# presumably a randomised edge flip followed by its debiasing — confirm there.
B = let flipped = rdpg.edgeFlip(A, ϵ=ϵ), shift = rdpg.τ(ϵ), denominator = rdpg.σ(ϵ)^2
    (flipped .- shift) ./ denominator
end

# Embed the rescaled matrix and keep the rows of the sampled nodes.
Yhat, _ = rdpg.spectralEmbed(B, d=dim, scale=false)
Ynh3 = Yhat[indx, :];

In [None]:
# 2-D UMAP layout of the ϵ = 2·log(log(n)) embedding; the adjoints hand `umap`
# one column per node and bring the result back to one row per node.
embedding_umap_y3 = umap(Ynh3', 2; n_neighbors=25, metric=Euclidean())'

# Scatter plot of the layout, coloured by column 8 of the feature table.
# Axis limits are left at the Plots defaults.
plt_umap_y3 = scatter(
    rdpg._Matrix_to_ArrayOfTuples(embedding_umap_y3);
    group=labs[indx, 8],
    title="ϵ ≍ log(log(n))",
    legend=:bottomleft,
    ms=3,
    size=(350, 300),
)

In [None]:
# Save the ϵ ≍ log(log(n)) panel. The plot object is passed explicitly so the
# right figure is written even when cells are run out of order, and the output
# directory is created if it does not exist yet.
# NOTE(review): the file name follows the `embedding_umap_*` variable rather
# than the `plt_umap_*` pattern used for the first two panels; it is kept as is
# in case other material already refers to it.
mkpath(plotsdir("twitch"))
savefig(plt_umap_y3, plotsdir("twitch/embedding_umap_y3.svg"))