In [84]:
using LightGraphs, GraphIO
using TimerOutputs
using StatsBase

┌ Info: Precompiling StatsBase [2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91]
└ @ Base loading.jl:1260


In [2]:
# Global timer that accumulates the sections recorded via @timeit.
const to = TimerOutput()

# Directory that is scanned for input files.
dir = "input_files"

# Keep only the regular files found directly in `dir`; entries are bare names,
# so they must be joined with `dir` again before being opened.
input_files = filter(readdir(dir)) do entry
  isfile(joinpath(dir, entry))
end

6-element Array{String,1}:
 "airtc.txt"
 "dolphins.txt"
 "facebook_combined.txt"
 "karateclub.txt"
 "lesmis.txt"
 "mcldata.txt"

In [5]:
# Load the selected input files (currently just the 4th entry) as SimpleGraphs,
# printing each path before it is read.
for entry in input_files[4:4]
  filename = joinpath(dir, entry)
  println(filename)
  g = loadgraph(filename, "graph", GraphIO.EdgeList.EdgeListFormat()) |> SimpleGraph
  # Timing hook, currently disabled:
  #   @timeit to filename result = pseudo_random_walk(filename)
end

input_files\karateclub.txt


In [254]:
"""
    label_partition(G; k=8)

Assign every vertex of `G` a label in `1:k` by growing `k` randomly chosen seed
vertices outward.

`k` distinct seeds are sampled and labeled `1..k`. The labeling then proceeds in
sweeps: in each sweep every vertex on the frontier (a labeled vertex that may still
have unlabeled neighbors) copies its label to one uniformly chosen unlabeled
neighbor. Vertices labeled during a sweep start spreading their label in the next
sweep.

If a sweep labels nothing while unlabeled vertices remain (they are unreachable from
every labeled vertex, e.g. in a disconnected graph), the lowest-numbered unlabeled
vertex is given a random label from `1:k` and growth continues from it, so the
function always terminates.

# Keywords
- `k`: number of seed vertices / labels; must satisfy `1 <= k <= nv(G)`.

# Returns
A `Vector{Int}` of length `nv(G)` whose `v`-th entry is the label of vertex `v`.

# Throws
- `ArgumentError` if `k` is outside `1:nv(G)`.
"""
function label_partition(G; k=8)
  n = nv(G)
  1 <= k <= n || throw(ArgumentError("k must satisfy 1 <= k <= nv(G) = $n, got $k"))

  # 0 marks "not labeled yet"; real labels are 1..k.
  labels = zeros(Int, n)

  # Labeled vertices that may still have unlabeled neighbors.
  frontier = sample(vertices(G), k; replace=false)
  for (label, seed) in enumerate(frontier)
    labels[seed] = label
  end

  remaining = n - k   # number of vertices still unlabeled
  cursor = 1          # scan position used only by the re-seeding fallback below
  while remaining > 0
    next_frontier = empty(frontier)
    for node in frontier
      candidates = filter(v -> labels[v] == 0, neighbors(G, node))
      # A vertex with no unlabeled neighbor can never gain one again: drop it.
      isempty(candidates) && continue
      chosen = rand(candidates)
      labels[chosen] = labels[node]
      remaining -= 1
      push!(next_frontier, node, chosen)
    end

    if isempty(next_frontier)
      # No progress: the remaining vertices are unreachable from the labeled ones.
      # Labels never revert to 0, so the cursor only ever needs to move forward;
      # `remaining > 0` guarantees it stops on a valid index.
      while labels[cursor] != 0
        cursor += 1
      end
      labels[cursor] = rand(1:k)
      remaining -= 1
      push!(next_frontier, cursor)
    end

    frontier = next_frontier
  end
  return labels
end

"""
    graph_partition(g; k=8, c=10)

Run `label_partition` on `g` `c` times and keep the labeling with the highest
`modularity(g, labels)`. The modularity of every attempt is printed.

# Keywords
- `k`: forwarded to `label_partition` as its `k` keyword.
- `c`: number of attempts.

# Returns
A tuple `(best_partition, best_modularity)`. When `c < 1` no attempt is made and
the result is `(Int[], -Inf)`.
"""
function graph_partition(g; k=8, c=10)
  best_partition = Int[]
  best_modularity = -Inf
  for _ in 1:c
    # `k` is a keyword argument of label_partition; passing it positionally
    # raises a MethodError.
    partitions = label_partition(g; k=k)
    partition_modularity = modularity(g, partitions)
    println(partition_modularity)
    if best_modularity < partition_modularity
      best_partition = partitions
      best_modularity = partition_modularity
    end
  end
  return best_partition, best_modularity
end

graph_partition (generic function with 1 method)

In [255]:
# Build the path of the third entry of `input_files` and read it, in edge-list
# format, into the SimpleGraph `g` used by the cells below.
filename = joinpath(dir, input_files[3])
g = loadgraph(filename, "graph", GraphIO.EdgeList.EdgeListFormat()) |> SimpleGraph

{4037, 88234} undirected simple Int64 graph

In [258]:
# Time one graph_partition call (k=8, c=25) in the timer `to`, under a section
# labelled with the current `filename`; the call's return value is the cell result.
@timeit to filename graph_partition(g; k=8, c=25)

0.46422860653626435
0.4953345279588237
0.45645217007780103
0.5572376392318827
0.4323467302985306
0.40444082171832624
0.36661519013149196
0.4238655853305471
0.40122982417753356
0.34986252793478234
0.3776476500459429
0.5324217257377585
0.31479973576419884
0.35232138271110464
0.5003936264957369
0.3383283110257897
0.4499145230517835
0.44401340994017935
0.572846504009003
0.34394878251952227
0.33547869764907695
0.3612668165870185
0.41253736961821186
0.30063991896795206
0.4655708143769612


([7, 7, 7, 7, 7, 7, 7, 7, 7, 7  …  5, 5, 5, 5, 5, 5, 5, 5, 5, 5], 0.572846504009003)

In [259]:
# Display the timing/allocation table accumulated in the global timer.
to

[0m[1m ──────────────────────────────────────────────────────────────────────────────[22m
[0m[1m                               [22m        Time                   Allocations      
                               ──────────────────────   ───────────────────────
       Tot / % measured:            2871s / 0.84%           42.7GiB / 46.8%    

 Section               ncalls     time   %tot     avg     alloc   %tot      avg
 ──────────────────────────────────────────────────────────────────────────────
 input_files\facebo...      9    24.0s   100%   2.67s   20.0GiB  100%   2.22GiB
[0m[1m ──────────────────────────────────────────────────────────────────────────────[22m

In [252]:
# Scratch check: iterating over a copy of a Set lets the loop body add elements
# to the original set without affecting which elements are visited.
test = Set([1, 2, 3, 4])
for t in copy(test)
  println(t)
  # push! mutates `test` in place. Rebinding it (`test = push!(...)`) inside a
  # top-level loop would make `test` loop-local when run outside the notebook's
  # soft scope, and the loop would then fail.
  push!(test, t + 10)
end

4
2
3
1


In [253]:
# Show the set after the loop: the four original elements plus each of them + 10.
test

Set{Int64} with 8 elements:
  4
  14
  13
  2
  3
  11
  12
  1