# Chapter 4 - Degree Correlations

## Requirements

* set directory in the next cell

In [None]:
## root folder of the data sets; keep the trailing slash, because file paths are built
## below by plain string concatenation (e.g. datadir * "Airports/connections.csv")
datadir = "../Datasets/"

In [None]:
using Graphs
using DataFrames
using CSV
using Statistics
using PyCall
using PyPlot
using Chain
using StatsBase
using GraphPlot
using Random

## Useful functions


In [None]:
## k_nn^{mode1,mode2}(l): directed degree correlation function.
## For every edge (src, dst) of G we record mode1(G, src) and mode2(G, dst); the result has
## one row per distinct source degree l (column :src_deg, sorted ascending) holding the
## mean target degree over those edges (column :dst_deg_mean).
## mode1 and mode2 are normally degree, indegree or outdegree.
function deg_corr_directed(G::SimpleDiGraph, mode1::Function, mode2::Function)
    edge_table = DataFrame(edges(G))
    with_degrees = transform(edge_table,
        :src => ByRow(v -> mode1(G, v)) => :src_deg,
        :dst => ByRow(v -> mode2(G, v)) => :dst_deg)
    by_src_degree = groupby(with_degrees, :src_deg, sort=true)
    return combine(by_src_degree, :dst_deg => mean)
end

In [None]:
## Reference level of the degree correlation function for neutral graphs: <k^2>/<k>.
## `mode` is called as mode(G) and must return the collection of degrees k
## (e.g. degree, indegree or outdegree).
function deg_corr_neutral(G, mode::Function)
    ks = mode(G)
    second_moment = mean(abs2, ks)
    first_moment = mean(ks)
    return second_moment / first_moment
end

In [None]:
## Degree correlation function k_nn(l) of an undirected graph.
## Each edge is used in both directions, so every endpoint acts once as the "source".
## Returns a DataFrame with one row per distinct degree l (column :src_deg, sorted
## ascending) and the mean degree found at the other end of the edges leaving degree-l
## vertices (column :dst_deg_mean).
function deg_corr(G::SimpleGraph)
    edge_table = DataFrame(edges(G))
    # add every edge in reverse
    flipped = select(edge_table, :dst => :src, :src => :dst)
    append!(edge_table, flipped)
    with_degrees = transform(edge_table,
        :src => ByRow(v -> degree(G, v)) => :src_deg,
        :dst => ByRow(v -> degree(G, v)) => :dst_deg)
    return combine(groupby(with_degrees, :src_deg, sort=true), :dst_deg => mean)
end

In [None]:
## Degree assortativity: correlation coefficient between the degrees found at the two ends
## of an edge. Every edge is used in both directions; covariance and standard deviations
## are the uncorrected (population) versions.
function assortativity(G)
    ends = DataFrame(edges(G))
    # add every edge in reverse
    append!(ends, select(ends, :dst => :src, :src => :dst))
    src_deg = degree.(Ref(G), ends.src)
    dst_deg = degree.(Ref(G), ends.dst)
    covariance = cov(src_deg, dst_deg, corrected=false)
    spread = std(src_deg, corrected=false) * std(dst_deg, corrected=false)
    return covariance / spread
end

In [None]:
## Correlation exponent: slope of the least-squares line fitted to
## log(k_nn(l)) against log(l), where k_nn is the degree correlation function of G.
function corr_exp(G)
    knn = deg_corr(G)
    log_degree = log.(knn.src_deg)
    log_knn = log.(knn.dst_deg_mean)
    # design matrix with an intercept column; the second coefficient is the slope
    design = hcat(ones(length(log_degree)), log_degree)
    coefficients = design \ log_knn
    return coefficients[2]
end

In [None]:
## Rich club coefficient rho(l) of graph g.
## Compares the number of edges among vertices of degree >= l (phi) with the value computed
## from the degree distribution alone (phi_hat) and returns phi / phi_hat.
##   g - graph
##   l - degree threshold (default 1)
## If no vertex has degree >= l both quantities are 0 and the result is NaN.
function richClub(g, l=1)
    degs = degree(g)  # computed once and reused below
    n = nv(g)
    c = countmap(degs)
    moment = sum(k * ck / n for (k, ck) in pairs(c))^2
    S = Float64[k * ck / n for (k, ck) in pairs(c) if k >= l]
    # sum(x * y for x in S, y in S) equals sum(S)^2, which avoids the quadratic double loop
    phi_hat = sum(S)^2 * ne(g) / moment
    G = induced_subgraph(g, findall(>=(l), degs))[1]
    phi = ne(G)
    return phi / phi_hat
end

In [None]:
## Configuration-model style random simple graph for the degree sequence ds
## (ds[i] is the requested degree of vertex i; the sum must be even).
## Stubs are paired at random; pairs that form a self-loop or repeat an existing edge are
## put on a "recycle" list and then repaired by random edge swaps.
## Pairs that cannot be repaired are dropped, so the returned SimpleGraph is always simple
## but may have fewer edges than ds asks for.
## Uses the global random number generator.
function cm_simple(ds)
    @assert iseven(sum(ds))
    # one stub per unit of degree: vertex i appears ds[i] times
    stubs = reduce(vcat, fill(i, ds[i]) for i in 1:length(ds))
    shuffle!(stubs)
    local_edges = Set{Tuple{Int,Int}}()
    recycle = Tuple{Int,Int}[]
    # pair consecutive stubs; minmax gives an orientation-free representation of the pair
    for i in 1:2:length(stubs)
        e = minmax(stubs[i], stubs[i+1])
        if (e[1] == e[2]) || (e in local_edges)
            push!(recycle, e)
        else
            push!(local_edges, e)
        end
    end

    # resolve self-loops and duplicates
    # recycle_counter is a budget of attempts; it is refilled only when the recycle list
    # got shorter since the last refill, otherwise we give up (break)
    last_recycle = length(recycle)
    recycle_counter = last_recycle
    while !isempty(recycle)
        recycle_counter -= 1
        if recycle_counter < 0
            if length(recycle) < last_recycle
                last_recycle = length(recycle)
                recycle_counter = last_recycle
            else
                break
            end
        end
        p1 = popfirst!(recycle)
        # probability of taking the swap partner from the recycle list: the share of stubs
        # currently sitting there (0 when the list is empty, so it is then never indexed)
        from_recycle = 2 * length(recycle) / length(stubs)
        success = false
        # at most length(stubs)/2 attempts to find a valid swap partner for p1
        for _ in 1:2:length(stubs)
            p2 = if rand() < from_recycle
                used_recycle = true
                recycle_idx = rand(axes(recycle, 1))
                recycle[recycle_idx]
            else
                used_recycle = false
                rand(local_edges)
            end
            # pick one of the two possible ways to re-pair the four endpoints
            if rand() < 0.5
                newp1 = minmax(p1[1], p2[1])
                newp2 = minmax(p1[2], p2[2])
            else
                newp1 = minmax(p1[1], p2[2])
                newp2 = minmax(p1[2], p2[1])
            end
            # the swap is valid only if both new pairs are distinct, are not self-loops
            # and are not already present as edges
            if newp1 == newp2
                good_choice = false
            elseif (newp1[1] == newp1[2]) || (newp1 in local_edges)
                good_choice = false
            elseif (newp2[1] == newp2[2]) || (newp2 in local_edges)
                good_choice = false
            else
                good_choice = true
            end
            if good_choice
                # remove the partner from wherever it came from
                if used_recycle
                    # swap with the last element so that removal is a cheap pop!
                    recycle[recycle_idx], recycle[end] = recycle[end], recycle[recycle_idx]
                    pop!(recycle)
                else
                    pop!(local_edges, p2)
                end
                success = true
                push!(local_edges, newp1)
                push!(local_edges, newp2)
                break
            end
        end
        # no valid swap found: put p1 back at the end of the list for a later attempt
        success || push!(recycle, p1)
    end
    g = SimpleGraph(length(ds))
    for e in local_edges
        add_edge!(g, e...)
    end
    return g
end

## US Airport Volume of Passengers

Same data as in the previous chapter: a directed, weighted graph (the weights are passenger volumes).

In [None]:
## read edges and build the directed airport graph
## (no weight is passed to add_edge!, so the graph built here is unweighted)
D = CSV.read(datadir * "Airports/connections.csv", DataFrame)

## sorted airport codes: the position of a code in id2name is its vertex id
id2name = sort!(unique(union(D.orig_airport, D.dest_airport)))
name2id = Dict(id2name .=> axes(id2name, 1))
g = SimpleDiGraph(length(id2name))
for row in eachrow(D)
    from = name2id[row.orig_airport]
    to = name2id[row.dest_airport]
    ## skip connections from an airport to itself (no self-loops)
    from == to || add_edge!(g, from, to)
end
g

## Directed Degree Correlation Functions (4 cases)

We consider the 4 combinations in/out vs in/out degrees

The dotted horizontal lines show the value expected for neutral graphs.

In [None]:
## k_nn^{in,in}: in-degree of the edge source vs mean in-degree of the edge target
knn = deg_corr_directed(g, indegree, indegree)
## neutral reference level <k^2>/<k> computed from the in-degrees
r = deg_corr_neutral(g, indegree)
scatter(eachcol(knn)..., c="black")
## horizontal line at the neutral level, spanning the observed degree range
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
ylabel("k_nn(l)", fontsize=12);

In [None]:
## k_nn^{in,out}: in-degree of the edge source vs mean out-degree of the edge target
knn = deg_corr_directed(g, indegree, outdegree)
## neutral reference level <k^2>/<k> computed from the out-degrees
r = deg_corr_neutral(g, outdegree)
scatter(eachcol(knn)..., c="black")
## horizontal line at the neutral level, spanning the observed degree range
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
ylabel("k_nn(l)", fontsize=12);

In [None]:
## k_nn^{out,in}: out-degree of the edge source vs mean in-degree of the edge target
knn = deg_corr_directed(g, outdegree, indegree)
## neutral reference level <k^2>/<k> computed from the in-degrees
r = deg_corr_neutral(g, indegree)
scatter(eachcol(knn)..., c="black")
## horizontal line at the neutral level, spanning the observed degree range
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
ylabel("k_nn(l)", fontsize=12);

In [None]:
## k_nn^{out,out}: out-degree of the edge source vs mean out-degree of the edge target
knn = deg_corr_directed(g, outdegree, outdegree)
## neutral reference level <k^2>/<k> computed from the out-degrees
r = deg_corr_neutral(g, outdegree)
scatter(eachcol(knn)..., c="black")
## horizontal line at the neutral level, spanning the observed degree range
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
ylabel("k_nn(l)", fontsize=12);

# We consider the undirected airport graph from now on


In [None]:
## Undirected graph
## (g is rebound here: from now on it refers to the undirected airport graph)
g = SimpleGraph(g)

## Degree correlation: also look via log scale

In [None]:
## degree correlation function of the undirected airport graph
knn = deg_corr(g)
## neutral reference level <k^2>/<k>
r = deg_corr_neutral(g, degree)
scatter(eachcol(knn)..., c="black")
## horizontal line at the neutral level, spanning the observed degree range
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
xlabel("degree l", fontsize=12)
ylabel("k_nn(l)", fontsize=12);

In [None]:
## same data as the previous cell (reuses knn and r), drawn with both axes in log scale
loglog(eachcol(knn)..., "o", c="black")
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
xlabel("degree l", fontsize=12);

## State by state assortativity and correlation exponent

In [None]:
## airport attributes (one row per airport)
A = CSV.read(datadir * "Airports/airports_loc.csv", DataFrame)
## vertex id of every airport, looked up by airport code
A.id = [name2id[a] for a in A.airport]
## sanity checks: the rows of A are in vertex-id order, so row i describes vertex i
@assert A.id == axes(A, 1)
@assert A.airport == id2name
first(A, 5)

In [None]:
## for each state compute degree assortativity (r)
## note that we drop airports w/o in-state edge
## also estimate correlation exponent (mu) via regression (taking the logs)
## Keep assortativity and mu for states with more than 5 vertices left after that drop
## (the test below is nv(G) > 5)
P = DataFrame(state=String[], nodes=Int[], edges=Int[], assortativity=Float64[], mu=Float64[])
for s in unique(A.state)
    ## rows of A (= vertex ids of g) belonging to state s
    hva = findall(==(s), A.state)
    G = induced_subgraph(g, hva)[1]
    ## drop vertices without any in-state edge
    G = induced_subgraph(G, findall(>(0), degree(G)))[1]
    if nv(G) > 5
        mu = corr_exp(G)
        push!(P, [s, nv(G), ne(G), assortativity(G), mu])
    end
end
## ascending assortativity: the most disassortative states come first
sort!(P, :assortativity)
first(P, 5)

In [None]:
## P is sorted by ascending assortativity, so these are the 5 highest values
last(P, 5)

In [None]:
## some states are quite small,
## but we still see good correlation between r and mu
plot(P.assortativity, P.mu, "o", color="black")
xlabel("degree correlation coefficient (r)", fontsize=12)
ylabel("correlation exponent (mu)", fontsize=12)
## Pearson correlation coefficient between the two per-state measures
println("Pearson correlation: ", cor(P.assortativity, P.mu))
## identity line (mu = r) drawn as a visual reference
ident = [-1.0, 1.0]
plot(ident, ident, ":", c="gray");

## Looking at a few states with high/low assortativity

In [None]:
## positive case: the Dakotas (ND+SD)
## rows of A (= vertex ids of g) located in either state
hva = findall(in(["SD", "ND"]), A.state)
G_D = induced_subgraph(g, hva)[1]
## drop vertices without any edge inside the subgraph
G_D = induced_subgraph(G_D, findall(>(0), degree(G_D)))[1]
println("r = ", assortativity(G_D))
## fixed seed before plotting (presumably to make the layout reproducible - confirm)
Random.seed!(4)
gplot(G_D,
      NODESIZE=0.03, nodefillc="black",
      EDGELINEWIDTH=0.2, edgestrokec="gray")

In [None]:
## compare r and mu vs random models for the Dakotas: G = SD+ND
## here we use the configuration model
## we also report the proportion of nodes above the structural cutoff
## given the degree distribution.
r = Float64[]
mu = Float64[]
## r and mu of 1000 random graphs generated from the degree sequence of G_D
for i in 1:1000
    cm = cm_simple(degree(G_D))
    push!(r, assortativity(cm))
    push!(mu, corr_exp(cm))
end
## structural cutoff
sc = sqrt(2 * ne(G_D))
## p is a fraction of nodes (degree above the cutoff), and is labelled as such below
p = count(>(sc), degree(G_D)) / nv(G_D)
println("r = ", assortativity(G_D), " mu = ", corr_exp(G_D), " P(nodes above structural cutoff) = ", p)
boxplot([r, mu], labels=["assortativity (r)", "correlation exponent (mu)"], widths=0.7, sym="");

In [None]:
## degree correlation function for the Dakotas graph
knn = deg_corr(G_D)
## neutral reference level <k^2>/<k> (r is rebound here; it no longer holds the samples)
r = deg_corr_neutral(G_D, degree)
scatter(eachcol(knn)..., c="black")
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
xlabel("degree (k)", fontsize=12)
ylabel("k_nn(k)", fontsize=12);

In [None]:
## degree correlation function for a configuration model random graph used above
## quite different!
## (a fresh random graph is drawn here from the degree sequence of G_D)
cm = cm_simple(degree(G_D))
knn = deg_corr(cm)
r = deg_corr_neutral(cm, degree)
scatter(eachcol(knn)..., c="black")
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
xlabel("degree (k)", fontsize=12)
ylabel("k_nn(k)", fontsize=12);

In [None]:
## negative case: the MO graph
## note: G_D is reused as the variable name, it now holds the MO subgraph
hva = findall(==("MO"), A.state)
G_D = induced_subgraph(g, hva)[1]
## drop vertices without any edge inside the subgraph
G_D = induced_subgraph(G_D, findall(>(0), degree(G_D)))[1]
println("r = ", assortativity(G_D))
## fixed seed before plotting (presumably to make the layout reproducible - confirm)
Random.seed!(4)
gplot(G_D,
      NODESIZE=0.03, nodefillc="black",
      EDGELINEWIDTH=0.2, edgestrokec="gray")

In [None]:
## r and mu vs random configuration model for MO graph
## (G_D holds the MO subgraph at this point)
## we also report the proportion of nodes above the structural cutoff
## given the degree distribution.
r = Float64[]
mu = Float64[]
## r and mu of 1000 random graphs generated from the degree sequence of G_D
for i in 1:1000
    cm = cm_simple(degree(G_D))
    push!(r, assortativity(cm))
    push!(mu, corr_exp(cm))
end
## structural cutoff
sc = sqrt(2 * ne(G_D))
## p is a fraction of nodes (degree above the cutoff), and is labelled as such below
p = count(>(sc), degree(G_D)) / nv(G_D)
println("r = ", assortativity(G_D), " mu = ", corr_exp(G_D), " P(nodes above structural cutoff) = ", p)
boxplot([r, mu], labels=["assortativity (r)", "correlation exponent (mu)"], widths=0.7, sym="");

In [None]:
## degree correlation function for MO graph
knn = deg_corr(G_D)
## neutral reference level <k^2>/<k> (r is rebound here; it no longer holds the samples)
r = deg_corr_neutral(G_D, degree)
scatter(eachcol(knn)..., c="black")
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
xlabel("degree (k)", fontsize=12)
ylabel("k_nn(k)", fontsize=12);

In [None]:
## degree correlation function for a configuration random graph
## quite similar!
## (a fresh random graph is drawn here from the degree sequence of the MO graph)
cm = cm_simple(degree(G_D))
knn = deg_corr(cm)
r = deg_corr_neutral(cm, degree)
scatter(eachcol(knn)..., c="black")
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
xlabel("degree (k)", fontsize=12)
ylabel("k_nn(k)", fontsize=12);

In [None]:
## state with r = -1 (NE)
hva = findall(==("NE"), A.state)
G_D = induced_subgraph(g, hva)[1]
## drop vertices without any edge inside the subgraph
G_D = induced_subgraph(G_D, findall(>(0), degree(G_D)))[1]
println("r = ", assortativity(G_D))
## fixed seed before plotting (presumably to make the layout reproducible - confirm)
Random.seed!(4)
gplot(G_D,
      NODESIZE=0.03, nodefillc="black",
      EDGELINEWIDTH=0.2, edgestrokec="gray")

In [None]:
## state with r = +1 (AR)
hva = findall(==("AR"), A.state)
G_D = induced_subgraph(g, hva)[1]
## drop vertices without any edge inside the subgraph
G_D = induced_subgraph(G_D, findall(>(0), degree(G_D)))[1]
println("r = ", assortativity(G_D))
## fixed seed before plotting (presumably to make the layout reproducible - confirm)
Random.seed!(4)
gplot(G_D,
      NODESIZE=0.03, nodefillc="black",
      EDGELINEWIDTH=0.2, edgestrokec="gray")

## Back to Overall US Airport graph 

- friendship paradox illustration
- looking for rich club phenomenon


In [None]:
## plot degree vs avg neighbour degree
## friendship 'paradox' US Airport graph (overall)
deg = degree(g)
## for every vertex, the mean degree of its neighbours
nad = [mean(degree(g, neighbors(g, v))) for v in 1:nv(g)]
scatter(deg, nad, c="black", marker=".")
xlim((0, 200))
ylim((0, 200))
xlabel("node degree", fontsize=14)
ylabel("average neighbour degree", fontsize=14);
## diagonal reference: points above it have neighbours of higher average degree
plot([0, 200], [0, 200], "--", c="gray")
print("r = ", assortativity(g));

In [None]:
## looking for rich club -- not here!
## evaluate the rich club coefficient once for every distinct degree value of g
d = unique(degree(g))
## Ref(g) keeps the graph fixed while broadcasting over the thresholds in d
rc = richClub.(Ref(g), d)
semilogx(d, rc, ".", c="black")
xlabel("degree l", fontsize=12)
ylabel("rich club coefficient rho(l)");

## Spatial Preferential Attachment (SPA) Graphs

We consider the SPA model, which generates directed graphs. In the experiment below, we generate all graphs on the 2-dimensional unit square, but we use the torus distance. For plotting, however, we drop the edges that wrap around the unit square to get a nicer display.

More details are provided in the book.

In [None]:
## Distance between the points U and V on the unit torus (unit square with opposite
## sides identified). Only the first two coordinates of U and V are used.
function torusDist(U, V)
    # a coordinate gap above 0.5 is shorter when going around the other way
    wrap(gap) = (gap > 0.5) ? 1 - gap : gap
    dx = wrap(abs(V[1] - U[1]))
    dy = wrap(abs(V[2] - U[2]))
    return sqrt(dx * dx + dy * dy)
end

## Plain Euclidean distance between the points U and V (no wrap-around).
## Only the first two coordinates of U and V are used.
function squareDist(U, V)
    gap_x = V[1] - U[1]
    gap_y = V[2] - U[2]
    return sqrt(abs2(gap_x) + abs2(gap_y))
end

# Build a spatial preferential attachment (SPA) graph on the unit square, using the
# torus distance.
#
# Arguments:
#   n      - number of vertices
#   p      - probability of adding the edge v -> i when the new vertex v falls inside
#            the sphere of influence of an earlier vertex i
#   A1, A2 - sphere-of-influence parameters: after step t a vertex of in-degree d has a
#            sphere of area min(1, (A1 * d + A2) / t)
#   seed   - seed of the random number generator; it drives both the vertex positions
#            and the edge sampling, so the whole graph is reproducible from the seed
#
# Returns (G, X): the directed graph and the n x 2 matrix of vertex positions.
function buildSPA(n::Int, p::Float64, A1::Int, A2::Int, seed::Int=12345)
    ## random points
    rng = MersenneTwister(seed)
    X = rand(rng, n, 2)

    ## initialize new graph
    t = 0
    r = zeros(n)
    G = DiGraph(n)

    ## add vertices one at a time
    for v in 1:n
        xv = @view X[v, :]
        ## only vertices added before v are candidates; i == v is excluded because a
        ## vertex is at distance 0 from itself and would otherwise receive a self-loop
        for i in 1:(v-1)
            if torusDist(xv, @view(X[i, :])) < r[i]
                ## draw from rng (not the global generator) to keep the result seeded
                if rand(rng) < p
                    add_edge!(G, v, i)
                end
            end
        end
        t += 1
        ## radius of a disc of area min(1, (A1 * d + A2) / t)
        r = [sqrt(min(1, (A1 * d + A2) / t) / pi) for d in indegree(G)]
    end
    return G, X
end

In [None]:
#Generate a SPA graph - build with the torus distance
## note: n, p, A1, A2 and seed are globals reused by the cells below
n = 200
p = 0.667
A1 = 1
A2 = 3
seed = 1234

## G_spa is the directed graph, X the n x 2 matrix of vertex positions
G_spa, X = buildSPA(n, p, A1, A2, seed)
print("number of edges: ", ne(G_spa))

In [None]:
## Plot the above graph - ignore edges that "wrap around"
## (an edge wraps around exactly when its torus distance differs from its plain distance)
E = Edge[]
for e in edges(G_spa)
    a = X[e.src, :]
    b = X[e.dst, :]
    if torusDist(a, b) == squareDist(a, b)
        push!(E, e)
    end
end

## plot with those edges only
## induced_subgraph(G_spa, E) would renumber the vertices (in order of first appearance
## in E) and drop vertices without a kept edge, so its vertex ids would no longer index
## the rows of X. Build the plotting graph on the original vertex set instead, so that
## vertex v is drawn at X[v, :].
G_sq = SimpleDiGraph(nv(G_spa))
for e in E
    add_edge!(G_sq, e.src, e.dst)
end
gplot(G_sq, X[:, 1], X[:, 2], nodefillc="black")

In [None]:
## statistics for the SPA graph

## note: assortativity() uses degree(G, v) and every edge in both directions,
## i.e. edge directions are not taken into account here
println("Assortativity: ", assortativity(G_spa))
println("Average out degree: ", mean(outdegree(G_spa)))
## value computed from the model parameters p, A1 and A2 set above
println("expected average out degree:", p * A2 / (1 - p * A1))

In [None]:
#Fitted vs expected power law exponent (in degrees)
d = indegree(G_spa)
## Python package "powerlaw", loaded through PyCall
powerlaw = pyimport("powerlaw");
## keep the fit in its own variable: X holds the vertex positions returned by buildSPA
## and is still needed to re-run the plotting cell
pl_fit = powerlaw.Fit(d, verbose=false)
println("Value of gamma: ", pl_fit.power_law.alpha)
println("Expected power law exponent:", 1 + 1 / (p * A1))

In [None]:
## degree correlation
## k_nn^{in,in} of the SPA graph: in-degree of the edge source vs mean in-degree of the target
knn = deg_corr_directed(G_spa, indegree, indegree)
scatter(knn.src_deg, knn.dst_deg_mean);
xlabel(L"degree ($\ell$)", fontsize=12)
ylabel(L"$k_{nn}(\ell)$", fontsize=12);

## Assortativity in geometric and uniform random graphs

We consider two families of (undirected) random graphs we saw in Chapter 2: random geometric graphs and Erdos-Renyi graphs.

In [None]:
n = 1000
boundary = :periodic ## :periodic gives a torus-based RGG; set to :open to use the constrained unit square
rng_seed = 1234

## one row per radius: assortativity of the RGG, assortativity of a uniform random graph
## built from the same n and the RGG's edge count m, and the average degree of the RGG
df_rg = DataFrame(["RGG", "Uniform", "avg_degree"] .=> Ref(Float64[]))
for radius in 0.025:0.025:0.501
    g_RGG, _, pos = euclidean_graph(n, 2, seed=rng_seed, cutoff=radius, bc=boundary)
    m = ne(g_RGG)
    g_ER = erdos_renyi(n, m, seed=rng_seed)
    push!(df_rg, [assortativity(g_RGG), assortativity(g_ER), mean(degree(g_RGG))])
end
df_rg

In [None]:
## degree assortativity vs average degree
## both curves use the average degree of the RGG (column avg_degree) on the x axis
plot(df_rg.avg_degree, df_rg.RGG, ".-", label="RGG", color="black")
plot(df_rg.avg_degree, df_rg.Uniform, ".-", label="Uniform", color="dimgray")
legend()
xlabel("average degree", fontsize=14)
ylabel("assortativity", fontsize=14);

In [None]:
## radius = 0.1
n = 1000
rng_seed = 1234

## RGG with cutoff 0.1 and a uniform random graph built from the same n and edge count
g_RGG, _ = euclidean_graph(n, 2, seed=rng_seed, cutoff=0.1, bc=:periodic)
g_ER = erdos_renyi(n, ne(g_RGG), seed=rng_seed)
println("average degree: ", mean(degree(g_RGG)))

## plot for RGG
## left panel: degree correlation function with the neutral level <k^2>/<k>
knn = deg_corr(g_RGG)
r = deg_corr_neutral(g_RGG, degree)
figure(figsize=(10, 4))
subplot(121)
scatter(knn.src_deg, knn.dst_deg_mean, c="black")
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":", color="black")
xlabel(L"degree ($\ell$)", fontsize=12)
ylabel(L"$k_{nn}(\ell)$", fontsize=12)
title("RGG with radius 0.1");

## plot for ER
## right panel: same quantities for the uniform random graph (knn and r are rebound)
knn = deg_corr(g_ER)
r = deg_corr_neutral(g_ER, degree)
subplot(122)
scatter(knn.src_deg, knn.dst_deg_mean, c="black")
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":", color="black")
xlabel(L"degree ($\ell$)", fontsize=12)
title("Uniform RG with same average degree");

In [None]:
## radius = 0.5
n = 1000
rng_seed = 1234

## RGG with cutoff 0.5 and a uniform random graph built from the same n and edge count
g_RGG, _ = euclidean_graph(n, 2, seed=rng_seed, cutoff=0.5, bc=:periodic)
g_ER = erdos_renyi(n, ne(g_RGG), seed=rng_seed)
println("average degree: ", mean(degree(g_RGG)))

## plot for RGG
## left panel: degree correlation function with the neutral level <k^2>/<k>
knn = deg_corr(g_RGG)
r = deg_corr_neutral(g_RGG, degree)
figure(figsize=(10, 4))
subplot(121)
scatter(knn.src_deg, knn.dst_deg_mean, c="black")
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":", color="black")
xlabel(L"degree ($\ell$)", fontsize=12)
ylabel(L"$k_{nn}(\ell)$", fontsize=12)
title("RGG with radius 0.5");

## plot for ER
## right panel: same quantities for the uniform random graph (knn and r are rebound)
knn = deg_corr(g_ER)
r = deg_corr_neutral(g_ER, degree)
subplot(122)
scatter(knn.src_deg, knn.dst_deg_mean, c="black")
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":", color="black")
xlabel(L"degree ($\ell$)", fontsize=12)
title("Uniform RG with same average degree");

## A quick look: Europe electric grid network

We notice:

- degree distribution quite uniform
- positive assortativity, also seen with degree correlation function
- friendship paradox not obvious

In [None]:
## Europe Electric Grid
## every line of the file is split on whitespace; the loader below destructures each
## line into exactly two vertex labels (from, to)
edge_list = split.(readlines(datadir * "GridEurope/gridkit_europe-highvoltage.edges"))
## distinct vertex labels in order of first appearance, mapped to ids 1..number of labels
vertex_ids = unique(reduce(vcat, edge_list))
vertex_map = Dict(vertex_ids .=> 1:length(vertex_ids))
grid = SimpleGraph(length(vertex_ids))
foreach(((from, to),) -> add_edge!(grid, vertex_map[from], vertex_map[to]), edge_list)

In [None]:
## plot degree vs avg neighbour degree

deg = degree(grid)
## for every vertex, the mean degree of its neighbours
nad = [mean(degree(grid, neighbors(grid, v))) for v in 1:nv(grid)]
scatter(deg, nad, c="black", marker=".")
xlim((0, 18))
ylim((0, 18))
xlabel("node degree", fontsize=14)
ylabel("average neighbour degree", fontsize=14);
## diagonal reference: points above it have neighbours of higher average degree
plot([0, 18], [0, 18], "--", c="gray")
print("r = ", assortativity(grid));

In [None]:
## Degree correlation function

knn = deg_corr(grid)
## neutral reference level <k^2>/<k>
r = deg_corr_neutral(grid, degree)
scatter(eachcol(knn)..., c="black")
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
xlabel("degree (k)", fontsize=12)
ylabel("k_nn(k)", fontsize=12);

## Quick look: GitHub Developers Graph

- negative assortativity
- strong friendship paradox phenomenon

In [None]:
## GitHub Developers (undirected)
## .+ 1 shifts every id by one (presumably the file uses 0-based ids - confirm)
## note: D is rebound here, it no longer holds the airport connections
D = CSV.read(datadir * "GitHubDevelopers/musae_git_edges.csv", DataFrame) .+ 1
max_node_id = max(maximum(D.id_1), maximum(D.id_2))
git = SimpleGraph(max_node_id)
## each row is splatted into add_edge!(git, id_1, id_2)
foreach(row -> add_edge!(git, row...), eachrow(D))

In [None]:
## plot degree vs avg neighbour degree
## zoom in on nodes with degree < LIM
LIM = 1000
deg = degree(git)
## for every vertex, the mean degree of its neighbours
nad = [mean(degree(git, neighbors(git, v))) for v in 1:nv(git)]
scatter(deg, nad, c="black", marker=".")
xlim((0, LIM))
ylim((0, LIM))
xlabel("node degree", fontsize=14)
ylabel("average neighbour degree", fontsize=14);
## diagonal reference: points above it have neighbours of higher average degree
plot([0, LIM], [0, LIM], "--", c="gray")
print("r = ", assortativity(git));

In [None]:
## degree correlation function
knn = deg_corr(git)
## neutral reference level <k^2>/<k>
r = deg_corr_neutral(git, degree)
## same x range as the previous plot (LIM is defined in the previous cell)
xlim((0, LIM))
scatter(eachcol(knn)..., c="black")
hlines(y=r, xmin=minimum(knn.src_deg), xmax=maximum(knn.src_deg), linestyles=":")
xlabel("degree (k)", fontsize=12)
ylabel("k_nn(k)", fontsize=12);

In [None]:
## still no rich club group here
## evaluate the rich club coefficient once for every distinct degree value of git
d = unique(degree(git))
## Ref(git) keeps the graph fixed while broadcasting over the thresholds in d
rc = richClub.(Ref(git), d)
semilogx(d, rc, ".", c="black")
xlabel("degree l", fontsize=12)
ylabel("rich club coefficient rho(l)");

## Showing a rich club: the actors graph

This data set is part of the accompanying material of the book "Complex Networks: Principles, Methods and Applications", V. Latora, V. Nicosia, G. Russo, Cambridge University Press (2017)

248,243 nodes (actors) and 8,302,734 edges (co-appearing in at least 1 movie)


In [None]:
## actors graph: the file has no header row, so the two columns are named here
## .+ 1 shifts every id by one (presumably the file uses 0-based ids - confirm)
D = CSV.read(datadir * "Actors/movie_actors.net", header=[:id_1, :id_2], DataFrame) .+ 1
max_node_id = max(maximum(D.id_1), maximum(D.id_2))
## note: g is rebound here, it no longer holds the airport graph
g = SimpleGraph(max_node_id)
foreach(row -> add_edge!(g, row...), eachrow(D))
## drop vertices without any edge
g = induced_subgraph(g, findall(>(0), degree(g)))[1]

## random sample of 200 distinct degree values, drawn without replacement
## (no seed is set in this cell)
d = sample(unique(degree(g)), 200, replace=false)

## rich club coefficient at each sampled threshold
rc = richClub.(Ref(g), d)
semilogx(d, rc, ".", c="black")
xlabel("degree l", fontsize=12)
ylabel("rich club coefficient rho(l)");

## Figures 4.1 and 4.2: Xulvi-Brunet and Sokolov algorithm

The version in the book was obtained with faster Julia code. We show a smaller-scale example here.


In [None]:
## Naive Xulvi-Brunet and Sokolov algorithm
## Starts from a random graph g = erdos_renyi(n, λ / n) and rewires pairs of edges until
## every edge slot has been "touched" at least once.
##   n           - number of vertices
##   λ           - sets the edge probability p = λ / n
##   q           - probability that a rewiring step pairs the endpoints by degree;
##                 otherwise the four endpoints are paired at random (q == 0 returns the
##                 initial graph unchanged)
##   assortative - true: join the two lowest-degree and the two highest-degree endpoints;
##                 false: join lowest with highest and the two middle ones
##   seed        - seed for the global random number generator (set on entry)
## Returns the rewired graph.

function XBS(n, λ, q, assortative, seed)
    p = λ / n
    Random.seed!(seed)
    g = erdos_renyi(n, p)
    q == 0 && return g
    ## edge slots as (src, dst) tuples; slot k is rewritten when edge k is rewired
    e = [(x.src, x.dst) for x in edges(g)]
    touched = falses(length(e))
    count_touched = 0
    ## NOTE(review): with a single edge no pair i != j exists and this loop cannot end
    while count_touched < length(e)
        i, j = rand(axes(e, 1)), rand(axes(e, 1))
        if i != j
            vs = [e[i]..., e[j]...]
            ## only pairs of edges with four distinct endpoints are considered
            if allunique(vs)
                if rand() < q
                    ## order the endpoints by degree: (vs[1], vs[2]) are the two lowest,
                    ## (vs[3], vs[4]) the two highest
                    sort!(vs, by=x -> degree(g, x))
                    if !assortative
                        ## pair lowest with highest, and the two middle ones
                        vs[2], vs[4] = vs[4], vs[2]
                    end
                else
                    shuffle!(vs)
                end
                a1, a2, b1, b2 = vs
                ## the proposed pairing reproduces one of the stored edges (same
                ## orientation): nothing is rewired, both slots only count as touched
                if ((a1, a2) == e[i] || (a1, a2) == e[j])
                    count_touched += !touched[i] + !touched[j]
                    touched[i] = true
                    touched[j] = true
                else
                    ## rewire only if neither new edge is already present in g;
                    ## otherwise nothing happens and a new pair is drawn
                    if !has_edge(g, a1, a2) && !has_edge(g, b1, b2)
                        @assert rem_edge!(g, e[i]...)
                        @assert rem_edge!(g, e[j]...)
                        e[i] = (a1, a2)
                        e[j] = (b1, b2)
                        add_edge!(g, a1, a2)
                        add_edge!(g, b1, b2)
                        count_touched += !touched[i] + !touched[j]
                        touched[i] = true
                        touched[j] = true
                    end
                end
            end
        end
    end
    @assert all(touched)
    return g
end

In [None]:
## Fig 4.2 with N nodes and averaging Rep results
N = 2^9 ## we use 2^16 and Rep=64 in book
Rep = 8
## seed the global generator first so that the per-repetition seeds, and therefore the
## whole figure, are reproducible (as in the Fig 4.1 cell)
Random.seed!(1234)
seeds = rand(UInt64, Rep)

## one row per (seed, q, a, d): relative size of the largest connected component
df = DataFrame()
for seed in seeds
    for (q, a) in ((0.0, true), (1 / 3, true), (2 / 3, true), (1 / 3, false), (2 / 3, false))
        for d in 0.25:0.25:3
            ## use N (not a hard-coded size) so that changing N above takes effect
            g = XBS(N, d, q, a, seed)
            c = maximum(length, connected_components(g)) / nv(g)
            push!(df, (seed=seed, q=q, a=a, d=d, component=c))
        end
    end
end

## average over the repetitions, then draw one curve per (q, a) combination;
## the index vector reorders the groups to match the legend order below
@chain df begin
    groupby([:q, :a, :d])
    combine(:component => mean => :component)
    groupby([:q, :a])
    foreach((c, s, sdf) -> plot(sdf.d, sdf.component, color=c, linestyle=s),
        ["black", "gray", "black", "gray", "black"],
        ["-", "-", ":", "--", "--"], _[[3, 2, 1, 4, 5]])
end
ylim([0.0, 1.0])
xlim([0.0, 3.0])
xlabel("average degree")
ylabel("fraction of nodes")
legend(["assortative, q=2/3", "assortative, q=1/3", "q=0",
    "dissortative, q=1/3", "dissortative, q=2/3"])

In [None]:
## Fig 4.1

## fixed seed so that the per-repetition seeds are reproducible; Rep comes from the
## Fig 4.2 cell
Random.seed!(1234)
seeds = rand(UInt64, Rep)
## one row per (seed, q, a, degree): value of the degree correlation function
df = DataFrame()
for seed in seeds
    for (q, a) in ((0.0, true), (1 / 3, true), (2 / 3, true), (1 / 3, false), (2 / 3, false))
        g = XBS(2^9, 4, q, a, seed)
        ## drop vertices without any edge
        g = induced_subgraph(g, findall(>(0), degree(g)))[1]
        ds, knns = eachcol(deg_corr(g))
        append!(df, DataFrame(seed=seed, q=q, a=a, ds=ds, knns=knns))
    end
end

## average over the repetitions, then draw one curve per (q, a) combination;
## the index vector reorders the groups to match the legend order below
@chain df begin
    groupby([:q, :a, :ds])
    combine(:knns => mean => :knns)
    sort(:ds)
    groupby([:q, :a])
    foreach((c, s, sdf) -> plot(sdf.ds, sdf.knns, color=c, linestyle=s),
        ["black", "gray", "black", "gray", "black"],
        ["-", "-", ":", "--", "--"], _[[3, 2, 1, 4, 5]])
end
xlabel("degree (\$\\ell\$)")
ylabel("\$k_{nn} (\\ell)\$")
xticks(1:2:17)
xlim([0.0, 10.0])
ylim([3, 8.0])
legend(["assortative, q=2/3", "assortative, q=1/3", "q=0",
        "dissortative, q=1/3", "dissortative, q=2/3"],
    loc="upper center", ncol=2)
