In [30]:
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms.approximation import average_clustering
# from networkx import average_clustering

In [9]:
def all_pairs(nodes):
    """Yield each unordered pair of elements of `nodes` exactly once.

    Pairs are emitted as ``(later, earlier)`` with respect to iteration
    order: for the sequence ``[a, b, c]`` the result is
    ``(b, a), (c, a), (c, b)``.

    `nodes` may be any iterable, including one-shot iterators and
    generators; it is materialized once so that it is never re-iterated.
    Fewer than two elements produce no pairs.
    """
    # Snapshot the input: iterating `nodes` again inside the loop would
    # exhaust a one-shot iterator and silently drop every pair.
    items = list(nodes)
    for i, u in enumerate(items):
        # Pair `u` only with elements that come before it, so each
        # unordered pair shows up once and no index comparison is wasted.
        for j in range(i):
            yield u, items[j]


def node_clustering(G:nx.Graph, u):
    """Return the local clustering coefficient of node `u` in `G`.

    The coefficient is the fraction of pairs of `u`'s neighbors that are
    themselves connected by an edge. When `u` has fewer than two
    neighbors there is no pair to test and ``np.nan`` is returned.
    """
    adjacent = G[u]
    degree = len(adjacent)
    if degree < 2:
        return np.nan

    # Count the neighbor pairs that are directly linked to each other.
    linked = sum(1 for a, b in all_pairs(adjacent) if G.has_edge(a, b))

    # Divide by the number of distinct neighbor pairs, degree choose 2.
    return linked / (degree * (degree - 1) / 2)


def clustering_coefficient(G):
    """Return the mean of `node_clustering` over every node of `G`.

    Nodes whose coefficient is NaN (fewer than two neighbors) are left
    out of the average.
    """
    per_node = []
    for node in G:
        per_node.append(node_clustering(G, node))
    # nanmean skips the NaN entries instead of propagating them.
    return np.nanmean(per_node)

In [10]:
def read_graph(filename):
    """Build an undirected graph from an integer edge-list file.

    The file is parsed with ``np.loadtxt`` as integers and every row is
    added to a new ``nx.Graph`` as one edge.
    NOTE(review): rows are presumably two columns (source, target);
    confirm against the data file.

    Args:
        filename: path of the edge-list file, passed straight to
            ``np.loadtxt``.

    Returns:
        The populated ``nx.Graph``.
    """
    G = nx.Graph()
    # ndmin=2 keeps a file with a single row as a (1, ncols) array.
    # Without it loadtxt squeezes the result to 1-D and add_edges_from
    # would iterate over bare integers instead of one edge.
    array = np.loadtxt(filename, dtype=int, ndmin=2)
    G.add_edges_from(array)
    return G

In [11]:
# Load the edge list into a graph and show (number of nodes, number of edges).
fb = read_graph('facebook_combined.txt.gz')
len(fb.nodes), len(fb.edges)

(4039, 88234)

In [12]:
def sample_path_lengths(G:nx.Graph, nodes=None, trials=1000):
    """Return shortest-path lengths between randomly chosen node pairs.

    Args:
        G: graph in which path lengths are measured.
        nodes: iterable of candidate endpoints; when None, every node of
            `G` is a candidate.
        trials: number of random pairs to draw.

    Returns:
        A list with one ``nx.shortest_path_length`` result per pair.
        Both endpoints are drawn independently with replacement, so a
        pair may consist of the same node twice.

    Any exception raised by ``nx.shortest_path_length`` for a sampled
    pair propagates to the caller.
    """
    nodes = list(G) if nodes is None else list(nodes)

    # Sample positions instead of the nodes themselves. Handing the node
    # list to np.random.choice converts it to an ndarray, which rejects
    # tuple nodes (array is no longer 1-D) and casts mixed-type nodes to
    # a common type that no longer matches the graph's node objects.
    index_pairs = np.random.randint(len(nodes), size=(trials, 2))
    lengths = [nx.shortest_path_length(G, nodes[i], nodes[j])
               for i, j in index_pairs]

    return lengths


def estimate_path_length(G:nx.Graph, nodes=None, trials=1000):
    """Estimate the mean shortest-path length of `G` by random sampling.

    Draws `trials` node pairs via `sample_path_lengths` (restricted to
    `nodes` when given) and returns the average of their path lengths.
    """
    sampled = sample_path_lengths(G, nodes=nodes, trials=trials)
    return np.mean(sampled)

In [44]:
# 1000 random (source, target) rows drawn with replacement from fb's nodes.
p = np.random.choice(list(fb), size=(1000, 2))

In [54]:
# Approximate average clustering of fb; the trial count matches the
# edge count reported for fb when it was loaded.
average_clustering(fb, trials=88234)


0.6059115533694495

In [45]:
# Show every sampled pair, one row per line.
print(*p, sep='\n')

[1412 2114]
[2046 3859]
[ 511 2238]
[2388 1014]
[   1 1176]
[ 110 1596]
[3989 2174]
[2223 2481]
[732 910]
[2718 2540]
[ 217 2401]
[1454  158]
[1682 1879]
[1950  503]
[ 702 2249]
[1978 2889]
[2676  213]
[ 348 3034]
[2574 1647]
[3736 3882]
[2461 1879]
[3198 3885]
[ 421 2308]
[3005 3202]
[ 280 3810]
[549 784]
[1785 3045]
[ 133 2791]
[3649 1838]
[1621 1132]
[1537 2069]
[3707 1006]
[1200 1550]
[ 154 3429]
[695 992]
[ 475 3427]
[ 185 2167]
[1581 2960]
[283 473]
[1552 3179]
[1265  682]
[3372 2080]
[2417  685]
[2905 2013]
[960 962]
[ 952 1311]
[2920 3957]
[ 582 3806]
[1957 2691]
[1135 2687]
[ 849 2102]
[ 824 4021]
[838 661]
[ 874 1957]
[2862 1597]
[1406  363]
[ 349 1713]
[2874 2595]
[1381  612]
[1334 3949]
[1480  409]
[ 678 3674]
[3709  578]
[3584 1715]
[1328 1053]
[2829 1046]
[1783 1387]
[3047 3971]
[3779 2611]
[3620 1821]
[2828 1831]
[1618   57]
[3037 2767]
[1373 2315]
[1959 3926]
[1509  196]
[1243 3306]
[3276 1799]
[ 669 1852]
[3355 2063]
[3269 1892]
[1601 2524]
[3617 2341]
[1700  574]
[145