In [3]:
%matplotlib inline
import networkx as nx
import matplotlib.cm as cm
import matplotlib.pyplot as plt
In [4]:
import networkx as nx

G=nx.Graph() # G = nx.DiGraph() # 有向网络
# 添加(孤立)节点
G.add_node("spam")
# 添加节点和链接
G.add_edge(1,2)

print(G.nodes())

print(G.edges())
[1, 2, 'spam']
[(1, 2)]
In [5]:
# 绘制网络
nx.draw(G, with_labels = True)
In [6]:
G = nx.Graph()
n = 0
with open ('/Users/sara/github/bigdata/www.dat.gz.txt') as f:
    for line in f:
        n += 1
        #if n % 10**4 == 0:
            #flushPrint(n)
        x, y = line.rstrip().split(' ')
        G.add_edge(x,y)
In [7]:
nx.info(G)
Out[7]:
'Name: \nType: Graph\nNumber of nodes: 325729\nNumber of edges: 1117563\nAverage degree:   6.8619'
In [8]:
G = nx.karate_club_graph()
 
clubs = [G.node[i]['club'] for i in G.nodes()]
colors = []
for j in clubs:
    if j == 'Mr. Hi':
        colors.append('r')
    else:
        colors.append('g')
 
nx.draw(G,  with_labels = True, node_color = colors)
In [9]:
G.node[1] # 节点1的属性
Out[9]:
{'club': 'Mr. Hi'}
In [10]:
G.edge.keys()[:3] # 前三条边的id
Out[10]:
[0, 1, 2]
In [11]:
nx.info(G)
Out[11]:
"Name: Zachary's Karate Club\nType: Graph\nNumber of nodes: 34\nNumber of edges: 78\nAverage degree:   4.5882"
In [12]:
G.nodes()[:10]
Out[12]:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
In [13]:
G.edges()[:3]
Out[13]:
[(0, 1), (0, 2), (0, 3)]
In [14]:
G.neighbors(1)
Out[14]:
[0, 2, 3, 7, 13, 17, 19, 21, 30]
In [15]:
nx.average_shortest_path_length(G)
Out[15]:
2.408199643493761
In [16]:
nx.diameter(G)#返回图G的直径(最长最短路径的长度)
Out[16]:
5
In [17]:
nx.density(G)
Out[17]:
0.13903743315508021
In [18]:
nodeNum = len(G.nodes())
edgeNum = len(G.edges())

2.0*edgeNum/(nodeNum * (nodeNum - 1))
Out[18]:
0.13903743315508021
In [19]:
cc = nx.clustering(G)
cc.items()[:5]
Out[19]:
[(0, 0.15),
 (1, 0.3333333333333333),
 (2, 0.24444444444444444),
 (3, 0.6666666666666666),
 (4, 0.6666666666666666)]
In [20]:
plt.hist(cc.values(), bins = 15)
plt.xlabel('$Clustering \, Coefficient, \, C$', fontsize = 20)
plt.ylabel('$Frequency, \, F$', fontsize = 20)
plt.show()
In [21]:
# M. E. J. Newman, Mixing patterns in networks Physical Review E, 67 026126, 2003
nx.degree_assortativity_coefficient(G) #计算一个图的度匹配性。
Out[21]:
-0.47561309768461457
In [22]:
Ge=nx.Graph()
Ge.add_nodes_from([0,1],size=2)
Ge.add_nodes_from([2,3],size=3)
Ge.add_edges_from([(0,1),(2,3)])
print(nx.numeric_assortativity_coefficient(Ge,'size'))
1.0
In [23]:
# plot degree correlation  
from collections import defaultdict
import numpy as np

l=defaultdict(list)
g = nx.karate_club_graph()

for i in g.nodes():
    k = []
    for j in g.neighbors(i):
        k.append(g.degree(j))
    l[g.degree(i)].append(np.mean(k))   
    #l.append([g.degree(i),np.mean(k)])
  
x = l.keys()
y = [np.mean(i) for i in l.values()]

#x, y = np.array(l).T
plt.plot(x, y, 'r-o', label = '$Karate\;Club$')
plt.legend(loc=1,fontsize=10, numpoints=1)
plt.xscale('log'); plt.yscale('log')
plt.ylabel(r'$<knn(k)$> ', fontsize = 20)
plt.xlabel('$k$', fontsize = 20)
plt.show()
In [24]:
dc = nx.degree_centrality(G)
closeness = nx.closeness_centrality(G)
betweenness= nx.betweenness_centrality(G)
In [25]:
fig = plt.figure(figsize=(15, 4),facecolor='white')
ax = plt.subplot(1, 3, 1)
plt.hist(dc.values(), bins = 20)
plt.xlabel('$Degree \, Centrality$', fontsize = 20)
plt.ylabel('$Frequency, \, F$', fontsize = 20)

ax = plt.subplot(1, 3, 2)
plt.hist(closeness.values(), bins = 20)
plt.xlabel('$Closeness \, Centrality$', fontsize = 20)

ax = plt.subplot(1, 3, 3)
plt.hist(betweenness.values(), bins = 20)
plt.xlabel('$Betweenness \, Centrality$', fontsize = 20)
plt.tight_layout()
plt.show()
In [26]:
fig = plt.figure(figsize=(15, 8),facecolor='white')

for k in betweenness:
    plt.scatter(dc[k], closeness[k], s = betweenness[k]*1000)
    plt.text(dc[k], closeness[k]+0.02, str(k))
plt.xlabel('$Degree \, Centrality$', fontsize = 20)
plt.ylabel('$Closeness \, Centrality$', fontsize = 20)
plt.show()
In [27]:
from collections import defaultdict
import numpy as np

def plotDegreeDistribution(G):
    degs = defaultdict(int)
    for i in G.degree().values(): degs[i]+=1
    items = sorted ( degs.items () )
    x, y = np.array(items).T
    y_sum = np.sum(y)
    y = [float(i)/y_sum for i in y]
    plt.plot(x, y, 'b-o')
    plt.xscale('log')
    plt.yscale('log')
    plt.legend(['Degree'])
    plt.xlabel('$K$', fontsize = 20)
    plt.ylabel('$P(K)$', fontsize = 20)
    plt.title('$Degree\,Distribution$', fontsize = 20)
    plt.show()   
    
G = nx.karate_club_graph()     
plotDegreeDistribution(G)
In [30]:
import networkx as nx
import matplotlib.pyplot as plt
BA= nx.random_graphs.barabasi_albert_graph(200,2)  #生成n=20、m=1的BA无标度网络
pos = nx.spring_layout(BA)          #定义一个布局,此处采用了spring布局方式
nx.draw(BA,pos,with_labels=False,node_size = 30)  #绘制图形
plt.show()
In [31]:
plotDegreeDistribution(BA)
In [32]:
BA= nx.random_graphs.barabasi_albert_graph(20000,2)  #生成n=20、m=1的BA无标度网络
plotDegreeDistribution(BA)
In [33]:
import networkx as nx
import matplotlib.pyplot as plt
BA= nx.random_graphs.barabasi_albert_graph(500,1)  #生成n=20、m=1的BA无标度网络
pos = nx.spring_layout(BA)          #定义一个布局,此处采用了spring布局方式
nx.draw(BA,pos,with_labels=False,node_size = 30)  #绘制图形
plt.show()
In [34]:
nx.degree_histogram(BA)[:3]
Out[34]:
[0, 322, 101]
In [35]:
BA.degree().items()[:3]
Out[35]:
[(0, 2), (1, 33), (2, 15)]
In [36]:
plt.hist(BA.degree().values())
plt.show()
In [37]:
from collections import defaultdict
import numpy as np
def plotDegreeDistributionLongTail(G):
    degs = defaultdict(int)
    for i in G.degree().values(): degs[i]+=1
    items = sorted ( degs.items () )
    x, y = np.array(items).T
    y_sum = np.sum(y)
    y = [float(i)/y_sum for i in y]
    plt.plot(x, y, 'b-o')
    plt.legend(['Degree'])
    plt.xlabel('$K$', fontsize = 20)
    plt.ylabel('$P_K$', fontsize = 20)
    plt.title('$Degree\,Distribution$', fontsize = 20)
    plt.show()  
    
BA= nx.random_graphs.barabasi_albert_graph(5000,2)  #生成n=20、m=1的BA无标度网络    
plotDegreeDistributionLongTail(BA)
In [38]:
def plotDegreeDistribution(G):
    degs = defaultdict(int)
    for i in G.degree().values(): degs[i]+=1
    items = sorted ( degs.items () )
    x, y = np.array(items).T
    x, y = np.array(items).T
    y_sum = np.sum(y)
    plt.plot(x, y, 'b-o')
    plt.xscale('log')
    plt.yscale('log')
    plt.legend(['Degree'])
    plt.xlabel('$K$', fontsize = 20)
    plt.ylabel('$P_K$', fontsize = 20)
    plt.title('$Degree\,Distribution$', fontsize = 20)
    plt.show()   

BA= nx.random_graphs.barabasi_albert_graph(50000,2)  #生成n=20、m=1的BA无标度网络        
plotDegreeDistribution(BA)
In [39]:
Ns = [i*10 for i in [1, 10, 100, 1000]]
ds = []
for N in Ns:
    print N
    BA= nx.random_graphs.barabasi_albert_graph(N,2)
    d = nx.average_shortest_path_length(BA)
    ds.append(d)
10
100
1000
10000
In [40]:
plt.plot(Ns, ds, 'r-o')
plt.xlabel('$N$', fontsize = 20)
plt.ylabel('$<d>$', fontsize = 20)
plt.xscale('log')
plt.show()