In [None]:
import collections
import math
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
import pandas as pd
import seaborn as sns

# Type of the graph


In [None]:
# Load the aggregated actor adjacency matrix and the matching actor metadata.
actors_agg_adj = np.load("sparse_agg_actor_adj.npy")
# NOTE(review): unpickling can execute arbitrary code; only load a trusted file here.
actors_agg_df = pd.read_pickle("actors_agg_df.pkl")
# Build an undirected graph whose nodes are the row/column positions of the matrix.
# nx.from_numpy_matrix was removed in networkx 3.0; from_numpy_array is the
# replacement for a 2-D ndarray and is available in older 2.x releases as well.
actors_graph = nx.from_numpy_array(actors_agg_adj)

It is not a regular graph because the nodes do not all have the same degree, as seen below:

In [None]:
# Regular-graph check: list the five highest-degree nodes. In a regular graph
# every node would have the same degree.
nodes_by_degree = sorted(
    actors_graph.degree(), key=lambda pair: pair[1], reverse=True
)
for node, degree in nodes_by_degree[:5]:
    print(f"node {node}: degree: {degree}")

In [None]:
# Degrees in node iteration order, so position i holds the degree of the i-th node
# (presumably node ids are 0..n-1, making the position equal to the node id — verify).
degree_list = list(dict(actors_graph.degree()).values())

In [None]:
# Count how many nodes share each degree value (degrees sorted from high to low).
degree_sequence = sorted(degree_list, reverse=True)
degreeCount = collections.Counter(degree_sequence)
deg, cnt = zip(*degreeCount.items())

# Bar chart of the degree histogram: one bar per distinct degree value.
fig, ax = plt.subplots(figsize=(15, 10))
ax.bar(deg, cnt, width=0.80, color="b")
ax.set(title="Degree Distribution", xlabel="Degree", ylabel="Frequency")
ax.grid(True)
# The target directory "plots/" must already exist; savefig does not create it.
fig.savefig("plots/degree_distribution.png")
plt.show()

Another common graph type is the scale-free (power-law) graph; however, looking at the degree distribution, we can clearly see that it does not follow a power law.

Looking at the degree distribution, we can see that the graph is a random graph.

In [None]:
# Average degree: total of all node degrees divided by the number of nodes.
degree_total = sum(degree for _, degree in actors_graph.degree())
node_count = actors_graph.number_of_nodes()
average_degree = degree_total / node_count
print(average_degree)

In [None]:
# ln(N) / ln(<k>): log of the node count divided by log of the average degree.
log_node_count = math.log(actors_graph.number_of_nodes())
log_average_degree = math.log(average_degree)
log_node_count / log_average_degree

In [None]:
# Small-world test: compare the actors graph with an Erdős–Rényi (ER) random
# graph that has the same number of nodes and the same expected edge density.

# Statistics of the observed network.
clustering_coefficient = nx.average_clustering(actors_graph)
mean_shortest_path = nx.average_shortest_path_length(actors_graph)

# ER reference graph: p is chosen so the expected number of edges equals m.
n = len(actors_graph.nodes)
m = actors_graph.size()
p = 2 * m / (n * (n - 1))
ER_SEED = 42  # fixed seed so the random reference graph is reproducible on re-run
G_er = nx.erdos_renyi_graph(n, p, seed=ER_SEED)
clustering_coefficient_er = nx.average_clustering(G_er)

# average_shortest_path_length raises on a disconnected graph, and a sparse ER
# sample can be disconnected, so fall back to its largest connected component.
if nx.is_connected(G_er):
    er_path_graph = G_er
else:
    er_path_graph = G_er.subgraph(max(nx.connected_components(G_er), key=len))

# This must be measured on the ER graph, not on actors_graph; otherwise the
# ratio against mean_shortest_path is trivially 1 and the comparison is void.
mean_shortest_path_er = nx.average_shortest_path_length(er_path_graph)

In [None]:
# Ratio of the observed average clustering coefficient to that of the ER
# reference graph; a value well above 1 means more clustering than in the
# comparable random graph.
clustering_coefficient / clustering_coefficient_er

In [None]:
# Ratio of the observed mean shortest path length to the reference value
# stored in mean_shortest_path_er.
mean_shortest_path / mean_shortest_path_er

Thus, the network is a small world, as shown by comparing network statistics such as the clustering coefficient and the mean shortest path length with those of a comparable Erdős–Rényi generated network. Small-world networks should have some spatial structure, which is reflected in a larger clustering coefficient.

# Properties of nodes

In [None]:
# Average clustering coefficient of the actors graph, i.e. the value returned
# earlier by nx.average_clustering(actors_graph).
print(clustering_coefficient)

In [None]:
# Degree centrality per node, as (node, centrality) pairs ordered from the
# most to the least central node.
degree_centrality = nx.degree_centrality(actors_graph)
sorted_centrality = list(degree_centrality.items())
sorted_centrality.sort(key=lambda item: item[1], reverse=True)

In [None]:
# The five actors with the highest degree centrality (most densely connected).
actors_col = "actor_name"
most_central = sorted_centrality[:5]
for node_id, centrality in most_central:
    actor_name = actors_agg_df.loc[node_id, actors_col]
    print(actor_name, centrality)

In [None]:
# The five actors with the lowest degree centrality (least densely connected).
least_central = sorted_centrality[-5:]
for node_id, centrality in least_central:
    actor_name = actors_agg_df.loc[node_id, actors_col]
    print(actor_name, centrality)

In [None]:
# Hubs: nodes whose degree is strictly greater than the average degree.
degrees_np = np.array(degree_list)
above_average = degrees_np > average_degree

# Positions of the hub nodes in degree_list, used as row labels into the actor table.
(indexes,) = np.where(above_average)
print(f"Actor's hubs: number {len(indexes)}")
actors_agg_df.loc[indexes, actors_col]