In [2]:
import json
import networkx as nx

# Load the serialized Hollywood graph and rebuild it as a NetworkX graph.
# JSON text is UTF-8 by specification; the encoding is passed explicitly so that
# non-ASCII names decode the same way whatever the platform's default encoding is.
with open("hollywood_graph.json", encoding="utf-8") as f:
    graph_dict = json.load(f)

# node_link_graph expects a node-link style dictionary
# (presumably produced by nx.node_link_data — confirm against the export step).
graph = nx.node_link_graph(graph_dict)

In [2]:
# Connectivity summary: whether the whole graph is a single component,
# followed by how many connected components it has.
is_single_component = nx.is_connected(graph)
print(f"Is the graph connected? {is_single_component}")
component_count = nx.number_connected_components(graph)
print(f"Connected components: {component_count}")

Is the graph connected? False
Connected components: 869


In [3]:

# Headline size, density and average-degree metrics for the movie/actor graph.
print("GRAPH METRICS - HOLLYWOOD\n")

# The edge count is needed for the report line and for both averages: fetch it once.
edge_count = graph.number_of_edges()
print("Number of nodes:", graph.number_of_nodes())
print("Number of edges:", edge_count)

# Partition nodes by their "bipartite" attribute: 0 selects movies, 1 selects actors.
# Because .get() is used, a node without the attribute ends up in neither set.
movie_nodes = {n for n, d in graph.nodes(data=True) if d.get("bipartite") == 0}
actor_nodes = {n for n, d in graph.nodes(data=True) if d.get("bipartite") == 1}

print(f"Movies: {len(movie_nodes)}")
print(f"Actors: {len(actor_nodes)}")
print("Graph density:", nx.bipartite.density(graph, list(actor_nodes)))

# Assuming every edge joins one movie to one actor, edges / movies is the mean
# cast size and edges / actors is the mean number of movies per actor.
avg_actors_per_movie = edge_count / len(movie_nodes)
avg_movies_per_actor = edge_count / len(actor_nodes)

print(f"Average number of actors per movie: {round(avg_actors_per_movie,2)}")
print(f"Average number of movies per actor: {round(avg_movies_per_actor,2)}")

# Materialize the self-loop edges a single time instead of re-scanning the graph
# for the count, for the emptiness test and again for the listing.
self_loops = list(nx.selfloop_edges(graph))
print("Is there any self loop?", len(self_loops))
if self_loops:
    print(self_loops)


GRAPH METRICS - HOLLYWOOD

Number of nodes: 131642
Number of edges: 329137
Movies: 20184
Actors: 111458
Graph density: 0.0001463046814930588
Average number of actors per movie: 16.31
Average number of movies per actor: 2.95
Is there any self loop? 0


In [4]:
# Rank movies and actors by bipartite degree centrality and print the top 10 of each.
# Movies are the nodes tagged bipartite == 0; every remaining node is treated as an actor.
movie_nodes = {n for n, d in graph.nodes(data=True) if d["bipartite"] == 0}
actor_nodes = set(graph) - movie_nodes
print("DEGREE CENTRALITY\n")

# Raw degrees read straight from the graph, keyed by node: a movie's degree is its
# cast size, an actor's degree is the number of movies they appear in.
actor_deg = dict(graph.degree(actor_nodes))
movie_deg = dict(graph.degree(movie_nodes))

# Bipartite degree centrality normalizes each node's degree by the size of the
# opposite node set; the returned mapping covers both node sets.
centr = list(nx.bipartite.degree_centrality(graph, nodes=movie_nodes).items())

# Split the (node, centrality) pairs by the node id's 5-character type prefix.
# NOTE(review): assumes every node id is a string starting with "actor" or "movie";
# ids with any other prefix are silently left out of both rankings.
centr_actors = [tup for tup in centr if tup[0][:5] == "actor"]
centr_movies = [tup for tup in centr if tup[0][:5] == "movie"]
centr_movies = sorted(centr_movies, key=lambda x: x[1], reverse=True)
centr_actors = sorted(centr_actors, key=lambda x: x[1], reverse=True)

# [6:] drops the type prefix plus the one separator character that follows it,
# leaving only the title / name for display.
for movie in centr_movies[:10]:
    print(movie[0][6:], "-", "centrality :", round(movie[1], 6), ", cast :", movie_deg[movie[0]], "actors\n")
for actor in centr_actors[:10]:
    print(actor[0][6:], "-", "centrality :", round(actor[1], 6), ", appearances :", actor_deg[actor[0]], "movies\n")

DEGREE CENTRALITY

 Around the World in Eighty Days - centrality : 0.002799 , cast : 312 actors

 Les Misérables - centrality : 0.002602 , cast : 290 actors

 Rock of Ages - centrality : 0.00201 , cast : 224 actors

 Alice in Wonderland - centrality : 0.00192 , cast : 214 actors

 Mr. Smith Goes to Washington - centrality : 0.001911 , cast : 213 actors

 Jason Bourne - centrality : 0.001866 , cast : 208 actors

 Beauty and the Beast - centrality : 0.00183 , cast : 204 actors

 Titanic - centrality : 0.001767 , cast : 197 actors

 Union Pacific - centrality : 0.001651 , cast : 184 actors

 You Don't Mess with the Zohan - centrality : 0.001642 , cast : 183 actors

 Bess Flowers - centrality : 0.011346 , appearances : 229 movies

 John Wayne - centrality : 0.006044 , appearances : 122 movies

 Samuel L. Jackson - centrality : 0.005747 , appearances : 116 movies

 John Carradine - centrality : 0.005103 , appearances : 103 movies

 Irving Bacon - centrality : 0.004608 , appearances : 93 mo

In [None]:
# Draft, currently disabled: extract one connected component as a subgraph.
# NOTE(review): nx.connected_components yields components in no guaranteed order,
# so index [0] is not necessarily the largest ("main") component. If the largest
# one is intended, select it with max(nx.connected_components(graph), key=len)
# — confirm intent before enabling.
# main_component_nodes = list(nx.connected_components(graph))[0]
# main_component = graph.subgraph(main_component_nodes)