In [1]:
import json
import networkx as nx
with open("hollywood_graph.json") as f:
    graph_dict = json.load(f)

graph = nx.node_link_graph(graph_dict)

In [2]:

print("GRAPH METRICS - HOLLYWOOD\n")
print("Number of nodes:", graph.number_of_nodes())
print("Number of edges:", graph.number_of_edges())

movie_nodes = {n for n, d in graph.nodes(data=True) if d.get("bipartite") == 0}
actor_nodes = {n for n, d in graph.nodes(data=True) if d.get("bipartite") == 1}

print(f"Movies: {len(movie_nodes)}")
print(f"Actors: {len(actor_nodes)}")
print("Graph density:", nx.bipartite.density(graph, list(actor_nodes)))

avg_actors_per_movie = graph.size() / len(movie_nodes)
avg_movies_per_actor = graph.size() / len(actor_nodes)

print(f"Average number of actors per movie: {round(avg_actors_per_movie,2)}")
print(f"Average number of movies per actor: {round(avg_movies_per_actor,2)}")
print("Is there any self loop?", len(list(nx.selfloop_edges(graph))))
if len(list(nx.selfloop_edges(graph))) > 0:
    print(list(nx.selfloop_edges(graph))) 

GRAPH METRICS - HOLLYWOOD

Number of nodes: 131642
Number of edges: 329137
Movies: 20184
Actors: 111458
Graph density: 0.0001463046814930588
Average number of actors per movie: 16.31
Average number of movies per actor: 2.95
Is there any self loop? 0


In [3]:
actors_graph = nx.projected_graph(graph, actor_nodes)
actors_nodes = actors_graph.nodes()
actors_edges = actors_graph.edges()
print(len(actors_graph.edges()))
for actor in actor_nodes:
    if actor[:5] == "movie":
        print(actor)

4872779


In [4]:
print("ASSORTATIVITY BY GENRE - The similarity of genres among actors is: ")
print(round(nx.attribute_assortativity_coefficient(actors_graph, "genre")*100,2), "%")

print("ASSORTATIVITY BY PERIOD - The period similarity among actors is: ")
print(round(nx.attribute_assortativity_coefficient(actors_graph, "period")*100,2), "%")



ASSORTATIVITY BY GENRE - The similarity of genres among actors is: 
30.92 %
ASSORTATIVITY BY PERIOD - The period similarity among actors is: 
80.14 %


In [5]:
import igraph as ig
import networkx as nx

# Supponiamo che il tuo grafo si chiami 'actors_graph'
# Nota: igraph non ama gli attributi complessi (come le liste), 
# quindi assicurati che i nodi abbiano attributi semplici (stringhe, int).

# Verifica la conversione
# print(f"Nodi in igraph: {g_ig.vcount()}")
# print(f"Archi in igraph: {g_ig.ecount()}")

In [6]:
import igraph as ig
print("BETWEENNESS CENTRALITY - which actor acts as the biggest bridge?")
actors_graph_ig = ig.Graph.from_networkx(actors_graph)
# betweenness = actors_graph_ig.betweenness(cutoff=3)
# print(betweenness)

BETWEENNESS CENTRALITY - which actor acts as the biggest bridge?


In [None]:
# 1. Prendi solo la componente più grande (riduce i calcoli inutili)
giant = actors_graph_ig.connected_components().giant()

# 2. Usa un cutoff molto basso (es. 2) 
# Questo cattura solo i "ponti" immediati tra gruppi di attori.
# In un network di attori, chi ha un grado altissimo è spesso anche un nodo con alta betweenness.
degrees = giant.degree()
print(degrees)

Calcolo Centralità Harmonica (molto più veloce)...
[70, 133, 43, 17, 82, 12, 72, 25, 41, 34, 20, 53, 273, 60, 92, 15, 36, 140, 74, 487, 15, 460, 12, 101, 117, 43, 70, 64, 20, 15, 40, 110, 46, 11, 191, 66, 23, 125, 49, 5, 297, 69, 45, 12, 68, 34, 15, 23, 9, 85, 70, 112, 89, 81, 151, 284, 9, 221, 482, 85, 148, 18, 40, 9, 95, 15, 9, 13, 30, 13, 9, 82, 292, 74, 329, 729, 69, 13, 11, 13, 7, 43, 143, 71, 471, 6, 58, 5, 34, 73, 7, 12, 89, 54, 63, 62, 24, 38, 109, 44, 53, 120, 75, 166, 94, 55, 5, 124, 18, 10, 22, 79, 35, 1015, 11, 938, 42, 20, 31, 121, 28, 130, 22, 30, 100, 313, 84, 115, 14, 38, 19, 31, 71, 108, 6, 13, 171, 29, 53, 256, 111, 27, 127, 38, 16, 29, 9, 11, 10, 152, 41, 4, 42, 26, 29, 39, 53, 8, 41, 32, 41, 113, 24, 32, 33, 12, 17, 195, 55, 65, 17, 13, 20, 78, 23, 147, 163, 9, 148, 41, 98, 45, 83, 15, 73, 84, 10, 12, 14, 19, 76, 113, 85, 14, 29, 30, 19, 130, 88, 75, 76, 47, 17, 63, 7, 64, 116, 40, 208, 25, 7, 14, 17, 33, 30, 19, 27, 62, 72, 19, 31, 81, 57, 42, 84, 19, 40, 58, 39, 9

In [11]:
from igraph import VertexSeq
print(type(giant))
giant_nodes = VertexSeq(giant)
print(len(giant_nodes))

<class 'igraph.Graph'>
109623


In [9]:
# pagerank
pagerank = actors_graph_ig.pagerank()
print(pagerank)

[7.103552847615468e-06, 1.0865160821302064e-05, 6.275535226688286e-06, 4.227731815206786e-06, 8.04520116977536e-06, 8.98157878191771e-06, 2.5343739222359714e-06, 6.801530969835437e-06, 2.8563596584098054e-06, 5.744806963507193e-06, 5.222337559718258e-06, 3.149937397581159e-06, 7.970928454457734e-06, 2.55203472120696e-05, 7.882942221138575e-06, 1.370445022602184e-05, 2.945114890868378e-06, 5.006917651993558e-06, 1.4069673430364038e-05, 7.530630482060928e-06, 3.504453930548522e-05, 3.1282048615546415e-06, 4.0947139072496655e-05, 2.77955575037624e-06, 1.0353724714129135e-05, 1.071977721631943e-05, 5.500787617004816e-06, 8.981578781913839e-06, 7.241436192185139e-06, 6.402718636337292e-06, 4.5512124353386404e-06, 2.6725570833281176e-06, 4.642393766696688e-06, 9.56102134781207e-06, 5.430657177211432e-06, 3.0261985069178765e-06, 1.471197415447984e-05, 6.732756542796989e-06, 3.890781955438153e-06, 1.2684639577178122e-05, 6.290378683955575e-06, 3.1325024669830473e-06, 2.9413468572151357e-05, 7.

In [None]:
print(actors_graph.connected_components())

In [None]:
# print("BETWEENNESS CENTRALITY - which actor acts as the biggest bridge?")
# betweenness = list(nx.betweenness_centrality(graph, k=1000, normalized = True, endpoints = False)).items()
# betweenness = sorted(betweenness, key = lambda x: x[1])
# print(betweenness[:10])


In [None]:
# closeness_harmonic = list(nx.harmonic_centrality(graph).items())
# closeness_harmonic = sorted(closeness_harmonic, key = lambda x: x[1])
# print(closeness_harmonic[:10])