In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import networkx as nx
import seaborn as sns
import collections

from project_utils import attrs_to_graph, gefx_compatible

# Re-import edited project modules (e.g. project_utils) automatically before
# each cell runs, so helper changes are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2
# Code-formatting extension for the notebook cells
# (presumably shipped by the nb_black package — confirm in the environment).
%reload_ext lab_black

# Apply seaborn's default styling to every matplotlib figure created below.
sns.set()

# Diameter of the graph

In [2]:
# Load the aggregated actor adjacency matrix stored as a NumPy array on disk.
actors_agg_adj = np.load("sparse_agg_actor_adj.npy")
# Build the graph with `from_numpy_array`: `from_numpy_matrix` was removed in
# networkx 3.0, while `from_numpy_array` exists since networkx 2.0 and yields
# the same graph for a 2-D ndarray, so this works on old and new releases.
actors_graph = nx.from_numpy_array(actors_agg_adj)

In [None]:
# nx.diameter raises NetworkXError when the graph is disconnected (path lengths
# are infinite). Check connectivity first and fall back to the largest
# connected component so the cell still produces a meaningful number.
if nx.is_connected(actors_graph):
    print(f"The diameter of the graph is: {nx.diameter(actors_graph)}")
else:
    largest_component = actors_graph.subgraph(
        max(nx.connected_components(actors_graph), key=len)
    )
    print(
        "The graph is disconnected; the diameter of its largest connected "
        f"component is: {nx.diameter(largest_component)}"
    )

# Sparsity of the graph

#### HeatMap

In [None]:
# Render the adjacency matrix as a heatmap on an explicit figure/axes pair and
# export the result as a PDF.
heatmap_fig, heatmap_ax = plt.subplots(figsize=(15, 10))
sns.heatmap(actors_agg_adj, ax=heatmap_ax)
heatmap_fig.savefig("plots/adj_actors_heatmap.pdf")

### Sparsity 

In [None]:
# Sparsity pattern of the adjacency matrix: plt.spy marks the non-zero entries.
plt.figure(figsize=(15, 10))
plt.spy(actors_agg_adj)
# Title typo fixed ("Adjj" -> "Adjacency"); the output file name is left
# unchanged so anything that already references the PDF keeps working.
plt.title("Sparsity of Adjacency Matrix")
plt.savefig("plots/sparsity_adjj.pdf")

# Connected components

In [None]:
# Count the connected components of the actor graph. Despite its name,
# `connected_components` holds the count, not the components themselves.
connected_components = nx.number_connected_components(actors_graph)
# Bare expression: shown as the cell output.
connected_components

# Degree Distribution

In [None]:
# Node degrees of the actor graph, largest first.
degree_sequence = sorted(
    (degree for _node, degree in actors_graph.degree()), reverse=True
)

# Labels are set before plotting, then the histogram + KDE is drawn on the
# same axes and exported as a PDF.
degree_fig, degree_ax = plt.subplots(figsize=(15, 10))
degree_ax.set_title("Degree Distribution")
degree_ax.set_xlabel("Degree")
degree_ax.set_ylabel("Frequency Normalized")
# NOTE(review): distplot is deprecated in recent seaborn releases — consider
# histplot when the environment is upgraded; confirm against the pinned version.
ax = sns.distplot(degree_sequence, kde=True, ax=degree_ax)
degree_fig.savefig("plots/degree_distribution.pdf")

# Spectrum

In [None]:
# Eigenvalues of the graph Laplacian of the actor graph (plotted below as
# L_comb).
# NOTE(review): likely expensive on a large graph — confirm runtime.
lamb_comb = nx.laplacian_spectrum(actors_graph)
# Eigenvalues of the normalized Laplacian of the same graph (plotted below as
# L_norm).
lamb_norm = nx.normalized_laplacian_spectrum(actors_graph)

In [None]:
# Put both spectra side by side: one column per Laplacian variant, one row per
# eigenvalue index.
spectrum = pd.DataFrame(
    dict(
        laplacian=lamb_comb,
        normalized_laplacian=lamb_norm,
    )
)
# Bare expression: display the frame as the cell output.
spectrum

In [None]:
# Draw both eigenvalue sequences (one line per DataFrame column) on a single
# explicit axes and export the figure as a PDF.
spectrum_fig, spectrum_ax = plt.subplots(figsize=(15, 10))
spectrum_ax.set_xlabel("Index")
spectrum_ax.set_ylabel("Eigenvalue")
spectrum_ax.set_title("Eigenvalues $L_{comb}$ and $L_{norm}$")
sns.lineplot(data=spectrum, ax=spectrum_ax)
spectrum_fig.savefig("plots/both_spectrum.pdf")

In [None]:
# Eigenvalues of the Laplacian against their index, exported as a PDF.
plt.figure(figsize=(15, 10))
# x/y are passed by keyword: recent seaborn releases accept them only as
# keywords, and the keyword form also works on older releases.
sns.lineplot(x="index", y="laplacian", data=spectrum.reset_index())
# Labels and title are applied after plotting so that axis labels seaborn
# derives from the column names cannot replace the intended ones.
plt.xlabel("Index")
plt.ylabel("Eigenvalue")
plt.title("Eigenvalues $L_{comb}$")
plt.savefig("plots/laplacian_spectrum.pdf")

In [None]:
# Eigenvalues of the normalized Laplacian against their index, exported as a
# PDF.
plt.figure(figsize=(15, 10))
# x/y are passed by keyword: recent seaborn releases accept them only as
# keywords, and the keyword form also works on older releases.
sns.lineplot(x="index", y="normalized_laplacian", data=spectrum.reset_index())
# Labels and title are applied after plotting so that axis labels seaborn
# derives from the column names cannot replace the intended ones.
plt.xlabel("Index")
plt.ylabel("Eigenvalue")
plt.title("Eigenvalues $L_{norm}$")
plt.savefig("plots/normalized_laplacian_spectrum.pdf")

# Average Degree Connectivity

In [None]:
# networkx's average degree connectivity: a dict keyed by node degree.
# NOTE(review): this is the mean neighbour degree per degree value, not the
# graph's mean degree — confirm this is the intended statistic.
average_degree = nx.average_degree_connectivity(actors_graph)
# Preview only the first ten entries, in the dict's own order.
{degree: average_degree[degree] for degree in list(average_degree)[:10]}

# Save for Gephi

In [None]:
# Load the aggregated actor table whose columns are attached to the graph
# before it is exported.
actors_agg_df = pd.read_pickle("actors_agg_df.pkl")

# Columns that are passed through gefx_compatible before export
# (presumably set-valued, going by the list name — confirm in project_utils).
columns_of_type_set = [
    "cast",
    "crew",
    "movie_id",
    "genres",
    "keywords",
    "original_language",
    "production_companies",
    "production_countries",
    "spoken_languages",
    "title",
    "release_date",
    "status",
]

# Convert the listed columns in one pass and expose the "actors" column under
# the name "name".
actors_agg_df = actors_agg_df.assign(
    **{col: actors_agg_df[col].map(gefx_compatible) for col in columns_of_type_set}
).rename(columns={"actors": "name"})

# Hand the prepared table to the project helper together with the graph.
attrs_to_graph(actors_graph, actors_agg_df)

In [None]:
# Export the graph as a GEXF file under DATA_PATH (the section is meant for
# opening the graph in Gephi).
DATA_PATH = "data"
gexf_target = f"{DATA_PATH}/louvain_graph.gexf"
nx.write_gexf(actors_graph, gexf_target)

# Plot the Graph

In [None]:
# Quick look at the full actor graph with networkx's default drawing settings.
# NOTE(review): no layout or size options are given — may be slow and
# unreadable on a large graph; confirm this cell is still wanted.
nx.draw(actors_graph)

In [None]:
# Same graph drawn through draw_networkx, again with networkx's defaults.
nx.draw_networkx(actors_graph)

In [None]:
# # Graph-tool
# g = Graph()
# g.add_edge_list(np.transpose(actors_agg_adj[100:200, 100:200].nonzero()))

In [None]:
# gt.draw.graph_draw(g, fmt="png", output_size=(600, 600))

In [None]:
# Extract the largest connected component of the actor graph.
# `nx.connected_component_subgraphs` was removed in networkx 2.4, so pick the
# largest node set from `nx.connected_components` and materialise it instead;
# `.copy()` gives an independent graph, as the removed helper did by default.
largest_component_nodes = max(nx.connected_components(actors_graph), key=len)
Gc_actors = actors_graph.subgraph(largest_component_nodes).copy()

# Size of the giant component, reused in the plot title below.
nb_edges_gc = Gc_actors.number_of_edges()
nb_nodes_gc = Gc_actors.number_of_nodes()

# Force-directed node positions for drawing.
# NOTE(review): the layout starts from random positions, so coordinates differ
# between runs unless a seed is supplied.
coords_Gc = nx.spring_layout(Gc_actors, k=0.03)

In [None]:
# Draw the giant component with the precomputed layout and save the figure.
plt.figure(figsize=(28, 7))
# No per-node colour values are supplied, so every node gets networkx's single
# default colour. The former cmap/vmin/vmax arguments were therefore ignored
# and the colorbar described no data (older matplotlib even raises for a
# mappable without an array), so both have been removed.
nx.draw_networkx_nodes(Gc_actors, coords_Gc, node_size=10)
nx.draw_networkx_edges(Gc_actors, coords_Gc, alpha=0.1, width=0.7)
plt.title("Graph, with {} edges and {} nodes".format(nb_edges_gc, nb_nodes_gc))
# No file extension: matplotlib falls back to its default output format.
plt.savefig("plots/graph")
plt.show()