In [2]:
import igraph as ig
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from igraph import Graph, plot

In [90]:
g_crates = ig.Graph.Read_GraphML("../../networks/crates_io.graphml")
g_python = ig.Graph.Read_GraphML("../../networks/python-dependencies.graphml")
g_npm = ig.Graph.Read_GraphML("../../networks/npm_graph_full.graphml")

  g_crates = ig.Graph.Read_GraphML("../../networks/crates_io.graphml")


In [46]:
g_crates.simplify()
g_python.simplify()
g_npm.simplify()

<igraph.Graph at 0x7f3ef11c7950>

In [58]:
def basic_stats(g):
    print("Number of vertices: ", g.vcount())
    print("Number of edges: ", g.ecount())
    print("Density: ", g.density())
    print("Isolated vertices: ", g.isolated_vertices())

In [91]:
def degree_analysis(g):
    degrees = g.degree()
    distribution = g.degree_distribution()
    print("Max degree: ", max(degrees))
    print("Min degree: ", min(degrees))
    print("Mean degree: ", sum(degrees) / len(degrees))
    print(
        "Degree distribution: ",
        f"mean: {distribution.mean}, sd: {distribution.sd}, variance: {distribution.var}",
    )

Number of vertices:  145269
Number of edges:  830376
Density:  3.9348835490524716e-05
Max degree:  37632
Min degree:  0
Mean degree:  11.432253268075089
Degree distribution:  mean: 11.43225326807511, sd: 187.20010975512147, variance: 35043.881092329524
Number of vertices:  145269
Number of edges:  830337
Density:  3.934698740654333e-05
Max degree:  37632
Min degree:  0
Mean degree:  11.431716333147472
Degree distribution:  mean: 11.431716333147309, sd: 187.1998318166331, variance: 35043.77703217571


In [None]:
def path_analysis(g):
    print("Diameter: ", g.diameter())
    print("Average path length: ", g.average_path_length())

In [87]:
def clustering_analysis(g):
    print(
        "Global clustering coefficient: ",
        g.transitivity_avglocal_undirected(mode="zero"),
    )
    print("transitivity", g.transitivity_undirected())

Global clustering coefficient:  0.1764130391404384
transitivity 0.00019366561746196487


In [62]:
def components_analysis(g):
    components = g.components(mode="weak")
    isolated = components.sizes().count(1)
    print("Number of isolated vertices: ", isolated)
    print("Number of connected components: ", components.summary())
    print("Largest connected component: ", components.giant().vcount())

In [63]:
def centrality_analysis(g):
    betweenness = g.betweenness()
    highest_betweenness = g.vs.select(_betweenness=max(betweenness))
    print(
        f'Betweenness centrality: {np.mean(betweenness)}, highest betweenness: {highest_betweenness["id"]}'
    )

    closeness = g.closeness()
    highest_closeness = g.vs.select(_closeness=max(closeness))
    print(
        f'Closeness centrality: {np.mean(closeness)}, highest closeness: {highest_closeness["id"]}'
    )

    pagerank = g.pagerank()
    highest_pagerank = g.vs.select(_pagerank=max(pagerank))
    print(
        f'Pagerank centrality: {np.mean(pagerank)}, highest pagerank: {highest_pagerank["id"]}'
    )

In [64]:
def visualize(g):
    visual_style = {}
    visual_style["vertex_size"] = 20
    visual_style["vertex_label"] = g.vs["id"]
    visual_style["edge_width"] = 1
    visual_style["layout"] = g.layout("kk")  # Kamada-Kawai layout
    plot(g, **visual_style)

In [65]:
basic_stats(g_crates)

Number of vertices:  145269
Number of edges:  830337
Density:  3.934698740654333e-05


In [66]:
degree_analysis(g_crates)

Max degree:  37632
Min degree:  0
Mean degree:  11.431716333147472
Degree distribution:  mean: 11.431716333147309, sd: 187.1998318166331, variance: 35043.77703217571


In [67]:
path_analysis(g_crates)

Diameter:  24
Average path length:  7.58761621807268


In [76]:
clustering_analysis(g_crates)

Global clustering coefficient:  0.306198573203652
transitivity 0.0013171994730663353


In [69]:
components_analysis(g_crates)

Number of connected components:  Clustering with 145269 elements and 25179 clusters
Largest connected component:  119461
