In [1]:
import math

import networkx as nx
from bokeh.io import output_notebook, show
from bokeh.models import Circle, MultiLine, Range1d
from bokeh.palettes import Blues8
from bokeh.plotting import figure, from_networkx
from bokeh.transform import linear_cmap

from rs_graph.data import load_rs_graph_upstream_deps_dataset

###############################################################################

# Direct Bokeh output to the notebook, so that the later `show(...)` call
# displays its plot inline below the cell.
output_notebook()

In [2]:
# Read the upstream-dependency dataset
all_upstream_deps = load_rs_graph_upstream_deps_dataset()

# Keep only the rows whose dependency comes from the pip registry
is_pip_registry = all_upstream_deps["upstream_dep_registry"] == "pip"
df = all_upstream_deps.loc[is_pip_registry]

In [3]:
# Most machines can't render the full dataset, so plot a random subset.
# - Cap the sample size at the number of available rows: DataFrame.sample
#   raises ValueError when n exceeds the population (sampling without
#   replacement).
# - Fix the random state so Restart & Run All draws the same subset each time.
sampled_df = df.sample(n=min(5000, len(df)), random_state=42)

# Create graph: one edge per sampled row, linking an upstream dependency name
# to the repo in that row
graph = nx.from_pandas_edgelist(sampled_df, "upstream_dep_name", "repo")

# Count how many sampled rows reference each upstream dependency
upstream_dep_value_counts = sampled_df.upstream_dep_name.value_counts()
maximum_count = upstream_dep_value_counts.max()

# Kept so the name stays available to any later cell; membership checks below
# use the dict instead, because `x in ndarray` is a linear scan per lookup.
unique_upstreams = sampled_df.upstream_dep_name.unique()
upstream_count_by_name = upstream_dep_value_counts.to_dict()

# Largest value of the normalized (1..max) scale used for node size and color
max_normalized_value = 20

# Create dependent dicts:
# - `dependents`: raw row count for upstream deps, 1 for every other node
# - `normalized_dependents`: count rescaled to 1..max_normalized_value
dependents = {}
normalized_dependents = {}
for node in graph.nodes():
    if node in upstream_count_by_name:
        # Plain int (not a NumPy scalar) keeps the node attribute simple
        upstream_deps_count = int(upstream_count_by_name[node])
        dependents[node] = upstream_deps_count
        normalized_dependents[node] = math.ceil(
            (upstream_deps_count / maximum_count) * max_normalized_value
        )
    else:
        # Node only ever appears on the "repo" side of an edge
        dependents[node] = 1
        normalized_dependents[node] = 1

# Attach both values to the graph nodes so the plot can read them
nx.set_node_attributes(
    graph,
    name="dependents",
    values=dependents,
)
nx.set_node_attributes(
    graph,
    name="normalized_dependents",
    values=normalized_dependents,
)

# Create the figure: hover tooltips, toolbar tools, scroll-to-zoom, and fixed
# symmetric axis ranges (no title or explicit dimensions are set here)
axis_limit = 10.1
hover_fields = [("Package", "@index"), ("Dependents", "@dependents")]
plot = figure(
    x_range=Range1d(-axis_limit, axis_limit),
    y_range=Range1d(-axis_limit, axis_limit),
    tools="pan,wheel_zoom,save,reset",
    active_scroll="wheel_zoom",
    tooltips=hover_fields,
)

# Create a network graph object.
# Extra keyword arguments are forwarded to the layout function; `seed` fixes
# spring_layout's random initial positions so re-running the notebook draws
# the same picture.
network_graph = from_networkx(
    graph, nx.spring_layout, scale=10, center=(0, 0), seed=42
)

# Set node sizes and colors according to the normalized dependents count
# (color as a spectrum of the Blues8 palette between the observed min and max)
normalized_values = network_graph.node_renderer.data_source.data[
    "normalized_dependents"
]
minimum_value_color = min(normalized_values)
maximum_value_color = max(normalized_values)

# NOTE(review): newer Bokeh releases appear to deprecate `Circle(size=...)` in
# favor of `Scatter` — confirm against the pinned Bokeh version.
network_graph.node_renderer.glyph = Circle(
    size="normalized_dependents",
    line_alpha=0,
    fill_color=linear_cmap(
        "normalized_dependents", Blues8, minimum_value_color, maximum_value_color
    ),
)

# Draw edges as thin, mostly transparent light-grey lines
edge_style = MultiLine(line_color="lightgrey", line_width=1, line_alpha=0.2)
network_graph.edge_renderer.glyph = edge_style

# Attach the graph renderer to the figure and display it
plot.renderers.append(network_graph)
show(plot)