# Small graphs

Here, we'll make a few small graphs and display them with `cosmograph`

In [34]:
min_version = '0.0.20'

try:
    from packaging import version
    cosmograph_version = getattr(__import__('cosmograph'), '__version__', None)
    if cosmograph_version and version.parse(cosmograph_version) < version.parse(min_version):
        print(f'Your cosmograph version was less than {min_version}. Upgrading to latest version...')
        %pip install -U --quiet cosmograph
except (ImportError, ModuleNotFoundError):
    print('Cosmograph not installed. Will install latest version...')
    %pip install --quiet cosmograph

In [3]:
import pandas as pd
from cosmograph import cosmo

In [None]:
single_link = {
    'points': [{'id': '0'}, {'id': '1'}],
    'links': [{'source': '0', 'target': '1'}],
}

points, links = map(pd.DataFrame, single_link.values())

g = cosmo(
    points=points, 
    links=links,
    point_id_by='id',
    link_source_by='source',
    link_target_by='target',
    simulation_gravity=0,
    simulation_center=1,
)
g

Cosmograph(background_color=None, focused_point_ring_color=None, hovered_point_ring_color=None, link_color=Non…

If your graph is small, it will tend to drift away (this is why we put an explicit `simulation_gravity` and `simulation_center`, to reduce that effect). But know this, you can always fit the view (`fit_view()`), to recenter things (and actually see your graph!).

In [21]:
g.fit_view()

We'll use the `linked` package to make some graphs in a quick and agreeable way.
If you don't have `linked` you can `pip install linked`. 

In [None]:
from functools import partial
from linked import mini_dot_to_graph_jdict

# customize the mini_dot_to_graph_jdict to use the field names we'll be using here
# (You're basically saying "this is what I want you to call nodes, links, etc.")
my_mini_dot = partial(
    mini_dot_to_graph_jdict,
    field_names=dict(
        nodes='points', links='links', source='source', target='target', id='id'
    ),
)

def points_and_links_from_mini_dot(mini_dot_str: str):
    """
    Make points and links data from a mini_dot string and return them as DataFrames.
    """
    g = my_mini_dot(mini_dot_str)
    points, links = map(pd.DataFrame, [g['points'], g['links']])
    return points, links



In [23]:
from cosmograph.base import process_cosmo_input

process_cosmo_input(None, {})

{}

In [15]:

from cosmograph import cosmo
cosmo(
    points=points,
    links=links,
    point_id_by='id',
    link_source_by='source',
    link_target_by='target',
)

Unnamed: 0,id
0,1
1,2
2,5
3,6
4,7
5,3


In [16]:
links

Unnamed: 0,source,target
0,1,2
1,2,5
2,2,6
3,2,7
4,3,5
5,3,6
6,3,7


In [7]:
links

Unnamed: 0,source,sourceidx,date,time,color,target,targetidx,value
0,0,0,2012-02-08 11:17:27.900000+00:00,1970-01-01T01:06:54.149Z,#4e79a7,1,1,1
1,0,0,2012-02-08 11:17:27.900000+00:00,1970-01-01T01:06:54.149Z,#4e79a7,92,92,9
2,1,1,2012-10-10 20:58:18.664000+00:00,1970-01-01T01:33:27.754Z,#af7aa1,2,2,3
3,1,1,2012-10-10 20:58:18.664000+00:00,1970-01-01T01:33:27.754Z,#af7aa1,93,93,4
4,2,2,2013-01-20 10:49:31.343000+00:00,1970-01-01T02:25:52.892Z,#ff9da7,3,3,6
...,...,...,...,...,...,...,...,...
16373,8274,8274,2016-04-18 22:18:42.012000+00:00,1970-01-01T00:25:07.895Z,#e15759,8275,8275,7
16374,8275,8275,2016-01-24 21:27:19.899000+00:00,1970-01-01T01:23:04.457Z,#e15759,8276,8276,8
16375,8276,8276,2013-09-30 11:11:20.394000+00:00,1970-01-01T02:40:55.245Z,#9c755f,8277,8277,1
16376,8277,8277,2016-07-25 12:42:05.251000+00:00,1970-01-01T00:34:30.523Z,#af7aa1,8278,8278,7


In [1]:
import pandas as pd

points = pd.read_json('https://gist.githubusercontent.com/Stukova/d2a3bb22c90680b6beaf8a52c3470296/raw/a80fb63f4f4f0135f827c23aff43183de61a153d/points.json', dtype={'id': str})
print(f"{points.shape=}")
links = pd.read_json('https://gist.githubusercontent.com/Stukova/d2a3bb22c90680b6beaf8a52c3470296/raw/a80fb63f4f4f0135f827c23aff43183de61a153d/links.json', dtype={'sourceidx': str, 'targetidx': str})
print(f"{links.shape=}")

from cosmograph_widget import Cosmograph

Cosmograph(
    points=points,
    links=links,
    point_id_by='id',
    link_source_by='source',
    link_target_by='target',
    point_include_columns=['value'],
    point_label_by='label',
    link_include_columns=['value'],
)

points.shape=(8280, 8)
links.shape=(16378, 8)


Cosmograph(background_color=None, focused_point_ring_color=None, hovered_point_ring_color=None, link_color=Non…

In [2]:
from cosmograph_widget import Cosmograph

Cosmograph(
    points=points,
    links=links,
    point_id_by='id',
    link_source_by='source',
    link_target_by='target',
    point_include_columns=['value'],
    point_label_by='label',
    link_include_columns=['value'],
)

ModuleNotFoundError: No module named 'cosmograph_widget'

In [9]:
def _get_pkg_dir_and_name(pkg_dir):
    import os
    pkg_dir = os.path.realpath(pkg_dir)
    if pkg_dir.endswith(os.sep):
        pkg_dir = pkg_dir[:-1]
    pkg_dirname = os.path.basename(pkg_dir)
    return pkg_dir, pkg_dirname

import cosmograph
cosmograph.__file__

_get_pkg_dir_and_name('/Users/thorwhalen/Dropbox/py/proj/c/')
from wads.pack import folders_that_have_init_py_files

folders_that_have_init_py_files('/Users/thorwhalen/Dropbox/py/proj/c/cosmograph')

[]

In [1]:
from cosmograph_widget import Cosmograph

Cosmograph(
    points=points,
    links=links,
    point_id_by='id',
    link_source_by='source',
    link_target_by='target',
    point_include_columns=['value'],
    point_label_by='label',
    link_include_columns=['value'],
)

NameError: name 'points' is not defined

In [1]:
from linked import mini_dot_to_graph_jdict

In [5]:
from cosmograph import cosmo
import pandas as pd

points = pd.DataFrame({
    'id': [1, 2, 3, 4, 5],
    'label': ['Node A', 'Node B', 'Node C', 'Node D', 'Node E'],
    'value': [10, 20, 15, 25, 30],
    'category': ['A', 'B', 'A', 'B', 'A']
})

links = pd.DataFrame({
    'source': [1, 2, 3, 1, 2],
    'target': [2, 3, 4, 5, 4],
    'value': [1.0, 2.0, 1.5, 0.5, 1.8]
})

widget = cosmo(
  points=points,
  links=links,
  point_id_by='id',
  link_source_by='source',
  link_target_by='target',
  point_color_by='category',
  point_include_columns=['value'],
  point_label_by='label',
  link_include_columns=['value'],
)
widget

Cosmograph(background_color=None, focused_point_ring_color=None, hovered_point_ring_color=None, link_color=Non…

In [8]:
t = """In the realm of data science, effective visualization is not just a tool for communication—it’s a window into discovery, uncovering insights and guiding analytical decisions. Visualization of large datasets has long required sophisticated workflows, but Cosmograph changes the game. This cutting-edge JavaScript library, now accessible through its Python wrapper, empowers Pythonistas, data scientists, and AI engineers to create stunning, interactive visualizations directly from their notebooks. With GPU-accelerated, force-directed layouts, Cosmograph handles millions of nodes and edges effortlessly, enabling real-time exploration of multidimensional data and complex networks. Its intuitive tools—zooming, panning, filtering—transform data chaos into clarity.

This session will explore why visualization is essential in modern data science workflows, especially in the age of embeddings, vector databases, and large language models (LLMs). Attendees will see Cosmograph in action through live demonstrations: clustering PyCon talks, analyzing cryptocurrency transactions, mapping social networks, and more. We’ll also delve into practical tools for integrating Cosmograph into Python workflows, from segmenting text and generating embeddings to projecting high-dimensional data into 2D layouts. No specialized expertise is required, though those working in data, machine learning, or AI will find it especially engaging. By the end of the talk, attendees will see how Cosmograph transforms data exploration into an engaging, interactive experience, empowering them to uncover stories hidden within their datasets."""


len(t.split())

213

In [4]:
mini_dot_to_graph_jdict("""
    1 -> 2
    2, 3 -> 5, 6, 7
""")

{'nodes': [{'id': '1'},
  {'id': '2'},
  {'id': '5'},
  {'id': '6'},
  {'id': '7'},
  {'id': '3'}],
 'links': [{'source': '1', 'target': '2'},
  {'source': '2', 'target': '5'},
  {'source': '2', 'target': '6'},
  {'source': '2', 'target': '7'},
  {'source': '3', 'target': '5'},
  {'source': '3', 'target': '6'},
  {'source': '3', 'target': '7'}]}

In [14]:
small_bipartite_graph = mini_dot_to_graph_jdict("""
    1, 2, 3, 4 -> 5, 6, 7
""")


pentagon = mini_dot_to_graph_jdict("""
    1 -> 2
    2 -> 3
    3 -> 4
    4 -> 5
    5 -> 1
""")

six_path = mini_dot_to_graph_jdict("""
    1 -> 2
    2 -> 3
    3 -> 4
    4 -> 5
    5 -> 6
""")


t = small_bipartite_graph
points, links = map(pd.DataFrame, [t['nodes'], t['links']])

g = cosmo(
    points=points, 
    links=links,
    point_id_by='id',
    link_source_by='source',
    link_target_by='target',
    simulation_gravity=0,
)
g


Cosmograph(background_color=None, focused_point_ring_color=None, hovered_point_ring_color=None, link_color=Non…

In [13]:
len("""In this tutorial, participants will learn the end-to-end pipeline for preparing and analyzing text data, from [acquisition and segmentation](https://www.geeksforgeeks.org/text-preprocessing-for-nlp-tasks/) to [embedding](https://www.analyticsvidhya.com/blog/2021/06/text-preprocessing-in-nlp-with-python-codes/) and [visualization](https://diogoribeiro7.github.io/natural%20language%20processing/text_preprocessing_techniques_nlp_data_science/). With the rise of [large language models (LLMs)](https://www.ibm.com/blog/retrieval-augmented-generation-RAG/) and the surrounding ecosystem of tools, understanding how to preprocess, organize, and explore text data has become a vital skill for data scientists and AI engineers.

We’ll begin by discussing software design principles that simplify interfacing with modern tools, enabling participants to maintain flexibility and swap out components as technologies evolve. The session then delves into practical steps: acquiring text data, segmenting it appropriately, and generating [embeddings](https://www.analyticsvidhya.com/blog/2021/06/text-preprocessing-in-nlp-with-python-codes/)—high-dimensional representations of text segments—using modern, cost-effective tools.

Participants will also explore practical applications of embeddings, including building search engines, analyzing relationships within corpora, and leveraging [vector databases](https://www.datastax.com/guides/what-is-retrieval-augmented-generation) for advanced tasks like [retrieval-augmented generation (RAG)](https://www.datacamp.com/blog/what-is-retrieval-augmented-generation-rag). These systems allow LLMs to tap into knowledge bases, making AI systems more robust and contextually aware.

Finally, the workshop will focus on preparing embeddings and metadata for interactive visualization, introducing the [Cosmograph library](https://www.geeksforgeeks.org/what-is-retrieval-augmented-generation-rag/). Participants will learn to transform multidimensional data into interactive visualizations that reveal hidden patterns, support storytelling, and facilitate sharing insights with collaborators or stakeholders.

By the end of this hands-on tutorial, attendees will not only have a practical understanding of the data preparation pipeline but also be equipped with strategies and tools to confidently handle text data workflows in modern AI systems. No advanced expertise is required, though familiarity with Python and basic data processing will be beneficial..""".split())

244

In [None]:

def path(self, n):
    return mini_dot_to_graph_jdict(
        "\n".join(f"{i} -> {i+1}" for i in range(1, n))
    )

def cycle(self, n):
    return mini_dot_to_graph_jdict(
        "\n".join(f"{i} -> {i+1}" for i in range(1, n))
        + f"\n{n} -> 1"
    )   

def bipartite(self, n, m):
    return mini_dot_to_graph_jdict(
        "\n".join(f"{i} -> {j}" for i in range(1, n+1) for j in range(n+1, n+m+1))
    )
