# Traverse: Community Detection on Co-occurrence Graphs

Detect structural communities in a genre/style co-occurrence graph using
NetworkX community-detection algorithms (Louvain, greedy modularity, label
propagation), then visualize the clusters in Cosmograph.

**Prerequisites:**
```bash
pip install -e ".[dev]"
cd src/traverse/cosmograph/app && npm install && npm run build
```

## 1. Configuration

In [None]:
from pathlib import Path

import os

# Input locations for this notebook. The defaults are the original
# machine-specific paths; set the matching environment variable before the
# kernel starts to point the notebook at another machine's data without
# editing this cell.
EXTENDED_DIR = Path(
    os.environ.get(
        "TRAVERSE_EXTENDED_DIR",
        r"C:\Users\xtrem\Documents\Datasets\Spotify\anthony\ExtendedStreamingHistory",
    )
)
RECORDS_CSV = Path(
    os.environ.get(
        "TRAVERSE_RECORDS_CSV",
        r"C:\Users\xtrem\Documents\Datasets\records.csv",
    )
)
# Relative path, so it resolves against the kernel's working directory.
CACHE_DIR = Path(os.environ.get("TRAVERSE_CACHE_DIR", "_out"))
# Flip to True to request a rebuild instead of reusing what is in CACHE_DIR
# (NOTE(review): exact semantics depend on the cache's `force` flag - confirm).
FORCE_REBUILD = False

## 2. Load and Cache Canonical Tables

In [None]:
from traverse.data.spotify_extended_minimal import load_spotify_extended_minimal
from traverse.processing.enrich_fast import FastGenreStyleEnricher
from traverse.processing.cache import CanonicalTableCache

def _build_tables():
    """Run the minimal extended-history loader against EXTENDED_DIR."""
    return load_spotify_extended_minimal(EXTENDED_DIR)


def _enrich_tables(built):
    """Pass the cache-supplied argument through an enricher backed by RECORDS_CSV."""
    enricher = FastGenreStyleEnricher(records_csv=str(RECORDS_CSV))
    return enricher.run(built)


# The cache receives the two callables above plus the rebuild flag; it decides
# when (or whether) to invoke them.
cache = CanonicalTableCache(
    cache_dir=CACHE_DIR,
    build_fn=_build_tables,
    enrich_fn=_enrich_tables,
    force=FORCE_REBUILD,
)
plays_wide, tracks_wide = cache.load_or_build()

# Quick size report for both tables, then preview the first rows of the plays
# table as the cell's displayed value.
print(f"plays_wide:  {plays_wide.shape[0]:,} rows, {plays_wide.shape[1]} cols")
print(f"tracks_wide: {tracks_wide.shape[0]:,} rows, {tracks_wide.shape[1]} cols")
plays_wide.head(3)

## 3. Build Co-occurrence Graph

In [None]:
import pandas as pd
from traverse.processing.normalize import split_tags, pretty_label
from traverse.graph.cooccurrence import CooccurrenceBuilder

# Feed every play into the builder, one row at a time.
builder = CooccurrenceBuilder(min_cooccurrence=2, max_nodes=500)

play_columns = plays_wide[["played_at", "genres", "styles"]]
for played_at, genres, styles in play_columns.itertuples(index=False):
    genre_tags = split_tags(genres)
    style_tags = split_tags(styles)
    tags = genre_tags + style_tags

    # Style entries are merged last, so a tag that appears in both lists is
    # recorded as "style".
    tag_categories = {
        **dict.fromkeys(genre_tags, "genre"),
        **dict.fromkeys(style_tags, "style"),
    }

    # Timestamp.value is in nanoseconds; integer-divide down to milliseconds.
    # Rows without a play time are passed along with no timestamp.
    if pd.notna(played_at):
        ts_ms = int(pd.Timestamp(played_at).value // 1_000_000)
    else:
        ts_ms = None

    builder.add(
        tags,
        timestamp_ms=ts_ms,
        label_fn=pretty_label,
        tag_categories=tag_categories,
    )

graph = builder.build()
print(f"Graph: {len(graph['points'])} nodes, {len(graph['links'])} edges")

## 4. Convert to NetworkX and Run All 3 Algorithms

In [None]:
from traverse.graph.community import (
    CommunityAlgorithm,
    cooccurrence_to_networkx,
    detect_communities,
)

# Convert the co-occurrence graph built above into a NetworkX graph and
# report its size.
G = cooccurrence_to_networkx(graph)
print(f"NetworkX graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

# Run each member of CommunityAlgorithm with the same seed and compare how
# many distinct community ids each one assigns.
for algo in CommunityAlgorithm:
    assignments = detect_communities(G, algo, seed=42)
    distinct_ids = {*assignments.values()}
    n_comms = len(distinct_ids)
    print(f"  {algo.value:25s} -> {n_comms} communities")

## 5. Apply Louvain Communities to Graph

In [None]:
from traverse.graph.community import add_communities

from collections import Counter

# Label the graph using Louvain, seeded like the comparison run.
graph_with_communities = add_communities(graph, CommunityAlgorithm.LOUVAIN, seed=42)

# Tally how many points carry each community id.
comm_counts = Counter()
for pt in graph_with_communities["points"]:
    comm_counts[pt["community"]] += 1

# Report communities from largest to smallest.
print(f"{len(comm_counts)} communities:")
for comm_id, count in comm_counts.most_common():
    print(f"  Community {comm_id}: {count} nodes")

## 6. Export JSON and Serve

Write the community-labeled graph with `meta.clusterField = "community"`,
then start the static server.

In [None]:
from traverse.graph.adapters_cosmograph import CosmographAdapter
from traverse.cosmograph.server import serve, _default_dist_dir

# Define the port and file name once so the URL printed below always matches
# what is actually written and served.
host_port = 8080
json_name = "cosmo_community.json"

# Tell the front end which point field holds the cluster assignment.
meta = {"clusterField": "community"}
out_path = _default_dist_dir() / json_name
CosmographAdapter.write(graph_with_communities, out_path, meta=meta)
print(f"Wrote {out_path}")
print()
# The dash was previously stored as mis-decoded bytes and printed as garbage.
print("Starting server — open in browser:")
print(f"  http://127.0.0.1:{host_port}/?data=/{json_name}")
print()
print("Press Ctrl+C (or interrupt the kernel) to stop.")

# NOTE(review): presumably blocks until interrupted, per the message above.
serve(port=host_port)

---

## Appendix: PyCosmograph Inline Widget

Render the community-clustered graph directly in the notebook.

In [None]:
# pip install cosmograph  # uncomment to install
import pandas as pd
from cosmograph import cosmo

# Tabular views of the community-labelled graph for the widget.
points_df = pd.DataFrame(graph_with_communities["points"])
links_df = pd.DataFrame(graph_with_communities["links"])

# Store community ids as strings so they are treated as categories when
# colouring.
points_df["community"] = points_df["community"].astype(str)

BRIGHT_PALETTE = [
    "#00e5ff",  # cyan
    "#ff4081",  # pink
    "#76ff03",  # lime
    "#ffea00",  # yellow
    "#e040fb",  # purple
    "#ff6e40",  # orange
]

# Widget options grouped by concern; the groups are merged into a single
# cosmo() call below.
data_mapping = {
    "points": points_df,
    "links": links_df,
    "point_id_by": "id",
    "link_source_by": "source",
    "link_target_by": "target",
    "point_label_by": "label",
    "link_include_columns": ["weight"],
    "point_include_columns": ["community"],
}
appearance = {
    "point_size": 0.2,
    "show_labels": True,
}
# Smoother simulation startup.
simulation = {
    "simulation_friction": 0.7,
    "simulation_decay": 3000,
}
curved_edges = {
    "curved_links": True,
    "curved_link_segments": 19,
    "curved_link_weight": 0.8,
    "curved_link_control_point_distance": 0.5,
}
# Colour and cluster points by their community.
clustering = {
    "point_color_by": "community",
    "point_color_palette": BRIGHT_PALETTE,
    "point_cluster_by": "community",
    "simulation_cluster": 0.8,
    "show_cluster_labels": True,
    "scale_cluster_labels": True,
    "use_point_color_strategy_for_cluster_labels": True,
}

w = cosmo(
    **data_mapping,
    **appearance,
    **simulation,
    **curved_edges,
    **clustering,
)
w  # renders inline