# Front end visualization

Here we'd like to develop a web page visualization of the network. In particular, we'd like to use JavaScript to render a 3D force-directed graph that maps out all models, together with an interface that lets the user zoom in and out and pan around the network.

In [3]:
import numpy as np
import pandas as pd
import pickle
import networkx as nx
import ast
from itertools import combinations
import matplotlib.pyplot as plt
from ordering_helpers import get_trait_list, get_trait_counter, get_trait_graph, append_total_appearances, get_trait_ratios, get_oriented_trait_graph, get_top_trait_graph, solve_max_compatible_ordering, solve_weighted_compatible_ordering, get_violating_edges, get_compatible_and_total_traffic
import random

import json

ModuleNotFoundError: No module named 'numpy'

In [2]:
# Load the pickled model graph from disk.
# NOTE(review): unpickling can execute arbitrary code, so only load graph
# files that come from a trusted source.
with open('data/ai_ecosystem_graph_nomerges.pkl', 'rb') as graph_file:
    G = pickle.load(graph_file)

# Report the size of the loaded graph as a quick sanity check.
node_count = len(G.nodes())
edge_count = len(G.edges())
print(f"Graph loaded: {node_count} nodes, {edge_count} edges")

Graph loaded: 1860411 nodes, 533295 edges


In [None]:
# Quickly get a list of about 10 models that have children.
# ['OpenRLHF/Llama-3-8b-sft-mixture'
# , 'google/vit-hybrid-base-bit-384'
# , 'migtissera/Tess-2.0-Llama-3-8B'
# , 'Thejina/llama-lora-medical'
# , 'Buseak/md_mt5_0109_v4'
# , 'AmberYifan/Gemma-2-9B-sft-SPIN-gpt4o'
# , 'yamatazen/Orihime-Gutenberg-12B'
# , 'LeroyDyer/_Spydaz_Web_ONTOLOGY_OFFICER_', 'ibivibiv/orthorus-125b-v2', 'knifeayumu/Cydonia-v1.2-v1.3-Magnum-v4-22B']


['OpenRLHF/Llama-3-8b-sft-mixture', 'google/vit-hybrid-base-bit-384', 'migtissera/Tess-2.0-Llama-3-8B', 'Thejina/llama-lora-medical', 'Buseak/md_mt5_0109_v4', 'AmberYifan/Gemma-2-9B-sft-SPIN-gpt4o', 'yamatazen/Orihime-Gutenberg-12B', 'LeroyDyer/_Spydaz_Web_ONTOLOGY_OFFICER_', 'ibivibiv/orthorus-125b-v2', 'knifeayumu/Cydonia-v1.2-v1.3-Magnum-v4-22B']


In [3]:
# Export the FULL graph (all nodes and edges) for dynamic component selection
def export_full_graph_to_json(
    G,
    output_file='graph_data.json',
    include_attributes=('likes', 'downloads', 'createdAt', 'pipeline_tag', 'library_name'),
):
    """
    Export the entire graph (all nodes and edges) to JSON.
    The front-end will dynamically select connected components based on user search.

    Every node record carries 'id', 'name' (the part of the id after the last
    '/'), the requested attributes that exist on the node, a constant 'size'
    of 1.0, and 'downloads' / 'likes' (0 when the node has no such attribute).
    Missing-value markers (None, NaN, NaT, pd.NA) are written as JSON null so
    the output never contains a bare NaN token, which is not valid JSON.

    Parameters:
    - G: networkx graph
    - output_file: output JSON file path
    - include_attributes: iterable of node attributes to include

    Returns:
    - The dict that was written to output_file, with keys 'nodes', 'edges'
      and 'metadata'.
    """
    def _json_safe(value):
        """Return None for missing-value markers, otherwise the value itself."""
        if value is None:
            return None
        # pd.isna returns an array for list-like input, and the truth value
        # of an array is ambiguous; only scalars can be a missing marker.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return None
        return value

    print(f"Exporting full graph: {len(G.nodes())} nodes and {len(G.edges())} edges")

    # Prepare nodes data
    nodes_data = []
    for node_id in G.nodes():
        attrs = G.nodes[node_id]
        node_data = {
            'id': node_id,
            'name': node_id.split('/')[-1] if '/' in node_id else node_id  # Short name
        }

        # Add requested attributes that are present on this node
        for attr in include_attributes:
            if attr in attrs:
                node_data[attr] = _json_safe(attrs[attr])

        node_data['size'] = 1.0
        # Always emit downloads/likes (default 0), sanitised the same way as
        # the requested attributes so a NaN cannot slip back into the output.
        node_data['downloads'] = _json_safe(attrs.get('downloads', 0))
        node_data['likes'] = _json_safe(attrs.get('likes', 0))

        nodes_data.append(node_data)

    # Prepare edges data
    edges_data = []
    for source, target in G.edges():
        edge_attrs = G.edges[source, target]

        # Prefer the single 'edge_type'; otherwise take the first entry of
        # 'edge_types'; fall back to 'unknown'.
        if 'edge_type' in edge_attrs:
            edge_type = edge_attrs['edge_type']
        elif 'edge_types' in edge_attrs:
            edge_types = edge_attrs['edge_types']
            edge_type = edge_types[0] if edge_types else 'unknown'
        else:
            edge_type = 'unknown'

        edges_data.append({
            'source': source,
            'target': target,
            'type': edge_type,
        })

    # Create final JSON structure
    graph_json = {
        'nodes': nodes_data,
        'edges': edges_data,
        'metadata': {
            'total_nodes': len(nodes_data),
            'total_edges': len(edges_data),
            'full_graph': True
        }
    }

    # Write to JSON file
    with open(output_file, 'w') as f:
        json.dump(graph_json, f, indent=2)

    print(f"Full graph exported to {output_file}")
    return graph_json

# Write every node and edge of G to graph_data.json and keep the exported
# dict around for inspection.
graph_data = export_full_graph_to_json(
    G,
    output_file='graph_data.json',
    include_attributes=['likes', 'downloads', 'createdAt', 'pipeline_tag', 'library_name'],
)

# Summarise what the visualization page does with the exported file.
print(
    "\nFull graph exported. The visualization tool will now allow you to:",
    "1. Search for any model ID",
    "2. Automatically find and display its connected component",
    "3. Calculate the layout dynamically",
    sep="\n",
)


Exporting full graph: 1860411 nodes and 533295 edges
Full graph exported to graph_data.json

Full graph exported. The visualization tool will now allow you to:
1. Search for any model ID
2. Automatically find and display its connected component
3. Calculate the layout dynamically


In [4]:
import json
from collections import defaultdict

def get_family_tree(G, model_id):
    """
    Get the entire connected component (family tree) containing a given model.

    The component is found by traversing outward from model_id, ignoring edge
    direction, so only the nodes of that one component are visited instead of
    enumerating every component of the graph.

    Parameters:
    - G: networkx graph; for a directed graph both successors and
      predecessors are followed (weak connectivity)
    - model_id: the model ID to find the family tree for

    Returns:
    - Set of nodes in the connected component, or an empty set when model_id
      is not in the graph
    """
    if model_id not in G:
        return set()

    if G.is_directed():
        def neighbors(node):
            # Follow edges in both directions so that parents and children
            # both end up in the same family tree.
            yield from G.successors(node)
            yield from G.predecessors(node)
    else:
        neighbors = G.neighbors

    component = {model_id}
    pending = [model_id]
    while pending:
        current = pending.pop()
        for neighbor in neighbors(current):
            if neighbor not in component:
                component.add(neighbor)
                pending.append(neighbor)
    return component

def export_family_tree_to_json(
    G,
    model_id,
    output_file='graph_data.json',
    include_attributes=('likes', 'downloads', 'createdAt', 'pipeline_tag', 'library_name'),
):
    """
    Export the entire family tree (connected component) of a given model to JSON.

    Every node record carries 'id', 'name' (the part of the id after the last
    '/'), the requested attributes that exist on the node, a constant 'size'
    of 1.0, and 'downloads' / 'likes' (0 when the node has no such attribute).
    Missing-value markers (None, NaN, NaT, pd.NA) are written as JSON null so
    the output never contains a bare NaN token, which is not valid JSON.

    Parameters:
    - G: networkx graph
    - model_id: model ID to extract family tree for
    - output_file: output JSON file path
    - include_attributes: iterable of node attributes to include

    Returns:
    - The dict that was written to output_file (keys 'nodes', 'edges',
      'metadata'), or None when get_family_tree returns no nodes for
      model_id; in that case nothing is written.
    """
    def _json_safe(value):
        """Return None for missing-value markers, otherwise the value itself."""
        if value is None:
            return None
        # pd.isna returns an array for list-like input, and the truth value
        # of an array is ambiguous; only scalars can be a missing marker.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return None
        return value

    # Get the entire connected component
    family_tree_nodes = get_family_tree(G, model_id)

    if not family_tree_nodes:
        print(f"Model '{model_id}' not found in graph")
        return None

    # Build subgraph containing the entire family tree
    G_sub = G.subgraph(family_tree_nodes).copy()

    print(f"Exporting family tree for '{model_id}': {len(G_sub.nodes())} nodes and {len(G_sub.edges())} edges")

    # Prepare nodes data - all same size and color
    nodes_data = []
    for node_id in G_sub.nodes():
        attrs = G_sub.nodes[node_id]
        node_data = {
            'id': node_id,
            'name': node_id.split('/')[-1] if '/' in node_id else node_id  # Short name
        }

        # Add requested attributes that are present on this node
        for attr in include_attributes:
            if attr in attrs:
                node_data[attr] = _json_safe(attrs[attr])

        # All nodes same size (no size variation)
        node_data['size'] = 1.0
        # Always emit downloads/likes (default 0), sanitised the same way as
        # the requested attributes so a NaN cannot slip back into the output.
        node_data['downloads'] = _json_safe(attrs.get('downloads', 0))
        node_data['likes'] = _json_safe(attrs.get('likes', 0))

        nodes_data.append(node_data)

    # Prepare edges data
    edges_data = []
    for source, target in G_sub.edges():
        edge_attrs = G_sub.edges[source, target]

        # Prefer the single 'edge_type'; otherwise take the first entry of
        # 'edge_types'; fall back to 'unknown'.
        if 'edge_type' in edge_attrs:
            edge_type = edge_attrs['edge_type']
        elif 'edge_types' in edge_attrs:
            edge_types = edge_attrs['edge_types']
            edge_type = edge_types[0] if edge_types else 'unknown'
        else:
            edge_type = 'unknown'

        edges_data.append({
            'source': source,
            'target': target,
            'type': edge_type,
        })

    # Create final JSON structure
    graph_json = {
        'nodes': nodes_data,
        'edges': edges_data,
        'metadata': {
            'total_nodes': len(nodes_data),
            'total_edges': len(edges_data),
            'root_model': model_id,
            'family_tree_size': len(family_tree_nodes)
        }
    }

    # Write to JSON file
    with open(output_file, 'w') as f:
        json.dump(graph_json, f, indent=2)

    print(f"Family tree exported to {output_file}")
    return graph_json

# Helper function to search for models by name
def search_models(G, search_term, limit=10):
    """
    Search for models in the graph by name or ID.

    The match is a case-insensitive substring test against the full model ID
    (which also covers the short name after the last '/'). Results are
    returned in the graph's node iteration order.

    Parameters:
    - G: networkx graph
    - search_term: string to search for
    - limit: maximum number of results to return; None means no limit, and
      zero or a negative number yields an empty list

    Returns:
    - List of matching model IDs
    """
    # Checking the limit only after appending would still return one match
    # for limit=0, so reject non-positive limits up front.
    if limit is not None and limit <= 0:
        return []

    needle = search_term.lower()
    matches = []

    for node_id in G.nodes():
        if needle in node_id.lower():
            matches.append(node_id)
            if limit is not None and len(matches) >= limit:
                break

    return matches

# Demonstrate the search helper: look up to ten 'llama' matches and list
# the first five of them.
search_results = search_models(G, 'llama', limit=10)
print("Example search results for 'llama':")
for i, model_id in enumerate(search_results[:5], start=1):
    print(f"{i}. {model_id}")

# Model whose family tree would be exported.
# Change this to any model ID you want to visualize.
#example_model = 'meta-llama/Llama-3.1-8B-Instruct'  # Change this to any model ID
example_model = 'zera09/SmolVLM'

# The family-tree export call below is commented out, so running this cell
# does not write graph_data.json; uncomment it to export the tree.
#graph_data = export_family_tree_to_json(
#    G,
#    model_id=example_model,
#    output_file='graph_data.json',
#    include_attributes=['likes', 'downloads', 'createdAt', 'pipeline_tag', 'library_name']
#)

print(
    "\nTo visualize a different family tree:",
    "1. Use search_models(G, 'your_search_term') to find model IDs",
    "2. Change example_model above to your desired model ID",
    "3. Re-run this cell to export the new tree",
    "4. Refresh the visualization page",
    sep="\n",
)


Example search results for 'llama':
1. meta-llama/Llama-3.1-8B-Instruct
2. marcelbinz/Llama-3.1-Centaur-70B
3. meta-llama/Meta-Llama-3-8B-Instruct
4. SicariusSicariiStuff/Impish_LLAMA_4B
5. marcelbinz/Llama-3.1-Centaur-70B-adapter

To visualize a different family tree:
1. Use search_models(G, 'your_search_term') to find model IDs
2. Change example_model above to your desired model ID
3. Re-run this cell to export the new tree
4. Refresh the visualization page


In [5]:
# To view the visualization:

# Option 1: Run a local web server (recommended)
# Run this in your terminal:
# python -m http.server 8000
# Then open: http://localhost:8000/visualization_3d.html

# Option 2: Use this Python code to start a server:
import http.server
import socketserver
import webbrowser
import threading

# TCP port the local static file server listens on.
PORT = 8000


def start_server():
    """
    Serve the current working directory over HTTP on PORT until interrupted.

    Blocks in serve_forever(), so it is meant to be run in its own thread.
    The empty host string binds the listening socket to all interfaces.
    """
    listen_address = ("", PORT)
    request_handler = http.server.SimpleHTTPRequestHandler
    with socketserver.TCPServer(listen_address, request_handler) as httpd:
        print(f"Server running at http://localhost:{PORT}/")
        print("Open visualization_3d.html in your browser")
        httpd.serve_forever()

# Start the server in a background daemon thread (so it does not keep the
# interpreter alive on exit) and open the visualization page in the browser.
# Comment these three lines out if you prefer to start the server manually.
server_thread = threading.Thread(target=start_server, daemon=True)
server_thread.start()
webbrowser.open(f'http://localhost:{PORT}/visualization_3d.html')

print(
    "To visualize a different family tree:",
    "1. Use search_models(G, 'search_term') in the previous cell to find models",
    "2. Change example_model in the previous cell to your desired model ID",
    "3. Re-run the previous cell to export the new tree",
    "4. Refresh the visualization page in your browser",
    sep="\n",
)


Server running at http://localhost:8000/
Open visualization_3d.html in your browser
To visualize a different family tree:
1. Use search_models(G, 'search_term') in the previous cell to find models
2. Change example_model in the previous cell to your desired model ID
3. Re-run the previous cell to export the new tree
4. Refresh the visualization page in your browser


127.0.0.1 - - [10/Dec/2025 13:34:22] "GET /visualization_3d.html HTTP/1.1" 200 -
127.0.0.1 - - [10/Dec/2025 13:34:22] "GET /graph_data.json HTTP/1.1" 200 -


## 3D Interactive Visualization

The graph has been exported to `graph_data.json`. Now we'll create an HTML file with a Three.js-based 3D visualization that allows you to:
- Navigate in 3D space (zoom, pan, rotate)
- Click on nodes to see details
- Filter by edge type
- Search for specific models
