In [18]:
import networkx as nx
import pandas as pd
import seaborn as sns
from ipysigma import Sigma
import numpy as np


# Load the merged table. The edge-building loop further down reads its
# 'TC', 'subfamilies', 'D_classes', 'Names', 'E-value_1_x' and 'E-value_1_y' columns.
merged_df = pd.read_csv('atlas_tc_dclasses.csv')

# Step 1: Create a graph object
G = nx.Graph()


def _to_css_rgb(color):
    """Turn an (r, g, b) triple of 0-1 floats into a CSS 'rgb(R, G, B)' string.

    Channels are rounded rather than truncated: the palette floats are not
    always exactly representable, so ``x * 255`` can land a hair below the
    intended integer and plain ``int()`` would then drop the channel by one.
    """
    r, g, b = color
    return f'rgb({int(round(r * 255))}, {int(round(g * 255))}, {int(round(b * 255))})'


# Generate the tab20 palette using Seaborn:
# 20 distinct colors (for TC0 to TC17, plus two more)
palette = sns.color_palette("tab20", 20)

# Map TC categories ('TC0' .. 'TC17') to the first 18 colors
tc_palette = {f'TC{i}': _to_css_rgb(color) for i, color in enumerate(palette[:18])}

# The two remaining colors go to the 'family' and 'domain' categories
tc_palette['family'] = _to_css_rgb(palette[18])
tc_palette['domain'] = _to_css_rgb(palette[19])

# Step 2: Add edges between TCs, subfamilies, and D_classes based on shared occurrences in rows

def _pair_counts(df, left, right):
    """Return ``{(left_value, right_value): row_count}`` for two columns of ``df``.

    Pairs in which either value is missing are absent from the result
    (groupby drops NaN keys), so callers should look up with a default of 0.
    """
    return df.groupby([left, right], sort=False).size().to_dict()


# Count every co-occurring pair once up front instead of re-filtering the
# whole DataFrame three times for each row.
tc_subfamily_counts = _pair_counts(merged_df, 'TC', 'subfamilies')
tc_domains_counts = _pair_counts(merged_df, 'TC', 'D_classes')
domains_subfamily_counts = _pair_counts(merged_df, 'D_classes', 'subfamilies')

# NOTE(review): rows with missing values are not filtered out here, so a NaN
# label would still become a node — confirm the CSV has none.
for index, row in merged_df.iterrows():
    tc = row['TC']
    subfamily = row['subfamilies']
    domains = row['D_classes']
    names = row['Names']
    evalue_tc = row['E-value_1_x']
    evalue_D = row['E-value_1_y']

    # Add nodes for TC, subfamily, domains, and names.
    # Re-adding an existing node just refreshes its attributes.
    G.add_node(tc, type='TC', tc_category=tc)
    G.add_node(subfamily, type='subfamily or family', tc_category='family')
    G.add_node(domains, type='D_classes', tc_category='domain')
    G.add_node(names, type='Names', tc_category='Names')

    # Edge weight = number of rows in which the two labels appear together.
    # Default 0 mirrors the equality filter, which never matches a NaN.
    weight_tc_subfamily = tc_subfamily_counts.get((tc, subfamily), 0)
    weight_tc_domains = tc_domains_counts.get((tc, domains), 0)
    weight_domains_subfamily = domains_subfamily_counts.get((domains, subfamily), 0)

    # Add edges with weights and evalues only where applicable
    G.add_edge(tc, subfamily, weight=weight_tc_subfamily)
    G.add_edge(tc, domains, weight=weight_tc_domains)
    G.add_edge(domains, subfamily, weight=weight_domains_subfamily)
    G.add_edge(subfamily, names)
    # For a repeated (label, name) pair the last row's e-value wins.
    G.add_edge(domains, names, evalue=evalue_D)
    G.add_edge(tc, names, evalue=evalue_tc)

# Step 3: Annotate every node with its degree (used for size scaling) and a display color
for node, attrs in G.nodes(data=True):
    # `attrs` is the node's live attribute dict, so writes land on the graph itself.
    attrs['degree'] = G.degree(node)
    # Categories without a palette entry fall back to grey.
    attrs['color'] = tc_palette.get(attrs['tc_category'], 'grey')

# Displaying the graph with ipysigma using the custom palette.
# Options are grouped by what they style; keyword order does not matter to Sigma.
sigma_options = dict(
    # --- nodes ---
    node_color="tc_category",        # Color nodes by the 'tc_category' attribute
    node_color_palette=tc_palette,
    max_categorical_colors=20,
    node_border_color_from='node',
    node_size='degree',              # 'degree' attribute assigned in Step 3
    node_size_range=(3, 30),
    node_metrics=['louvain'],
    default_node_label_size=24,
    # --- edges ---
    edge_size='weight',              # 'weight' / 'evalue' edge attributes come from Step 2
    edge_size_range=(1, 30),
    edge_label='evalue',
    default_edge_type='curve',
)

sigma = Sigma(G, **sigma_options)

display(sigma)


Sigma(nx.Graph with 2,264 nodes and 9,214 edges)

In [21]:
# Render a snapshot of the `sigma` widget created in the earlier cell (the
# captured stdout below starts with "Rendering snapshot from widget").
# Presumably this keeps the graph visible in a static export — TODO confirm.
sigma.render_snapshot()

Output(outputs=({'output_type': 'stream', 'name': 'stdout', 'text': 'Rendering snapshot from widget (are you s…