# NetworkX BioGRID Interactions Analysis

This notebook analyzes gene interaction networks using data from BioGRID. It includes:
- Loading and parsing JSON data.
- Creating a gene interaction network.
- Performing comprehensive network analysis.
- Visualizing the network.
- Saving results and graphs.

## 1. Setup and Configuration

In [None]:
!pip install -q networkx plotly pyvis python-louvain matplotlib

In [None]:
from pathlib import Path
import json
import networkx as nx
import plotly.graph_objects as go
from pyvis.network import Network
import logging
import community as community_louvain  # For community detection
import matplotlib.pyplot as plt

# Configure root logging once for the whole notebook: INFO and above, with a
# timestamp and level prefix on every message.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Data directories, given relative to the notebook's working directory.
INTERIM_DIR = Path("../../data/interim")
PROCESSED_DIR = Path("../../data/processed")

# Input JSON file (created using biogrid_interactions.ipynb)
biogrid_interactions_json_file = "pmid_35559673_interactions.json"
biogrid_interactions_json_src = INTERIM_DIR / biogrid_interactions_json_file

# Output files: same base name as the input, with the ".json" suffix swapped
# for "_network.graphml" / "_network.html", placed in PROCESSED_DIR.
biogrid_interactions_network_graphml = PROCESSED_DIR / biogrid_interactions_json_file.replace('.json', '_network.graphml')
biogrid_interactions_network_html = PROCESSED_DIR / biogrid_interactions_json_file.replace('.json', '_network.html')

## 2. Data Loading

In [None]:
def load_json_data(filepath):
    """
    Load JSON data from a file.

    Args:
        filepath (Path): Path to the JSON file.

    Returns:
        dict: Parsed JSON data (whatever top-level value the file contains).

    Raises:
        FileNotFoundError: If ``filepath`` does not exist (logged, then re-raised).
        json.JSONDecodeError: If the file is not valid JSON (logged, then re-raised).
    """
    try:
        # Read as UTF-8 explicitly so parsing does not depend on the platform's
        # locale encoding (author names and synonyms may be non-ASCII).
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        logging.error(f"File not found: {filepath}")
        raise
    except json.JSONDecodeError:
        logging.error(f"Invalid JSON data in file: {filepath}")
        raise

In [None]:
# Load the interaction records from the interim JSON file configured in Section 1
json_data = load_json_data(biogrid_interactions_json_src)

## 3. Graph Creation

In [None]:
def create_gene_network(interactions):
    """
    Create a gene interaction network from JSON data.

    Each record contributes two nodes (its ``OFFICIAL_SYMBOL_A`` and
    ``OFFICIAL_SYMBOL_B``) and one edge between them. Records missing either
    symbol are skipped with a warning. Every other field is optional: it is
    copied onto the node/edge when present and simply left off when absent,
    so one incomplete record no longer aborts the whole build.

    Because the result is a simple ``networkx.Graph``, a gene pair has at most
    one edge; a later record for the same pair updates that edge's attributes.

    Args:
        interactions (dict): Parsed JSON data containing gene interactions,
            keyed by interaction id.

    Returns:
        G (networkx.Graph): A graph representing gene interactions.
    """
    # (graph attribute name, JSON key prefix) -- the side suffix A/B is appended.
    node_fields = (('entrez_id', 'ENTREZ_GENE_'), ('synonyms', 'SYNONYMS_'))
    # (graph attribute name, JSON key)
    edge_fields = (
        ('pubmed_id', 'PUBMED_ID'),
        ('pubmed_author', 'PUBMED_AUTHOR'),
        ('throughput', 'THROUGHPUT'),
        ('qualifications', 'QUALIFICATIONS'),
    )

    G = nx.Graph()

    for interaction_id, data in interactions.items():
        try:
            gene_a = data['OFFICIAL_SYMBOL_A']
            gene_b = data['OFFICIAL_SYMBOL_B']
        except KeyError:
            logging.warning(f"Skipping interaction {interaction_id} due to missing data.")
            continue

        # Add nodes with whichever optional attributes the record provides
        for gene, side in ((gene_a, 'A'), (gene_b, 'B')):
            node_attrs = {
                name: data[prefix + side]
                for name, prefix in node_fields
                if prefix + side in data
            }
            G.add_node(gene, **node_attrs)

        # Add edge with whichever optional attributes the record provides
        edge_attrs = {name: data[key] for name, key in edge_fields if key in data}
        G.add_edge(gene_a, gene_b, interaction_id=interaction_id, **edge_attrs)

    logging.info(f"Created network with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")
    return G

In [None]:
# Build the interaction graph from the JSON records loaded above
G = create_gene_network(json_data)

In [None]:
import pandas as pd

# Compute the per-node metrics directly from G. This cell sits before the
# Network Analysis section, so `analysis_results` does not exist yet when the
# notebook is run top to bottom; reading it here raised NameError.
node_centralities = {
    'degree_centrality': nx.degree_centrality(G),
    'betweenness_centrality': nx.betweenness_centrality(G),
    'closeness_centrality': nx.closeness_centrality(G),
    'eigenvector_centrality': nx.eigenvector_centrality(G, max_iter=1000),
}
node_communities = community_louvain.best_partition(G)

# Extract node properties from the graph
node_data = [
    {
        'Gene': node,
        'Degree': G.degree(node),
        'Degree Centrality': node_centralities['degree_centrality'].get(node, 0),
        'Betweenness Centrality': node_centralities['betweenness_centrality'].get(node, 0),
        'Closeness Centrality': node_centralities['closeness_centrality'].get(node, 0),
        'Eigenvector Centrality': node_centralities['eigenvector_centrality'].get(node, 0),
        'Community': node_communities.get(node, -1),
    }
    for node in G.nodes()
]

# Create a DataFrame from the node data
df = pd.DataFrame(node_data)

# Display the DataFrame
df.head()

## 4. Network Analysis

In [None]:
def analyze_network(G):
    """
    Analyze the gene interaction network.

    An empty graph is handled explicitly: its average degree is reported as
    0.0 and it is reported as not connected, instead of raising
    ZeroDivisionError / NetworkXPointlessConcept.

    Args:
        G (networkx.Graph): The gene interaction network.

    Returns:
        dict: Network statistics and metrics, with the keys 'Number of nodes',
        'Number of edges', 'Average degree', 'Density', 'Is connected' and
        'Highest degree nodes' (up to five ``(node, degree)`` pairs, highest
        degree first).
    """
    node_count = G.number_of_nodes()
    # Build the degree mapping once; it feeds both the average and the ranking.
    degrees = dict(G.degree())

    stats = {
        'Number of nodes': node_count,
        'Number of edges': G.number_of_edges(),
        'Average degree': sum(degrees.values()) / node_count if node_count else 0.0,
        'Density': nx.density(G),
        # nx.is_connected is undefined (and raises) for the null graph.
        'Is connected': nx.is_connected(G) if node_count else False,
        'Highest degree nodes': sorted(degrees.items(), key=lambda x: x[1], reverse=True)[:5]
    }
    return stats

def calculate_centralities(G):
    """
    Calculate centrality measures for the network.

    Args:
        G (networkx.Graph): The gene interaction network.

    Returns:
        dict: Centrality measures (degree, betweenness, closeness, eigenvector),
        each a mapping of node to score. For an empty graph every mapping is
        empty.

    Raises:
        networkx.PowerIterationFailedConvergence: If eigenvector centrality
            still has not converged within the raised iteration limit.
    """
    # Eigenvector centrality is undefined for the null graph (NetworkX raises),
    # and the default limit of 100 power iterations can be too low to converge,
    # which would abort the whole analysis. Allow more iterations and skip the
    # computation when there are no nodes.
    if len(G):
        eigenvector = nx.eigenvector_centrality(G, max_iter=1000)
    else:
        eigenvector = {}

    return {
        'degree_centrality': nx.degree_centrality(G),
        'betweenness_centrality': nx.betweenness_centrality(G),
        'closeness_centrality': nx.closeness_centrality(G),
        'eigenvector_centrality': eigenvector
    }

def detect_communities(G, random_state=None):
    """
    Detect communities in the network using the Louvain method.

    The Louvain heuristic is randomized, so repeated runs can label nodes
    differently. Pass ``random_state`` to make the partition reproducible.

    Args:
        G (networkx.Graph): The gene interaction network.
        random_state (int | numpy.random.RandomState | None): Seed forwarded to
            ``community_louvain.best_partition``. ``None`` (the default) keeps
            the library's own behaviour.

    Returns:
        dict: Mapping of nodes to their community IDs.
    """
    return community_louvain.best_partition(G, random_state=random_state)

def additional_metrics(G):
    """
    Calculate additional network metrics.

    Diameter and average shortest path length are only defined on a connected
    graph, and NetworkX raises for a disconnected one. When G is disconnected
    these two metrics are computed on its largest connected component (a
    warning is logged); assortativity is always computed on the whole graph.
    For an empty graph all three values are ``None``.

    Args:
        G (networkx.Graph): The gene interaction network.

    Returns:
        dict: Additional metrics (assortativity, diameter, average path length).
    """
    if G.number_of_nodes() == 0:
        return {
            'assortativity': None,
            'diameter': None,
            'average_path_length': None
        }

    if nx.is_connected(G):
        component = G
    else:
        largest = max(nx.connected_components(G), key=len)
        component = G.subgraph(largest)
        logging.warning(
            "Graph is disconnected; diameter and average path length are "
            f"computed on the largest component ({len(largest)} of "
            f"{G.number_of_nodes()} nodes)."
        )

    return {
        'assortativity': nx.degree_assortativity_coefficient(G),
        'diameter': nx.diameter(component),
        'average_path_length': nx.average_shortest_path_length(component)
    }

def comprehensive_analysis(G):
    """
    Run every analysis step on the gene interaction network and bundle the results.

    The steps run in this order: basic statistics, centralities, community
    detection, average clustering coefficient, additional metrics.

    Args:
        G (networkx.Graph): The gene interaction network.

    Returns:
        dict: Results keyed by 'basic_stats', 'centralities', 'communities',
        'clustering_coefficient' and 'additional_metrics'.
    """
    results = {}
    results['basic_stats'] = analyze_network(G)
    results['centralities'] = calculate_centralities(G)
    results['communities'] = detect_communities(G)
    results['clustering_coefficient'] = nx.average_clustering(G)
    results['additional_metrics'] = additional_metrics(G)
    return results

In [None]:
# Run every analysis step on G and print the raw result dictionary
analysis_results = comprehensive_analysis(G)
print("Comprehensive Analysis Results:")
print(analysis_results)

## 5. Visualization

In [None]:
def visualize_networkx_graph(G):
    """
    Draw a static picture of the network with NetworkX and Matplotlib.

    Node positions come from a spring layout with a fixed seed (42). Nodes,
    edges and labels are drawn in that order on a 10x8 figure, which is then
    shown.

    Args:
        G (networkx.Graph): The gene interaction network.
    """
    plt.figure(figsize=(10, 8))
    layout = nx.spring_layout(G, seed=42)

    # Each layer is one NetworkX drawing call plus its styling.
    layers = (
        (nx.draw_networkx_nodes, {'node_size': 500, 'node_color': 'lightblue'}),
        (nx.draw_networkx_edges, {'width': 1, 'edge_color': 'gray'}),
        (nx.draw_networkx_labels, {'font_size': 10, 'font_family': 'sans-serif'}),
    )
    for draw_layer, style in layers:
        draw_layer(G, layout, **style)

    plt.title("Gene Interaction Network")
    plt.axis('off')
    plt.show()

def visualize_pyvis_graph(G, partition=None):
    """
    Visualize the network using PyVis.

    The HTML is written to the module-level path
    ``biogrid_interactions_network_html``; its parent directory is created
    if it does not exist yet.

    Args:
        G (networkx.Graph): The gene interaction network.
        partition (dict): Optional mapping of nodes to their community IDs.
            When given, each node found in it is assigned that ID as its
            PyVis ``group`` so communities are drawn in different colours.
            Nodes absent from the mapping keep the default colour.
    """
    net = Network(notebook=True, height="750px", width="100%", bgcolor="#222222", font_color="white")

    # Add nodes with size based on degree (and colour group based on community)
    for node, degree in G.degree():
        node_options = {'label': node, 'size': degree * 2}
        if partition is not None and node in partition:
            node_options['group'] = partition[node]
        net.add_node(node, **node_options)

    # Add edges
    for source, target in G.edges():
        net.add_edge(source, target)

    # Save and display the graph
    biogrid_interactions_network_html.parent.mkdir(parents=True, exist_ok=True)
    net.show(str(biogrid_interactions_network_html))
    logging.info(f"Graph saved to {biogrid_interactions_network_html}")

In [None]:
# Render the interactive PyVis view; the static Matplotlib view is left disabled
# visualize_networkx_graph(G)
visualize_pyvis_graph(G)

## 6. Summary and Export

In [None]:
def print_summary(analysis_results):
    """
    Print the headline numbers from a comprehensive analysis.

    Writes a "Network Summary:" header followed by one "<label>: <value>"
    line per reported metric to standard output.

    Args:
        analysis_results (dict): Comprehensive analysis results; must contain
            'basic_stats', 'clustering_coefficient' and 'additional_metrics'.
    """
    # (printed label, key path into analysis_results), in output order.
    report_rows = (
        ("Number of nodes", ('basic_stats', 'Number of nodes')),
        ("Number of edges", ('basic_stats', 'Number of edges')),
        ("Top 5 nodes by degree", ('basic_stats', 'Highest degree nodes')),
        ("Average clustering coefficient", ('clustering_coefficient',)),
        ("Network diameter", ('additional_metrics', 'diameter')),
    )

    print("Network Summary:")
    for label, key_path in report_rows:
        # Resolve each value only when its line is about to be printed.
        value = analysis_results
        for key in key_path:
            value = value[key]
        print(f"{label}: {value}")

def _graphml_safe_attributes(attrs):
    """Return a copy of ``attrs`` holding only values the GraphML writer accepts."""
    safe = {}
    for key, value in attrs.items():
        if value is None:
            # GraphML has no null type; leave the attribute off instead.
            continue
        if isinstance(value, (list, tuple, set)):
            value = "|".join(str(item) for item in value)
        elif isinstance(value, dict):
            value = json.dumps(value)
        safe[key] = value
    return safe


def export_graph(G, filepath):
    """
    Export the graph to a GraphML file.

    The GraphML writer only accepts scalar attribute values and raises on
    ``None`` or containers. The export therefore works on a copy of G in
    which ``None`` attributes are dropped, lists/tuples/sets are joined with
    "|" and dicts are JSON-encoded. G itself is not modified. The parent
    directory of ``filepath`` is created if it does not exist.

    Args:
        G (networkx.Graph): The gene interaction network.
        filepath (Path): Path to save the graph.
    """
    exportable = G.copy()
    for _, node_attrs in exportable.nodes(data=True):
        cleaned = _graphml_safe_attributes(node_attrs)
        node_attrs.clear()
        node_attrs.update(cleaned)
    for _, _, edge_attrs in exportable.edges(data=True):
        cleaned = _graphml_safe_attributes(edge_attrs)
        edge_attrs.clear()
        edge_attrs.update(cleaned)

    Path(filepath).parent.mkdir(parents=True, exist_ok=True)
    nx.write_graphml(exportable, filepath)
    logging.info(f"Graph exported to {filepath}")

In [None]:
# Print the headline metrics, then write the graph to the GraphML output path
print_summary(analysis_results)
export_graph(G, biogrid_interactions_network_graphml)

In [None]:
def _format_synonyms(synonyms):
    """Render a node's synonyms attribute (string or iterable) as comma-separated text."""
    if not synonyms:
        return 'N/A'
    if isinstance(synonyms, str):
        # A plain string must not go straight into str.join: that would put a
        # separator between every character.
        # NOTE(review): assumes multi-valued strings are pipe-delimited -- confirm
        # against the upstream BioGRID export.
        return ', '.join(part for part in synonyms.split('|') if part)
    return ', '.join(str(item) for item in synonyms)


def visualize_gene_network(G, output_file):
    """
    Create an interactive visualization of the gene network with enhanced hover capabilities.

    Nodes are sized by degree and carry a hover panel with the gene symbol,
    Entrez ID and synonyms; edges carry a hover panel with the interaction
    metadata. Missing attributes are shown as 'N/A'. The parent directory of
    ``output_file`` is created if it does not exist.

    Args:
        G (networkx.Graph): The gene interaction network
        output_file (str): Path to save the HTML output
    """
    # Create Pyvis network
    net = Network(notebook=True, height="750px", width="100%",
                  bgcolor="#ffffff", font_color="black")

    # Add nodes with enhanced hover information
    for node, node_data in G.nodes(data=True):
        # Create detailed hover text for nodes
        hover_info = f"""
        <div style='padding: 10px;'>
            <b>Gene:</b> {node}<br>
            <b>Entrez ID:</b> {node_data.get('entrez_id', 'N/A')}<br>
            <b>Synonyms:</b> {_format_synonyms(node_data.get('synonyms'))}<br>
        </div>
        """

        # Add node with properties
        net.add_node(node,
                     title=hover_info,
                     color='#32cd32',  # Lime green for genes
                     size=G.degree(node) * 2)  # Size based on degree

    # Add edges with enhanced hover information
    for source, target, edge_data in G.edges(data=True):
        # Create detailed hover text for edges
        hover_info = f"""
        <div style='padding: 10px;'>
            <b>Interaction ID:</b> {edge_data.get('interaction_id', 'N/A')}<br>
            <b>PubMed ID:</b> {edge_data.get('pubmed_id', 'N/A')}<br>
            <b>Author:</b> {edge_data.get('pubmed_author', 'N/A')}<br>
            <b>Throughput:</b> {edge_data.get('throughput', 'N/A')}<br>
            <b>Qualifications:</b> {edge_data.get('qualifications', 'N/A')}<br>
        </div>
        """

        # Add edge with properties
        net.add_edge(source, target,
                     title=hover_info,
                     color={'color': '#666666', 'highlight': '#ff0000'},
                     width=1.5)

    # Configure physics options for better layout
    physics_options = {
        "physics": {
            "forceAtlas2Based": {
                "gravitationalConstant": -100,
                "centralGravity": 0.01,
                "springLength": 200,
                "springConstant": 0.08,
                "damping": 0.4,
                "avoidOverlap": 1
            },
            "solver": "forceAtlas2Based",
            "stabilization": {
                "enabled": True,
                "iterations": 1000,
                "updateInterval": 25
            }
        },
        "interaction": {
            "hover": True,
            "tooltipDelay": 100
        }
    }

    # Apply options to network
    net.set_options(json.dumps(physics_options))

    # Save the network
    Path(output_file).parent.mkdir(parents=True, exist_ok=True)
    net.show(output_file)
    logging.info(f"Interactive network visualization saved to {output_file}")

def add_network_statistics(output_file, analysis_results):
    """
    Add network statistics to the visualization HTML file.

    The file is rewritten in place with a statistics panel inserted just
    before the final ``</body>`` tag. If the file has no ``</body>`` tag the
    panel is appended to the end and a warning is logged, rather than the
    call silently doing nothing.

    Args:
        output_file (str): Path to the HTML file
        analysis_results (dict): Results from network analysis; reads
            'basic_stats' ('Number of nodes', 'Number of edges',
            'Average degree', 'Density') and 'clustering_coefficient'.
    """
    # Create statistics panel HTML
    stats_html = f"""
    <div style="position: absolute; top: 10px; right: 10px; 
                background-color: rgba(255, 255, 255, 0.9); 
                padding: 10px; border-radius: 5px; border: 1px solid #ccc;">
        <h3>Network Statistics</h3>
        <ul style="list-style-type: none; padding: 0;">
            <li>Nodes: {analysis_results['basic_stats']['Number of nodes']}</li>
            <li>Edges: {analysis_results['basic_stats']['Number of edges']}</li>
            <li>Average Degree: {analysis_results['basic_stats']['Average degree']:.2f}</li>
            <li>Density: {analysis_results['basic_stats']['Density']:.3f}</li>
            <li>Clustering Coefficient: {analysis_results['clustering_coefficient']:.3f}</li>
        </ul>
    </div>
    """

    # Add statistics panel to the visualization
    with open(output_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # Split on the LAST closing tag so an earlier literal "</body>" (for
    # example inside an embedded script string) is left untouched.
    head, closing_tag, tail = content.rpartition('</body>')
    if closing_tag:
        content = f'{head}{stats_html}{closing_tag}{tail}'
    else:
        logging.warning(f"No </body> tag found in {output_file}; appending statistics panel.")
        content += stats_html

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(content)

# Example usage:
# visualize_gene_network(G, "gene_network.html")
# add_network_statistics("gene_network.html", analysis_results)

In [None]:
# Rebuild the graph from the loaded JSON and recompute the analysis results
G = create_gene_network(json_data)
analysis_results = comprehensive_analysis(G)

# Write the hover-enabled visualization, then inject the statistics panel
# into that same HTML file
visualize_gene_network(G, str(biogrid_interactions_network_html))
add_network_statistics(str(biogrid_interactions_network_html), analysis_results)

In [None]:
import networkx as nx
from pyvis.network import Network
import json

def enhance_visualization(graph_data, output_file):
    """
    Write an interactive PyVis view with per-type node colours and hover text.

    Args:
        graph_data (networkx.Graph | dict): Either an existing graph, which is
            used as-is, or a mapping with a "nodes" entry
            (``{node_id: attribute_dict}``) and an "edges" entry (an iterable
            of ``(source, target, attribute_dict)`` triples) from which a
            graph is built.
        output_file (str): Path of the HTML file to write.
    """
    if isinstance(graph_data, nx.Graph):
        # Callers in this notebook pass the graph itself; accept it directly
        # instead of failing on graph_data["nodes"].
        G = graph_data
    else:
        G = nx.Graph()
        for node_id, attributes in graph_data["nodes"].items():
            G.add_node(node_id, **attributes)
        for source, target, attributes in graph_data["edges"]:
            G.add_edge(source, target, **attributes)

    # Pyvis Network
    net = Network(height='900px', width='100%', notebook=False)

    # Node coloring and hover info
    entity_colors = {
        "GENE": "#32cd32",
        "PROTEIN": "#6a5acd",
        "UNKNOWN": "#808080",
    }

    for node, data in G.nodes(data=True):
        # Nodes without a "type" attribute are shown as UNKNOWN (grey).
        entity_type = data.get("type", "UNKNOWN")
        color = entity_colors.get(entity_type, "#808080")
        title = f"{node}<br>Type: {entity_type}<br>Description: {data.get('description', 'N/A')}"
        net.add_node(node, label=node, color=color, title=title)

    # Edge hover info
    for source, target, data in G.edges(data=True):
        title = f"Weight: {data.get('weight', 'N/A')}<br>Details: {data.get('description', 'N/A')}"
        net.add_edge(source, target, title=title)

    # Physics configuration
    physics_config = {
        "physics": {
            "forceAtlas2Based": {
                "gravitationalConstant": -50,
                "centralGravity": 0.005,
                "springLength": 100,
                "springConstant": 0.08,
            },
            "solver": "forceAtlas2Based",
            "stabilization": {"enabled": True},
        }
    }
    net.set_options(json.dumps(physics_config))

    # Save HTML (no legend is added by this function)
    net.write_html(output_file)
    print(f"Visualization saved as {output_file}")


In [None]:
# enhance_visualization reads graph_data["nodes"] (a mapping of node -> attributes)
# and graph_data["edges"] (source, target, attributes triples), so hand it that
# structure rather than the Graph object.
enhance_visualization(
    {"nodes": dict(G.nodes(data=True)), "edges": list(G.edges(data=True))},
    "enhanced_network.html",
)

In [None]:
# Quick look at the graph object currently bound to G
print(G)

In [None]:
import networkx as nx
from pyvis.network import Network

def enhanced_pyvis_visualization(G, output_file):
    """
    Write an interactive PyVis rendering of a NetworkX graph to an HTML file.

    Nodes are coloured by an organism tag: "HUMAN" when the node's
    ``organism`` attribute equals the string "9606", otherwise "OTHER".
    Edges are coloured by their ``interaction_type`` attribute (default
    "genetic") and drawn with a width equal to their ``weight`` attribute
    (default 1). Both carry HTML hover text.

    Args:
        G (networkx.Graph): The gene interaction network.
        output_file (str): Path to save the interactive visualization.
    """
    canvas = Network(height='900px', width='100%', bgcolor='#ffffff', font_color='black')

    fallback_color = "#808080"  # grey, used for any key missing from a palette
    node_palette = {
        "GENE": "#32cd32",  # Lime green
        "NEGATIVE_GENETIC": "#ff4500",  # Orange red
        "POSITIVE_GENETIC": "#1e90ff",  # Dodger blue
        "HUMAN": "#4682b4",  # Steel blue
        "OTHER": "#808080",  # Grey
    }
    edge_palette = {
        "genetic": "#ffa500",  # Orange
        "physical": "#0000ff",  # Blue
    }

    # Nodes: the colour follows the organism tag, not the entity type.
    for name, attrs in G.nodes(data=True):
        kind = attrs.get("type", "GENE")
        if attrs.get("organism") == "9606":
            species = "HUMAN"
        else:
            species = "OTHER"
        tooltip = "<br>".join([
            f"<b>Node:</b> {name}",
            f"<b>Type:</b> {kind}",
            f"<b>Organism:</b> {species}",
            f"<b>Description:</b> {attrs.get('description', 'N/A')}",
        ])
        canvas.add_node(name, title=tooltip, color=node_palette.get(species, fallback_color))

    # Edges: colour by interaction type, width by weight.
    for u, v, attrs in G.edges(data=True):
        interaction = attrs.get("interaction_type", "genetic")
        stroke = edge_palette.get(interaction, fallback_color)
        strength = attrs.get("weight", 1)
        tooltip = "<br>".join([
            f"<b>Interaction Type:</b> {interaction}",
            f"<b>Weight:</b> {strength}",
            f"<b>Details:</b> {attrs.get('description', 'N/A')}",
        ])
        canvas.add_edge(u, v, title=tooltip, color=stroke, width=float(strength))

    # Physics settings for the interactive layout
    canvas.set_options("""
    var options = {
        "physics": {
            "forceAtlas2Based": {
                "gravitationalConstant": -50,
                "springLength": 150,
                "springConstant": 0.08,
                "avoidOverlap": 1
            },
            "solver": "forceAtlas2Based",
            "stabilization": {
                "enabled": true,
                "iterations": 1000
            }
        }
    }
    """)

    # Write the HTML file and report where it went
    canvas.write_html(output_file)
    print(f"Enhanced visualization saved to {output_file}")

# Example Usage
# Assuming `G` is your NetworkX graph with metadata added
# enhanced_pyvis_visualization(G, "enhanced_network.html")


In [None]:
# Render G with enhanced_pyvis_visualization; the relative file name means the
# HTML is written to the current working directory
enhanced_pyvis_visualization(G, "enhanced_network.html")

In [None]:
import networkx as nx
from pyvis.network import Network
import json

def create_interactive_visualization_with_legend(G, output_file):
    """
    Creates an interactive visualization of a NetworkX graph with a legend.

    Nodes are coloured by their ``entity_type`` attribute ('UNKNOWN' when the
    attribute is missing) and edges are drawn with a width taken from their
    ``weight`` attribute (default 1). After PyVis writes the HTML, the file is
    re-read and a legend listing every entry of the colour scheme is inserted
    before each ``</body>`` occurrence.

    Args:
        G (networkx.Graph): The graph to render. It is used as passed in;
            nothing is loaded from disk.
        output_file (str): Path to save the interactive visualization.
    """
    # Create Pyvis network
    net = Network(height='900px', width='100%', bgcolor='#ffffff', 
                  font_color='black', notebook=False)
    
    # Define color scheme for entity types
    entity_colors = {
        'GENE': '#32cd32',                # Lime Green
        'PROTEIN': '#6a5acd',             # Slate Blue
        'PATHWAY': '#ff4500',             # Orange Red
        'METABOLITE': '#20b2aa',          # Light Sea Green
        'CATEGORY': '#f781bf',            # Pink
        'STATE': '#7fffd4',               # Aquamarine
        'UNKNOWN': '#808080'              # Dark Gray
    }
    
    # Add nodes with colors
    for node_id, node_data in G.nodes(data=True):
        # Get entity type, default to UNKNOWN if missing
        entity_type = node_data.get('entity_type', 'UNKNOWN')
        # Types not listed in the color scheme fall back to the same grey as UNKNOWN
        color = entity_colors.get(entity_type, '#808080')
        
        # Create hover info
        hover_info = f"""
        Entity: {node_id}<br>
        Type: {entity_type}<br>
        Description: {node_data.get('description', 'N/A')}<br>
        Source ID: {node_data.get('source_id', 'N/A')}
        """
        
        # Add node (every node gets the same fixed size)
        net.add_node(node_id, 
                     title=hover_info,
                     color=color,
                     size=30)

    # Add edges
    for source, target, edge_data in G.edges(data=True):
        weight = edge_data.get('weight', 1)
        description = edge_data.get('description', '')
        
        hover_info = f"""
        Weight: {weight}<br>
        Description: {description}<br>
        Keywords: {edge_data.get('keywords', 'N/A')}
        """
        
        # Add edge with hover and weight; float() raises if weight is not numeric
        net.add_edge(source, target, 
                     title=hover_info,
                     width=float(weight),
                     color='#666666')

    # Set physics options
    physics_options = {
        "physics": {
            "forceAtlas2Based": {
                "gravitationalConstant": -100,
                "centralGravity": 0.01,
                "springLength": 200,
                "springConstant": 0.08,
                "damping": 0.4,
                "avoidOverlap": 1
            },
            "solver": "forceAtlas2Based",
            "stabilization": {
                "enabled": True,
                "iterations": 1000,
                "updateInterval": 25
            }
        }
    }
    net.set_options(json.dumps(physics_options))
    
    # Save visualization
    net.write_html(output_file)
    
    # Build the legend HTML: an absolutely positioned panel in the top-left corner
    legend_html = """
    <div style="position: absolute; top: 10px; left: 10px; background-color: rgba(255, 255, 255, 0.9); 
                padding: 10px; border-radius: 5px; border: 1px solid #ccc;">
        <h3>Entity Types</h3>
        <ul style="list-style-type: none; padding: 0;">
    """
    
    # One legend row per entry of the color scheme, whether or not the graph uses it
    for entity_type, color in entity_colors.items():
        legend_html += f"""
            <li style="margin: 5px 0;">
                <span style="display: inline-block; width: 20px; height: 20px; 
                           background-color: {color}; border-radius: 50%; margin-right: 5px;"></span>
                {entity_type}
            </li>
        """
    
    legend_html += """
        </ul>
    </div>
    """
    
    # Re-open the file PyVis just wrote and splice the legend in before </body>.
    # If the file contains no </body>, the replace leaves it unchanged.
    with open(output_file, 'r', encoding='utf-8') as file:
        content = file.read()
    content = content.replace('</body>', f'{legend_html}</body>')
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(content)
    
    print(f"Interactive visualization with legend saved to {output_file}")

# Example usage: see the call in the next cell.



In [None]:
# Render G with the legend variant; the relative file name means "output.html"
# is written to the current working directory
create_interactive_visualization_with_legend(G, "output.html")

In [None]:
import collections

def get_most_frequent_node_types(G):
    """
    Tally how many nodes of each entity type a graph contains.

    The type is read from each node's ``entity_type`` attribute; nodes
    without that attribute are counted under 'UNKNOWN'.

    Args:
        G (networkx.Graph): The graph to analyze.

    Returns:
        collections.Counter: A counter of node types and their frequencies.
    """
    return collections.Counter(
        attrs.get('entity_type', 'UNKNOWN') for _node, attrs in G.nodes(data=True)
    )

# Count the entity types present on G and list them from most to least frequent
type_frequencies = get_most_frequent_node_types(G)
print("Node Type Frequencies:")
for node_type, count in type_frequencies.most_common():
    print(f"{node_type}: {count}")
