# NetworkX BioGRID Interactions Analysis

This notebook analyzes gene interaction networks using data from BioGRID. It includes:
- Loading and parsing JSON data.
- Creating a gene interaction network.
- Performing comprehensive network analysis.
- Visualizing the network.
- Saving results and graphs.

## 1. Setup and Configuration

In [1]:
from pathlib import Path
import json
import networkx as nx
import plotly.graph_objects as go
from pyvis.network import Network
import logging
import community as community_louvain  # For community detection
import matplotlib.pyplot as plt

# Logging: timestamped INFO-level messages for every step of the notebook
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Data directories (relative to the notebook's working directory)
INTERIM_DIR = Path("../data/interim")
PROCESSED_DIR = Path("../data/processed")

# Source JSON with the interactions (created using biogrid_interactions.ipynb)
biogrid_interactions_json_file = "pmid_35559673_interactions.json"
biogrid_interactions_json_src = INTERIM_DIR.joinpath(biogrid_interactions_json_file)

# Destinations for the exported graph, named after the input file
biogrid_interactions_network_graphml = PROCESSED_DIR.joinpath(
    biogrid_interactions_json_file.replace('.json', '_network.graphml')
)
biogrid_interactions_network_html = PROCESSED_DIR.joinpath(
    biogrid_interactions_json_file.replace('.json', '_network.html')
)

## 2. Data Loading

In [2]:
def load_json_data(filepath):
    """
    Load JSON data from a file.

    The file is always decoded as UTF-8 instead of the platform's default
    encoding, so the same file parses identically on every system.

    Args:
        filepath (Path): Path to the JSON file.

    Returns:
        dict: Parsed JSON data.

    Raises:
        FileNotFoundError: If the file does not exist (logged, then re-raised).
        json.JSONDecodeError: If the file does not contain valid JSON
            (logged, then re-raised).
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        logging.error(f"File not found: {filepath}")
        raise
    except json.JSONDecodeError:
        logging.error(f"Invalid JSON data in file: {filepath}")
        raise

In [3]:
# Example usage: parse the BioGRID interactions JSON from the interim data directory
json_data = load_json_data(biogrid_interactions_json_src)

## 3. Graph Creation

In [4]:
def create_gene_network(interactions):
    """
    Create a gene interaction network from JSON data.

    Each interaction contributes two gene nodes (keyed by official symbol)
    and one edge between them. An interaction that lacks any of the fields
    used for node or edge attributes is skipped with a warning, and nothing
    from that interaction is added to the graph.

    Because the result is a simple ``networkx.Graph``, a later interaction
    between the same pair of genes overwrites the edge attributes stored by
    an earlier one.

    Args:
        interactions (dict): Parsed JSON data containing gene interactions,
            mapping an interaction ID to a dict of interaction fields.

    Returns:
        G (networkx.Graph): A graph representing gene interactions.
    """
    G = nx.Graph()

    for interaction_id, data in interactions.items():
        # Read every required field before touching the graph, so that a
        # record with a missing field is skipped as a whole.
        try:
            gene_a = data['OFFICIAL_SYMBOL_A']
            gene_b = data['OFFICIAL_SYMBOL_B']
            node_a_attrs = {
                'entrez_id': data['ENTREZ_GENE_A'],
                'synonyms': data['SYNONYMS_A'],
            }
            node_b_attrs = {
                'entrez_id': data['ENTREZ_GENE_B'],
                'synonyms': data['SYNONYMS_B'],
            }
            edge_attrs = {
                'interaction_id': interaction_id,
                'pubmed_id': data['PUBMED_ID'],
                'pubmed_author': data['PUBMED_AUTHOR'],
                'throughput': data['THROUGHPUT'],
                'qualifications': data['QUALIFICATIONS'],
            }
        except KeyError:
            logging.warning(f"Skipping interaction {interaction_id} due to missing data.")
            continue

        # Add nodes with attributes
        G.add_node(gene_a, **node_a_attrs)
        G.add_node(gene_b, **node_b_attrs)

        # Add edge with attributes
        G.add_edge(gene_a, gene_b, **edge_attrs)

    logging.info(f"Created network with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")
    return G

In [None]:
# Example usage: build the gene interaction graph from the loaded JSON data
G = create_gene_network(json_data)

## 4. Network Analysis

In [6]:
def analyze_network(G):
    """
    Analyze the gene interaction network.

    An empty graph is handled explicitly: its average degree is reported as
    0.0 and it is reported as not connected, instead of raising.

    Args:
        G (networkx.Graph): The gene interaction network.

    Returns:
        dict: Network statistics and metrics, with the keys
            'Number of nodes', 'Number of edges', 'Average degree',
            'Density', 'Is connected' and 'Highest degree nodes' (up to five
            ``(node, degree)`` pairs, highest degree first).
    """
    node_count = G.number_of_nodes()
    # Build the degree mapping once; it feeds both the average and the ranking.
    degrees = dict(G.degree())

    stats = {
        'Number of nodes': node_count,
        'Number of edges': G.number_of_edges(),
        # Guard against division by zero on an empty graph.
        'Average degree': sum(degrees.values()) / node_count if node_count else 0.0,
        'Density': nx.density(G),
        # nx.is_connected raises on a graph without nodes.
        'Is connected': nx.is_connected(G) if node_count else False,
        'Highest degree nodes': sorted(degrees.items(), key=lambda x: x[1], reverse=True)[:5]
    }
    return stats

def calculate_centralities(G):
    """
    Calculate centrality measures for the network.

    Eigenvector centrality is computed by power iteration. The iteration
    limit is raised above the NetworkX default so that slowly converging
    networks are less likely to abort with a convergence error; results for
    networks that converge within the default limit are unaffected.

    Args:
        G (networkx.Graph): The gene interaction network.

    Returns:
        dict: Centrality measures (degree, betweenness, closeness, eigenvector),
            each a mapping from node to its centrality score.
    """
    return {
        'degree_centrality': nx.degree_centrality(G),
        'betweenness_centrality': nx.betweenness_centrality(G),
        'closeness_centrality': nx.closeness_centrality(G),
        'eigenvector_centrality': nx.eigenvector_centrality(G, max_iter=1000)
    }

def detect_communities(G, random_state=None):
    """
    Detect communities in the network using the Louvain method.

    The Louvain method is randomized, so repeated runs can return different
    partitions. Pass ``random_state`` to make the result reproducible.

    Args:
        G (networkx.Graph): The gene interaction network.
        random_state (int | None): Optional seed forwarded to the Louvain
            implementation. When None (the default), the library's default
            behavior is used.

    Returns:
        dict: Mapping of nodes to their community IDs.
    """
    if random_state is None:
        # Keep the default call unchanged when no seed is requested.
        return community_louvain.best_partition(G)
    return community_louvain.best_partition(G, random_state=random_state)

def additional_metrics(G):
    """
    Calculate additional network metrics.

    Diameter and average path length are only defined for connected graphs.
    When the network is disconnected, both are computed on its largest
    connected component and a warning is logged. Assortativity is always
    computed on the full graph. For a graph without nodes every metric is
    reported as None.

    Args:
        G (networkx.Graph): The gene interaction network.

    Returns:
        dict: Additional metrics (assortativity, diameter, average path length).
    """
    if G.number_of_nodes() == 0:
        return {
            'assortativity': None,
            'diameter': None,
            'average_path_length': None
        }

    path_graph = G
    if not nx.is_connected(G):
        # Path-based metrics raise on disconnected graphs, so restrict them
        # to the largest connected component.
        largest_component = max(nx.connected_components(G), key=len)
        path_graph = G.subgraph(largest_component)
        logging.warning(
            "Network is disconnected; diameter and average path length are "
            f"computed on the largest connected component ({len(largest_component)} "
            f"of {G.number_of_nodes()} nodes)."
        )

    return {
        'assortativity': nx.degree_assortativity_coefficient(G),
        'diameter': nx.diameter(path_graph),
        'average_path_length': nx.average_shortest_path_length(path_graph)
    }

def comprehensive_analysis(G):
    """
    Run every analysis step on the gene interaction network.

    Args:
        G (networkx.Graph): The gene interaction network.

    Returns:
        dict: Results keyed by 'basic_stats', 'centralities', 'communities',
            'clustering_coefficient' and 'additional_metrics'.
    """
    report = {}
    # Steps run in the same order as the keys appear in the result.
    report['basic_stats'] = analyze_network(G)
    report['centralities'] = calculate_centralities(G)
    report['communities'] = detect_communities(G)
    report['clustering_coefficient'] = nx.average_clustering(G)
    report['additional_metrics'] = additional_metrics(G)
    return report

In [None]:
# Example usage: run all analysis steps on the graph and print the raw results dict
analysis_results = comprehensive_analysis(G)
print("Comprehensive Analysis Results:")
print(analysis_results)

## 5. Visualization

In [8]:
def visualize_networkx_graph(G):
    """
    Draw the network as a static Matplotlib figure.

    Node positions come from a spring layout with a fixed seed, so the
    picture is the same on every run.

    Args:
        G (networkx.Graph): The gene interaction network.
    """
    node_style = {'node_size': 500, 'node_color': 'lightblue'}
    edge_style = {'width': 1, 'edge_color': 'gray'}
    label_style = {'font_size': 10, 'font_family': 'sans-serif'}

    plt.figure(figsize=(10, 8))
    layout = nx.spring_layout(G, seed=42)

    # Draw nodes, then edges, then labels
    nx.draw_networkx_nodes(G, layout, **node_style)
    nx.draw_networkx_edges(G, layout, **edge_style)
    nx.draw_networkx_labels(G, layout, **label_style)

    plt.title("Gene Interaction Network")
    plt.axis('off')
    plt.show()

def visualize_pyvis_graph(G, partition=None):
    """
    Visualize the network using PyVis.

    Nodes are sized by degree. When ``partition`` is given, each node that
    appears in it is assigned to a PyVis group named after its community ID,
    so that communities are styled differently. The HTML is written to
    ``biogrid_interactions_network_html``; its parent directory is created
    if needed.

    Args:
        G (networkx.Graph): The gene interaction network.
        partition (dict): Mapping of nodes to their community IDs. Optional;
            when None, all nodes use the default PyVis styling.
    """
    net = Network(notebook=True, height="750px", width="100%", bgcolor="#222222", font_color="white")

    # Add nodes with size based on degree
    for node, degree in G.degree():
        node_options = {'label': node, 'size': degree * 2}
        # Only set a group for nodes the partition actually covers.
        if partition is not None and partition.get(node) is not None:
            node_options['group'] = partition[node]
        net.add_node(node, **node_options)

    # Add edges
    for source, target in G.edges():
        net.add_edge(source, target)

    # Make sure the output directory exists before writing the HTML file
    biogrid_interactions_network_html.parent.mkdir(parents=True, exist_ok=True)

    # Save and display the graph
    net.show(str(biogrid_interactions_network_html))
    logging.info(f"Graph saved to {biogrid_interactions_network_html}")

In [None]:
# Example usage: render the interactive PyVis view. The static Matplotlib
# rendering is disabled; uncomment the next line to show it as well.
# visualize_networkx_graph(G)
visualize_pyvis_graph(G)

## 6. Summary and Export

In [10]:
def print_summary(analysis_results):
    """
    Print the headline figures from the analysis results.

    Args:
        analysis_results (dict): Comprehensive analysis results; must contain
            'basic_stats', 'clustering_coefficient' and 'additional_metrics'.
    """
    print("Network Summary:")

    # Size and degree ranking
    basic_stats = analysis_results['basic_stats']
    print(f"Number of nodes: {basic_stats['Number of nodes']}")
    print(f"Number of edges: {basic_stats['Number of edges']}")
    print(f"Top 5 nodes by degree: {basic_stats['Highest degree nodes']}")

    # Clustering
    clustering = analysis_results['clustering_coefficient']
    print(f"Average clustering coefficient: {clustering}")

    # Path-based metric
    diameter = analysis_results['additional_metrics']['diameter']
    print(f"Network diameter: {diameter}")

def export_graph(G, filepath):
    """
    Export the graph to a GraphML file.

    When ``filepath`` is a string or ``Path``, its parent directory is
    created first if it does not exist, so a fresh checkout without the
    output directory does not fail.

    Args:
        G (networkx.Graph): The gene interaction network.
        filepath (Path): Path to save the graph.
    """
    # Only path-like destinations have a directory to create.
    if isinstance(filepath, (str, Path)):
        Path(filepath).parent.mkdir(parents=True, exist_ok=True)

    nx.write_graphml(G, filepath)
    logging.info(f"Graph exported to {filepath}")

In [None]:
# Example usage: print the headline statistics, then write the graph to GraphML
print_summary(analysis_results)
export_graph(G, biogrid_interactions_network_graphml)