In [1]:
from pathlib import Path

# Define configuration constants
INTERIM_DIR = Path("../data/interim")  # input directory (relative to the working directory)
PROCESSED_DIR = Path("../data/processed")  # output directory (relative to the working directory)

# The source file must already exist in the interim directory; it is created by the
# biogrid_interactions.ipynb notebook. Alternative inputs are kept commented out below.
#biogrid_interactions_json_file = "pmid_35559673_interactions.json"
biogrid_interactions_json_file = "TP53_interactions.json"
#biogrid_interactions_json_file = "RB1_interactions.json"
biogrid_interactions_json_src = INTERIM_DIR / biogrid_interactions_json_file

# Output GraphML and HTML files are written to the processed directory.
# The names are derived from the input file's stem (name without its final extension)
# rather than str.replace('.json', ...), which would also rewrite a '.json' appearing
# elsewhere in the name and would leave the name unchanged if the extension were missing.
# Both values are kept as plain strings, as before.
biogrid_interactions_network_stem = f"{Path(biogrid_interactions_json_file).stem}_network"
biogrid_interactions_network_graphml = str(PROCESSED_DIR / f"{biogrid_interactions_network_stem}.graphml")
biogrid_interactions_network_html = str(PROCESSED_DIR / f"{biogrid_interactions_network_stem}.html")

In [2]:
import json
import networkx as nx
import plotly.graph_objects as go

In [3]:
def create_gene_network(json_data):
    """Build an undirected gene-interaction graph from interaction records.

    Args:
        json_data: Either a JSON document (``str``, ``bytes`` or ``bytearray``)
            or an already-parsed mapping of ``interaction_id -> record``.
            Each record must provide the keys ``OFFICIAL_SYMBOL_A``,
            ``OFFICIAL_SYMBOL_B``, ``ENTREZ_GENE_A``, ``ENTREZ_GENE_B``,
            ``SYNONYMS_A``, ``SYNONYMS_B``, ``PUBMED_ID``, ``PUBMED_AUTHOR``,
            ``THROUGHPUT`` and ``QUALIFICATIONS``.

    Returns:
        networkx.Graph: One node per gene symbol (attributes ``entrez_id`` and
        ``synonyms``) and one edge per interacting pair (attributes
        ``interaction_id``, ``pubmed_id``, ``pubmed_author``, ``throughput``
        and ``qualifications``).

    Raises:
        KeyError: If a record lacks one of the required keys.
        json.JSONDecodeError: If ``json_data`` is text that is not valid JSON.

    Note:
        ``nx.Graph`` stores at most one edge per node pair, so when several
        records describe the same pair, the attributes of the last record
        processed replace those of earlier ones. Node attributes are likewise
        taken from the most recent record mentioning the gene.
    """
    # Accept raw JSON text as before, but also a mapping that the caller has
    # already parsed, so the data does not have to be re-serialised first.
    if isinstance(json_data, (str, bytes, bytearray)):
        interactions = json.loads(json_data)
    else:
        interactions = json_data

    # Create an undirected graph
    G = nx.Graph()

    for interaction_id, data in interactions.items():
        gene_a = data['OFFICIAL_SYMBOL_A']
        gene_b = data['OFFICIAL_SYMBOL_B']

        # Add nodes with attributes
        G.add_node(gene_a,
                   entrez_id=data['ENTREZ_GENE_A'],
                   synonyms=data['SYNONYMS_A'])
        G.add_node(gene_b,
                   entrez_id=data['ENTREZ_GENE_B'],
                   synonyms=data['SYNONYMS_B'])

        # Add edge with attributes
        G.add_edge(gene_a, gene_b,
                   interaction_id=interaction_id,
                   pubmed_id=data['PUBMED_ID'],
                   pubmed_author=data['PUBMED_AUTHOR'],
                   throughput=data['THROUGHPUT'],
                   qualifications=data['QUALIFICATIONS'])

    return G

In [4]:
def analyze_network(G):
    """Compute summary statistics for a graph.

    Args:
        G: A networkx graph (anything exposing ``number_of_nodes()``,
            ``number_of_edges()`` and ``degree()`` in the networkx style).

    Returns:
        dict: Keys, in order: ``'Number of nodes'``, ``'Number of edges'``,
        ``'Average degree'``, ``'Density'``, ``'Is connected'`` and
        ``'Highest degree nodes'`` (up to five ``(node, degree)`` pairs,
        highest degree first).

        For a graph with no nodes the numeric statistics are zero,
        ``'Is connected'`` is ``False`` and ``'Highest degree nodes'`` is an
        empty list.
    """
    node_count = G.number_of_nodes()

    # An empty graph would divide by zero when averaging degrees, and
    # nx.is_connected raises on a graph without nodes, so answer directly.
    if node_count == 0:
        return {
            'Number of nodes': 0,
            'Number of edges': G.number_of_edges(),
            'Average degree': 0.0,
            'Density': 0.0,
            'Is connected': False,
            'Highest degree nodes': [],
        }

    # Compute the degree table once and reuse it for both statistics.
    degree_dict = dict(G.degree())

    # Basic network statistics
    stats = {
        'Number of nodes': node_count,
        'Number of edges': G.number_of_edges(),
        'Average degree': sum(degree_dict.values()) / node_count,
        'Density': nx.density(G),
        'Is connected': nx.is_connected(G)
    }

    # Node degree analysis: the five best-connected nodes
    stats['Highest degree nodes'] = sorted(degree_dict.items(),
                                           key=lambda x: x[1],
                                           reverse=True)[:5]

    return stats

In [5]:
def create_interactive_visualization(G, title='Synthetic Lethality Gene Interaction Network',
                                     seed=42, max_node_size=60):
    """Render a gene-interaction graph as an interactive Plotly figure.

    Args:
        G: A networkx graph. Every node must carry ``entrez_id`` and
            ``synonyms`` attributes and every edge ``pubmed_author``,
            ``pubmed_id``, ``throughput`` and ``qualifications`` attributes.
        title: Figure title.
        seed: Seed passed to ``nx.spring_layout`` so that re-running the
            notebook yields the same layout. Pass ``None`` for a different
            random layout on each call.
        max_node_size: Upper bound on the marker size. Sizes grow as
            ``10 + 5 * degree``, which makes a hub with hundreds of neighbours
            larger than the plot itself. Pass ``None`` to disable the cap.

    Returns:
        plotly.graph_objects.Figure: A figure with one line trace for the
        edges and one marker trace for the nodes. Markers are sized and
        coloured by node degree.

    Raises:
        KeyError: If a node or edge lacks one of the required attributes.
    """
    # Calculate layout. The spring constant k shrinks as the graph grows.
    # With zero nodes 1 / 0**0.3 would raise ZeroDivisionError, so fall back
    # to networkx's default k in that case.
    node_count = G.number_of_nodes()
    k = 1 / pow(node_count, 0.3) if node_count else None
    pos = nx.spring_layout(G, k=k, seed=seed)

    # Degree table, computed once and reused for size, colour and hover text.
    degree_by_node = dict(G.degree())

    # Create edge trace
    edge_x = []
    edge_y = []
    edge_hover_text = []

    for edge in G.edges():
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        # A None entry breaks the line so that separate edges are not joined.
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

        # Create hover text for edges
        edge_data = G.edges[edge]
        hover_text = f"Interaction: {edge[0]} - {edge[1]}<br>" + \
                     f"PubMed: {edge_data['pubmed_author']} (ID: {edge_data['pubmed_id']})<br>" + \
                     f"Throughput: {edge_data['throughput']}<br>" + \
                     f"Qualifications: {edge_data['qualifications']}"
        # Add hover text only for the line segment, not the gap
        edge_hover_text.extend([hover_text, hover_text, None])

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='text',
        text=edge_hover_text,
        mode='lines'
    )

    # Create node trace
    node_x = []
    node_y = []
    node_hover_text = []
    node_sizes = []
    node_degrees = []

    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

        degree = degree_by_node[node]
        node_degrees.append(degree)

        # Calculate node size based on degree, capped so hubs stay readable
        size = 10 + 5 * degree
        if max_node_size is not None:
            size = min(size, max_node_size)
        node_sizes.append(size)

        # Create hover text for nodes
        node_data = G.nodes[node]
        hover_text = f"Gene: {node}<br>" + \
                     f"Entrez ID: {node_data['entrez_id']}<br>" + \
                     f"Synonyms: {node_data['synonyms']}<br>" + \
                     f"Degree: {degree}"
        node_hover_text.append(hover_text)

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        hoverinfo='text',
        hovertext=node_hover_text,
        marker=dict(
            showscale=True,
            colorscale='YlGnBu',
            # Without a color array the colour bar has nothing to describe.
            color=node_degrees,
            size=node_sizes,
            colorbar=dict(
                thickness=15,
                # Nested title form; the flat `titleside` shorthand is deprecated.
                title=dict(text='Node Connections', side='right'),
                xanchor='left'
            )
        ),
        textposition="top center",
        textfont=dict(size=8)
    )

    # Create the figure
    fig = go.Figure(data=[edge_trace, node_trace],
                    layout=go.Layout(
                        # Nested title form; the flat `titlefont_size` shorthand is deprecated.
                        title=dict(text=title, font=dict(size=16)),
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20, l=5, r=5, t=40),
                        annotations=[dict(
                            text="Interactive network visualization",
                            showarrow=False,
                            xref="paper", yref="paper",
                            x=0.005, y=-0.002)],
                        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                    )

    return fig

In [6]:
# Usage example:

# Read the JSON data (explicit encoding so the result does not depend on the platform default)
with open(biogrid_interactions_json_src, 'r', encoding='utf-8') as f:
    json_data = f.read()

# Create and analyze the network
G = create_gene_network(json_data)
stats = analyze_network(G)

# Make sure the output directories exist before writing; on a fresh checkout the
# processed directory may be missing and the writes below would then fail.
Path(biogrid_interactions_network_graphml).parent.mkdir(parents=True, exist_ok=True)
Path(biogrid_interactions_network_html).parent.mkdir(parents=True, exist_ok=True)

# write the network to a file
nx.write_graphml(G, biogrid_interactions_network_graphml)

# Print network statistics
print("\nNetwork Statistics:")
for key, value in stats.items():
    print(f"{key}: {value}")

# Create interactive visualization and save to HTML
fig = create_interactive_visualization(G)
fig.write_html(biogrid_interactions_network_html)
print(f"\nVisualization has been saved to {biogrid_interactions_network_html}")
print("You can open this file in any web browser to view the interactive network.")


Network Statistics:
Number of nodes: 208
Number of edges: 207
Average degree: 1.9903846153846154
Density: 0.009615384615384616
Is connected: True
Highest degree nodes: [('TP53', 207), ('MTOR', 1), ('KDR', 1), ('EGFR', 1), ('CDK4', 1)]

Visualization has been saved to ../data/processed/TP53_interactions_network.html
You can open this file in any web browser to view the interactive network.
