## Fig. 3 panels A, B, F: local k-NN network
Render a local protein neighborhood network graph centered on a query gene

In [9]:
import networkx as nx
import anndata as ad
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import sys
import copy
from datetime import datetime

# Put the shared "script" folder (three directory levels above the current
# working directory) on the import path so the local helper modules resolve.
script_path = Path.cwd().parent.parent.parent.joinpath("script")
sys.path.append(f"{script_path}")

from utils.knn import *
from external import clustering_workflows

# Folder inside the current working directory that receives this notebook's
# output files; it is created on first use and reused afterwards.
output_dir = Path.cwd().joinpath("output")
output_dir.mkdir(exist_ok=True)

### Load data

In [10]:
# Today's date as YYYY-MM-DD; it is embedded in the file names read and written below.
timestamp = f"{datetime.now():%Y-%m-%d}"
print("Timestamp: " + timestamp)

Timestamp: 2023-12-04


In [11]:
# manually set the timestamp to use the intermediate results from another date
# timestamp = "2023-12-04"

In [13]:
# Load the AnnData object written by the UMAP analysis (Fig2, panel D) for the
# selected timestamp.
adata_path = Path.cwd().parent.parent / "Fig2" / "panel_D" / "output" / f"adata_{timestamp}.h5ad"

# Fail loudly when the input is missing. Printing a message and carrying on would
# either crash later with an unrelated NameError or, worse, silently reuse a stale
# `adata` left over from an earlier kernel session.
if not adata_path.exists():
    raise FileNotFoundError(
        f"File {adata_path} not found.\n"
        "Please run umap analysis first (fig2 panel D) or specify the correct timestamp, "
        f"current value is {timestamp}"
    )

# Any other read error (e.g. a corrupted file) propagates with its full traceback.
adata = ad.read_h5ad(adata_path)

# check data
print(adata)  # 8541 x 61 for reference dataset, and 8017 x 28 for remodeling data

AnnData object with n_obs × n_vars = 8541 × 61
    obs: 'Protein IDs', 'Majority protein IDs', 'Gene_name_canonical', 'organelle_ground_truth_v6.0', 'Graph-based_localization_annotation'


In [14]:
# Lookup table: canonical gene name -> graph-based localization annotation.
# If a gene name occurs more than once in adata.obs, the last occurrence wins.
annot_dict = {
    gene_name: annotation
    for gene_name, annotation in zip(
        adata.obs["Gene_name_canonical"].to_list(),
        adata.obs["Graph-based_localization_annotation"].to_list(),
    )
}

### Compute the k-NN graph

In [15]:
# The adata object carrying the kNN graph is cached on disk (one file per
# timestamp) so the neighbor computation only has to run once.
knn_adata_path = output_dir / f"adata_kNN_{timestamp}.h5ad"

if knn_adata_path.exists():
    # Reuse the cached result. Without this branch a re-run would skip the
    # computation but keep the graph-less adata loaded earlier, leaving the
    # downstream neighbor extraction without a kNN graph to read.
    adata = ad.read_h5ad(knn_adata_path)
else:
    # instantiate a ClusteringWorkflow (only the part of the workflow that computes
    # the nearest neighbor graph is used); deep-copy so the loaded adata is not mutated
    kNN_obj = clustering_workflows.ClusteringWorkflow(adata=copy.deepcopy(adata))
    # preprocessing
    kNN_obj.preprocess(n_pcs=None)
    # compute nearest neighbor graph
    kNN_obj.calculate_neighbors(n_pcs=None, n_neighbors=20)
    adata = kNN_obj.adata
    # save a copy of the adata object that contains the kNN graph
    adata.write(knn_adata_path)

### Parameterize the graph

In [16]:
gene = "WASHC5"  # query gene the network is built around
get_2nd_order_neighbors = True  # also show neighbors of the neighbors
keep_top_n = 20  # limit connections per node to this number
node_color_by = "compartment"  # two options: "compartment" or "connections"

# One hex color per cluster, used when node_color_by = "compartment".
# The 10 categorical colors of matplotlib's tab10 collection are used here;
# specify tab20 instead if you have more than 10 clusters.
category_colors = [
    f"#{int(r * 255):02x}{int(g * 255):02x}{int(b * 255):02x}"
    for r, g, b in plt.get_cmap('tab10').colors
]
# you can also use a list of custom colors below:
#category_colors = ['#1f77b4', '#ff7f0e', 'red', '#9467bd', '#9467bd', '#8c564b',]

# NOTE
# other parameters such as node size, edge width are near the end of this notebook

### Extract data for the local network graph

In [17]:
# First-order neighbors of the query gene
nei_df = get_neighbors(adata, gene, keep_top_n=keep_top_n)

# Edge dictionary: each key is the unordered pair {gene, neighbor} (a frozenset),
# each value is the connection strength read from the query gene's row of nei_df.
graph_dict = {
    frozenset([gene, neighbor]): conn_strength
    for neighbor, conn_strength in nei_df.loc[gene].items()
}

# Second-order neighbors: merge the edges of the first-order neighbors into the
# same dictionary (entries with an identical key are overwritten by the update).
if get_2nd_order_neighbors:
    result = get_neighbors_of_neighbors(adata, nei_df.columns, keep_top_n=keep_top_n)
    graph_dict.update(result)

# Enforce the top-n connections limit per node
graph_dict = remove_extra_degrees(graph_dict, keep_top_n, query_gene=gene)

# Drop nodes that only have one connection; all 1st order neighbors are kept
graph_dict = prune_single_connection_nodes(
    graph_dict,
    keep_1st_order_neighbors=True,
    nei_df=nei_df,
    gene=gene,
)


### Compute graph layout

In [18]:
# Build an undirected networkx graph from the edge dictionary: every key holding
# two genes becomes one edge, weighted by the strength rounded to two decimals.
G = nx.Graph()
for gene_pair, strength in graph_dict.items():
    # single-member keys cannot form an edge between two nodes and are skipped
    if len(gene_pair) != 1:
        members = list(gene_pair)
        G.add_edge(members[0], members[1], weight=round(strength, 2))

In [19]:
# spring_layout starts from random positions; fixing the seed makes the rendered
# figure identical across runs. Change the seed to explore alternative layouts.
layout_seed = 42
pos = nx.spring_layout(G, seed=layout_seed)  # Use spring_layout for layout
# Save the positions as node attributes (G.nodes[node]['pos'])
nx.set_node_attributes(G, pos, "pos")

### Render the graph as a plotly figure

In [20]:
# convert the graph to node and edge "traces" that can be plotted by plotly
node_trace, edge_trace, category_color_map = prepare_plotly_network_graph(G, annot_dict, node_color_by, category_colors)

# The title font is set through title.font: the older `titlefont_size` shortcut is
# deprecated and is rejected by newer plotly releases.
fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        title=dict(text=f'Neighbor network graph of {gene}', font=dict(size=16)),
        height=700,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    ),
)

if node_color_by == "compartment":  # this block of code adds the legend
    # one empty (x=None, y=None) marker trace per category, so that each category
    # shows up as a legend entry without drawing any extra point
    for category, color in category_color_map.items():
        fig.add_trace(go.Scatter(
            x=[None],
            y=[None],
            mode='markers',
            marker=dict(color=color, size=10),
            name=category,
            showlegend=True
        ))

########################
# customize the figure #
########################
# update node style (applied to every trace in the figure, legend entries included)
fig.update_traces(marker=dict(size=25, line_width=1))  # line_width is the border width
# update edge style
edge_color = 'grey'
edge_alpha = 0.5
fig.update_traces(line=dict(width=1.5, color=convert_color_to_rgba(edge_color, edge_alpha)))  # width is the edge width, color is the edge color
# white background
fig.update_layout(plot_bgcolor='white')
# the line below removes protein names from display (fig.data[1] is the node trace);
# comment it out if you want the names to be shown
fig.data[1].update(mode='markers')

# save figure to file
#fig.write_html(f"{gene}_neighbor_graph.html")
#fig.write_image(f"{gene}_neighbor_graph.pdf")

# display the figure
fig.show()