# 0. Import libraries

In [None]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

import networkx as nx

In [None]:
import celloracle as co
co.__version__

### Settings

In [None]:
# visualization settings
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

plt.rcParams['figure.figsize'] = [6, 4.5]
plt.rcParams["savefig.dpi"] = 300

In [None]:
# Output locations: one folder for data files, one for figures
save_folder_data = "data"
save_folder_figures = "figures"

# Create both folders; exist_ok=True makes re-running this cell harmless
os.makedirs(name=save_folder_data, exist_ok=True)
os.makedirs(name=save_folder_figures, exist_ok=True)

# 1. Load data

In [None]:
# Load the saved CellOracle links file from the data folder.
links = co.load_hdf5(
    file_path=os.path.join(
        save_folder_data,
        "day14_preprocessed_links.celloracle.links",
    )
)

In [None]:
# Pull the filtered edge table of the first three clusters, in cluster order.
# NOTE(review): assumes links.cluster is ordered cycling, moderate cyclers,
# non-cycling — confirm against the saved links object.
GRN_df_cycling, GRN_df_moderate_cyclers, GRN_df_non_cycling = (
    links.filtered_links[links.cluster[position]] for position in range(3)
)

# 2. General checks and data exploration

In [None]:
## Check whether every source node also occurs as a target node.
## Each printed array lists the source nodes that are never a target
## (an empty array means all sources are also targets).

# Unique source ("out") nodes per GRN
nodes_out_cycling = GRN_df_cycling['source'].unique()
nodes_out_moderate_cyclers = GRN_df_moderate_cyclers['source'].unique()
nodes_out_non_cycling = GRN_df_non_cycling['source'].unique()

# Unique target ("in") nodes per GRN
nodes_in_cycling = GRN_df_cycling['target'].unique()
nodes_in_moderate_cyclers = GRN_df_moderate_cyclers['target'].unique()
nodes_in_non_cycling = GRN_df_non_cycling['target'].unique()

# Report in the order: cycling, moderate cyclers, non-cycling
print(np.setdiff1d(nodes_out_cycling, nodes_in_cycling))
print(np.setdiff1d(nodes_out_moderate_cyclers, nodes_in_moderate_cyclers))
print(np.setdiff1d(nodes_out_non_cycling, nodes_in_non_cycling))

In [None]:
# Number of interactions (= rows of the edge table) per GRN
print('nr edges in GRN cycling:', GRN_df_cycling.shape[0])
print('nr edges in GRN moderate cyclers:', GRN_df_moderate_cyclers.shape[0])
print('nr edges in GRN non-cycling:', GRN_df_non_cycling.shape[0])

# Show the cycling edge table as the cell output
GRN_df_cycling

Each GRN contains 2000 edges, because only the 2000 top-ranked edges were retained for each GRN.

In [None]:
# Count the distinct source nodes of each GRN.
# (Using .value_counts() on the 'source' column instead would give the number
# of outgoing edges per source node.)
nodes_cycling = GRN_df_cycling['source'].unique()
nodes_moderate_cyclers = GRN_df_moderate_cyclers['source'].unique()
nodes_non_cycling = GRN_df_non_cycling['source'].unique()

print('nr source nodes cycling:', len(nodes_cycling))
print('nr source nodes moderate cyclers:', len(nodes_moderate_cyclers))
print('nr source nodes non-cycling:', len(nodes_non_cycling))


In [None]:
# Count the distinct target nodes of each GRN.
nodes_target_cycling = GRN_df_cycling['target'].unique()
nodes_target_moderate_cyclers = GRN_df_moderate_cyclers['target'].unique()
nodes_target_non_cycling = GRN_df_non_cycling['target'].unique()

print('nr target nodes cycling:', len(nodes_target_cycling))
print('nr target nodes moderate cyclers:', len(nodes_target_moderate_cyclers))
print('nr target nodes non-cycling:', len(nodes_target_non_cycling))

### Manual calculation of out-degree --> Probably not correct

In [None]:
# # Retrieve source TFs (=with out degree) common in all persister type GRNs
# common_TFs = np.intersect1d(np.intersect1d(nodes_cycling,nodes_non_cycling), nodes_moderate_cyclers)
# print('nr common source TFs in out degree', len(common_TFs))

# ## Retrieve unique TFs per persister cell type
# print("\nUnique nodes - not present in any other graph/GRN")
# # Cycling
# unique_TFs_cycling = np.setdiff1d(np.setdiff1d(nodes_cycling,nodes_non_cycling), nodes_moderate_cyclers)
# print('nr unique source TFs in out degree for cycling cells', len(unique_TFs_cycling), ':', unique_TFs_cycling)
# # Moderate cyclers
# unique_TFs_moderate_cyclers = np.setdiff1d(np.setdiff1d(nodes_moderate_cyclers,nodes_non_cycling), nodes_cycling)
# print('nr unique source TFs in out degree for moderate_cycling cells', len(unique_TFs_moderate_cyclers),':',unique_TFs_moderate_cyclers)
# # Non-cycling
# unique_TFs_non_cycling = np.setdiff1d(np.setdiff1d(nodes_non_cycling,nodes_cycling), nodes_moderate_cyclers)
# print('nr unique source TFs in out degree for non-cycling cells', len(unique_TFs_non_cycling),':',unique_TFs_non_cycling)


# ## Retrieve TFs per persister cell type which are not present in all GRNs (but they can be in one other GRN)
# print("\nUnique nodes compared to common graph nodes")
# # Cycling
# unique_TFs_cycling = np.setdiff1d(nodes_cycling,common_TFs)
# print('nr unique source TFs in out degree for cycling cells', len(unique_TFs_cycling), ':', unique_TFs_cycling)
# # Moderate cyclers
# unique_TFs_moderate_cyclers = np.setdiff1d(nodes_moderate_cyclers,common_TFs)
# print('nr unique source TFs in out degree for moderate_cycling cells', len(unique_TFs_moderate_cyclers),':',unique_TFs_moderate_cyclers)
# # Non-cycling
# unique_TFs_non_cycling = np.setdiff1d(nodes_non_cycling,common_TFs)
# print('nr unique source TFs in out degree for non-cycling cells', len(unique_TFs_non_cycling),':',unique_TFs_non_cycling)


In [None]:
# Display the non-cycling edge table as the cell output; the .out_degree() call is left disabled.
GRN_df_non_cycling#.out_degree()

# 3. NetworkX analysis

In [None]:
def create_graph(df, group_name):
    """
    Build a weighted directed graph from a CellOracle links table.

    Parameters
    ----------
    df : pandas.DataFrame
        Edge table with the columns 'source', 'target' and 'coef_mean'.
    group_name : str
        Label printed above the node/edge report.

    Returns
    -------
    networkx.DiGraph
        Graph with one edge per (source, target) pair; 'coef_mean' is stored
        as the edge attribute 'weight'. If a pair occurs in several rows, the
        weight of the last row is kept.
    """

    print('\n'+group_name)
    G = nx.DiGraph() # Create an empty directed graph

    # Register all sources first and then all targets, so that the node order
    # of the graph follows the table.
    G.add_nodes_from(df['source'])
    G.add_nodes_from(df['target'])

    # Walk the three columns in lockstep. This avoids df.iterrows(), which
    # constructs a full Series object for every row just to read three fields.
    G.add_weighted_edges_from(zip(df['source'], df['target'], df['coef_mean']))

    # Graph reporting
    print(f"Number of nodes: {G.number_of_nodes()}")
    print(f"Number of edges: {G.number_of_edges()}")

    # G['GATA2'] # info of example node
    # G.edges['GATA2', 'ADIRF'] # info of example edge

    return G

In [None]:
# Turn each edge table into a directed graph (each call prints a short report)
G_cycling = create_graph(df=GRN_df_cycling, group_name='Cycling')
G_moderate_cyclers = create_graph(df=GRN_df_moderate_cyclers, group_name='Moderate cyclers')
G_non_cycling = create_graph(df=GRN_df_non_cycling, group_name='Non-cycling')

##### Intersection of the networks

In [None]:
# Build the network shared by all three GRNs: start from a copy of the cycling
# graph and strip everything that is missing from either of the other two.
G_common = G_cycling.copy()

# Step 1: drop nodes and edges that are not in the moderate cyclers GRN
G_common.remove_nodes_from(
    node for node in G_cycling.nodes if not G_moderate_cyclers.has_node(node)
)
G_common.remove_edges_from(
    (u, v) for u, v in G_cycling.edges if not G_moderate_cyclers.has_edge(u, v)
)
print("Graph info after removal of nodes and edges not in moderate cycling GRN")
print(f"Number of nodes: {G_common.number_of_nodes()}")
print(f"Number of edges: {G_common.number_of_edges()}")

# Step 2: drop nodes and edges that are not in the non-cycling GRN.
# Items already removed in step 1 are skipped silently by NetworkX.
G_common.remove_nodes_from(
    node for node in G_cycling.nodes if not G_non_cycling.has_node(node)
)
G_common.remove_edges_from(
    (u, v) for u, v in G_cycling.edges if not G_non_cycling.has_edge(u, v)
)
print("Graph info after removal of nodes and edges not in non-cycling GRN")
print(f"Number of nodes: {G_common.number_of_nodes()}")
print(f"Number of edges: {G_common.number_of_edges()}")

##### Unique networks (i.e. networks - common network)

In [None]:
# For each graph remove the edges that are in the common graph

def remove_common_edges(G, G_common):
    """
    Return a copy of G without the edges that also occur in G_common.

    Nodes that have no neighbours left after the edge removal are dropped as
    well. Node and edge counts are printed before the removal, after the edge
    removal and after the isolate removal. G itself is not modified.
    """

    def report(header, graph):
        # Print one status block: header line followed by node and edge counts
        print(header)
        print(f"Number of nodes: {graph.number_of_nodes()}")
        print(f"Number of edges: {graph.number_of_edges()}")

    # Work on a copy so the caller's graph stays intact
    G_unique = G.copy()
    report("Graph info before removal of edges in common GRN", G_unique)

    # Edges shared with the common network
    shared_edges = [edge for edge in G.edges if edge in G_common.edges]
    G_unique.remove_edges_from(shared_edges)
    report("Graph info after removal of edges in common GRN", G_unique)

    # Nodes left without any neighbour
    lonely_nodes = list(nx.isolates(G_unique))
    G_unique.remove_nodes_from(lonely_nodes)
    report("Graph info after removal of isolates", G_unique)

    return G_unique


# Strip the shared edges from each group-specific network
print('cycling')
G_cycling_unique = remove_common_edges(G=G_cycling, G_common=G_common)

print('\n'+'moderate cycling')
G_moderate_cyclers_unique = remove_common_edges(G=G_moderate_cyclers, G_common=G_common)

print('\n'+'non-cycling')
G_non_cycling_unique = remove_common_edges(G=G_non_cycling, G_common=G_common)

##### Centrality checks

In [None]:
# Degree-centrality ranking for any of the networks
def centrality_checks(G):
    """
    Rank the nodes of a graph by total, out- and in-degree centrality.

    The scores are NetworkX degree centralities: the node degree divided by
    the maximum possible degree (n - 1 for a graph with n nodes). They are
    fractions, not raw edge counts, so scores of networks with different node
    counts are scaled differently.

    Parameters
    ----------
    G : networkx.DiGraph
        Directed graph; the in-/out-degree centralities are only defined for
        directed graphs.

    Returns
    -------
    tuple of three lists
        (sorted_total_degree, sorted_out_degree, sorted_in_degree). Each list
        holds (node, centrality) tuples in descending order of centrality;
        nodes with equal scores keep the graph's node order.
    """

    def _ranked(scores):
        # Sort a {node: score} mapping by score, highest first
        return sorted(scores.items(), key=lambda item: item[1], reverse=True)

    sorted_total_degree = _ranked(nx.degree_centrality(G))      # incoming + outgoing edges
    sorted_out_degree = _ranked(nx.out_degree_centrality(G))    # edges from the node to its targets
    sorted_in_degree = _ranked(nx.in_degree_centrality(G))      # edges with the node as target

    return sorted_total_degree, sorted_out_degree, sorted_in_degree


# Only the out-degree ranking (second return value) is needed below. It is
# selected by index rather than unpacked into `_`, because assigning `_` in a
# notebook shadows IPython's "last cell output" variable.

# Complete/original networks
sorted_out_degree_cycling = centrality_checks(G_cycling)[1]
sorted_out_degree_moderate_cyclers = centrality_checks(G_moderate_cyclers)[1]
sorted_out_degree_non_cycling = centrality_checks(G_non_cycling)[1]

# Network of overlapping nodes and edges
sorted_out_degree_common = centrality_checks(G_common)[1]

# Filtered networks - without edges from the common network
sorted_out_degree_cycling_unique = centrality_checks(G_cycling_unique)[1]
sorted_out_degree_moderate_cyclers_unique = centrality_checks(G_moderate_cyclers_unique)[1]
sorted_out_degree_non_cycling_unique = centrality_checks(G_non_cycling_unique)[1]


Original networks:

In [None]:
# Original networks: tabulate the ten highest-ranked nodes of each out-degree list
df_out_degree_common = pd.DataFrame(
    data=sorted_out_degree_common[:10],
    columns=['Node_Common', 'Out_Degree_Common'],
)
df_out_degree_cyc = pd.DataFrame(
    data=sorted_out_degree_cycling[:10],
    columns=['Node_Cyc', 'Out_Degree_Cyc'],
)
df_out_degree_mod = pd.DataFrame(
    data=sorted_out_degree_moderate_cyclers[:10],
    columns=['Node_Mod', 'Out_Degree_Mod'],
)
df_out_degree_non_cyc = pd.DataFrame(
    data=sorted_out_degree_non_cycling[:10],
    columns=['Node_Non_cyc', 'Out_Degree_Non_cyc'],
)

# Put the four rankings side by side; rows are matched on rank position (index)
merged_df_out_degree = pd.concat(
    objs=[df_out_degree_common, df_out_degree_cyc, df_out_degree_mod, df_out_degree_non_cyc],
    axis='columns',
)
merged_df_out_degree


Simplified networks --> where common interactions are removed:

In [None]:
# Unique networks: tabulate the ten highest-ranked nodes of each out-degree list
df_out_degree_cyc_unique = pd.DataFrame(
    data=sorted_out_degree_cycling_unique[:10],
    columns=['Node_Cyc', 'Out_Degree_Cyc'],
)
df_out_degree_mod_unique = pd.DataFrame(
    data=sorted_out_degree_moderate_cyclers_unique[:10],
    columns=['Node_Mod', 'Out_Degree_Mod'],
)
df_out_degree_non_cyc_unique = pd.DataFrame(
    data=sorted_out_degree_non_cycling_unique[:10],
    columns=['Node_Non_cyc', 'Out_Degree_Non_cyc'],
)

# Put the rankings side by side, matched on rank position (index).
# df_out_degree_common is reused from the cell for the original networks.
merged_df_out_degree_unique = pd.concat(
    objs=[df_out_degree_common, df_out_degree_cyc_unique, df_out_degree_mod_unique, df_out_degree_non_cyc_unique],
    axis='columns',
)
merged_df_out_degree_unique


In [None]:
# merged_df_out_degree_unique[merged_df_out_degree_unique['Node_Cyc']=='SP6']

# Visualization

##### Common network

In [None]:
# Plot the common network.
# The spring layout has no fixed seed here, so node positions change between runs.
pos = nx.spring_layout(G_common)
nx.draw(
    G_common,
    pos=pos,
    with_labels=True,
    node_size=300,
    node_color='skyblue',
    font_size=10,
    font_color='black',
)
plt.show()

##### Cycling network

In [None]:
# # Draw the graph for cycling network
# pos = nx.spring_layout(G_cycling) # Define the layout for node positioning
# nx.draw(G_cycling, pos, with_labels=True, node_size=300, node_color='skyblue', font_size=10, font_color='black')
# # Display the graph
# plt.show()

# Plot the unique cycling network, i.e. with the common interactions removed.
# The spring layout has no fixed seed here, so node positions change between runs.
pos = nx.spring_layout(G_cycling_unique)
nx.draw(
    G_cycling_unique,
    pos=pos,
    with_labels=True,
    node_size=300,
    node_color='skyblue',
    font_size=10,
    font_color='black',
)
plt.show()

##### Moderate cyclers

In [None]:
# # Draw the graph for the moderate cyclers network
# pos = nx.spring_layout(G_moderate_cyclers) # Define the layout for node positioning
# nx.draw(G_moderate_cyclers, pos, with_labels=True, node_size=300, node_color='skyblue', font_size=10, font_color='black')
# # Display the graph
# plt.show()

# Plot the unique moderate cyclers network, i.e. with the common interactions removed.
# The spring layout has no fixed seed here, so node positions change between runs.
pos = nx.spring_layout(G_moderate_cyclers_unique)
nx.draw(
    G_moderate_cyclers_unique,
    pos=pos,
    with_labels=True,
    node_size=300,
    node_color='skyblue',
    font_size=10,
    font_color='black',
)
plt.show()

##### Non-cycling network

In [None]:
# # Draw the graph for the non-cycling network
# pos = nx.spring_layout(G_non_cycling) # Define the layout for node positioning
# nx.draw(G_non_cycling, pos, with_labels=True, node_size=300, node_color='skyblue', font_size=10, font_color='black')
# # Display the graph
# plt.show()

# Plot the unique non-cycling network, i.e. with the common interactions removed.
# The spring layout has no fixed seed here, so node positions change between runs.
pos = nx.spring_layout(G_non_cycling_unique)
nx.draw(
    G_non_cycling_unique,
    pos=pos,
    with_labels=True,
    node_size=300,
    node_color='skyblue',
    font_size=10,
    font_color='black',
)
plt.show()