# EDA on GRN temporal dynamics

- Last updated: 1/18/2024
- Author: Yang-Joon Kim

Description/notes:
- Exploratory data analysis on GRNs from different timepoints.

- Analyses where we'd like to see how the GRN evolves over time/development.
    - First, for the same cell type (progenitor, or in an intermediate fate), how does the GRN evolve over the developmental stages (real time)?
    - [Dictys] Second, for the same developmental stage, how does the GRN evolve along the developmental trajectories (mesoderm/neuroectoderm trajectories, for example)?
    - From these analyses, can we identify transient key driver genes/TFs that were unidentifiable from "static" GRNs?

- Note that the datasets we're using are not "finalized"; therefore, our goal is not to rely too much on the end product, but rather to establish a conceptual framework for which analyses would be relevant for the "finalized" data.

In [1]:
# 0. Import

import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

In [2]:
# CellOracle supplies `co.load_hdf5`, used further down to read the GRN files.
# The bare `co.__version__` echoes the installed version as the cell output (provenance).
import celloracle as co
co.__version__

  def twobit_to_dna(twobit: int, size: int) -> str:
  def dna_to_twobit(dna: str) -> int:
  def twobit_1hamming(twobit: int, size: int) -> List[int]:
INFO:matplotlib.font_manager:Failed to extract font properties from /usr/share/fonts/google-noto-emoji/NotoColorEmoji.ttf: In FT2Font: Can not load face (unknown file format; error code 0x2)


'0.14.0'

In [3]:
# visualization settings
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

plt.rcParams['figure.figsize'] = [6, 4.5]
plt.rcParams["savefig.dpi"] = 600

In [4]:
# Output folder for this notebook's figures; it is created (parents included) if it
# does not exist yet, and an already-existing folder is not treated as an error.
figpath = "/hpc/projects/data.science/yangjoon.kim/zebrahub_multiome/zebrahub-multiome-analysis/figures/EDA_GRN_dynamics_timepoints/"
os.makedirs(name=figpath, exist_ok=True)

## Step 1. Import the GRNs (Links object)

In [5]:
# Read the saved cell-type GRNs (CellOracle Links objects) for datasets TDR118 and TDR119.
TDR118_GRN = co.load_hdf5(
    "/hpc/projects/data.science/yangjoon.kim/zebrahub_multiome/data/processed_data/sequencing_ver1/TDR118_cicero_output/08_TDR118_celltype_GRNs.celloracle.links"
)
TDR119_GRN = co.load_hdf5(
    "/hpc/projects/data.science/yangjoon.kim/zebrahub_multiome/data/processed_data/sequencing_ver1/TDR119_cicero_output/08_TDR119_celltype_GRNs.celloracle.links"
)

In [8]:
# One GRN (Links object) per developmental timepoint: 16, 19 and 24 hpf.
# Dataset used for each timepoint: TDR118 -> 16hpf, TDR125 -> 19hpf, TDR124 -> 24hpf.
# (TDR118 is the dataset chosen to represent 16hpf.)
GRN_16hpf = co.load_hdf5(
    "/hpc/projects/data.science/yangjoon.kim/zebrahub_multiome/data/processed_data/sequencing_ver1/TDR118_cicero_output/08_TDR118_celltype_GRNs.celloracle.links"
)
GRN_19hpf = co.load_hdf5(
    "/hpc/projects/data.science/yangjoon.kim/zebrahub_multiome/data/processed_data/sequencing_ver1/TDR125_cicero_output/08_TDR125_celltype_GRNs.celloracle.links"
)
GRN_24hpf = co.load_hdf5(
    "/hpc/projects/data.science/yangjoon.kim/zebrahub_multiome/data/processed_data/sequencing_ver1/TDR124_cicero_output/08_TDR124_celltype_GRNs.celloracle.links"
)

In [12]:
# Extract the filtered GRNs (`filtered_links`) of every timepoint as dictionaries.
# Each dictionary maps a cell-type name to a DataFrame of edges with the columns
# source, target, coef_mean, coef_abs, p and -logp.
dict_GRN_16hpf = GRN_16hpf.filtered_links
dict_GRN_19hpf = GRN_19hpf.filtered_links
dict_GRN_24hpf = GRN_24hpf.filtered_links

# Only the last expression of a cell is rendered, so the intermediate bare
# `dict_GRN_16hpf` / `dict_GRN_19hpf` lines never displayed anything and were dropped.
# The 24hpf dictionary is still shown as the cell output.
dict_GRN_24hpf

{'Adaxial_Cells':         source    target  coef_mean  coef_abs             p      -logp
 64807   nr2f1b    hmgb2a   0.107982  0.107982  1.847119e-13  12.733505
 64957   hmga1a     hmgn2   0.107153  0.107153  1.941999e-16  15.711751
 129220    tp53   serbp1a   0.102069  0.102069  4.345155e-14  13.361995
 151333  sox21a    tmsb4x   0.086323  0.086323  2.243270e-12  11.649119
 68468   hmga1a  hsp90ab1   0.085411  0.085411  5.726644e-20  19.242100
 ...        ...       ...        ...       ...           ...        ...
 98065    isl2b     nop56   0.009157  0.009157  2.744661e-04   3.561511
 61321    pax7a     hbbe3  -0.009153  0.009153  1.496054e-07   6.825053
 123758    lhx6     rplp0   0.009149  0.009149  7.993036e-05   4.097288
 22536    pax3a      cdon   0.009147  0.009147  1.384234e-04   3.858791
 68391    sox9a  hsp90ab1   0.009142  0.009142  7.577885e-04   3.120452
 
 [2000 rows x 6 columns],
 'Differentiating_Neurons':         source    target  coef_mean  coef_abs             p    

In [15]:
# Cell types that have a filtered GRN at 16hpf (compare with the 19hpf and 24hpf lists)
dict_GRN_16hpf.keys()

dict_keys(['Adaxial_Cells', 'Differentiating_Neurons', 'Endoderm', 'Epidermal', 'Lateral_Mesoderm', 'Muscle', 'NMPs', 'Neural_Anterior', 'Neural_Crest', 'Neural_Posterior', 'Notochord', 'PSM', 'Somites', 'unassigned'])

In [16]:
# Cell types that have a filtered GRN at 19hpf (no 'Adaxial_Cells' entry in the recorded output)
dict_GRN_19hpf.keys()

dict_keys(['Differentiating_Neurons', 'Endoderm', 'Epidermal', 'Lateral_Mesoderm', 'Muscle', 'NMPs', 'Neural_Anterior', 'Neural_Crest', 'Neural_Posterior', 'Notochord', 'PSM', 'Somites', 'unassigned'])

In [17]:
# Cell types that have a filtered GRN at 24hpf
# (no 'Lateral_Mesoderm', 'NMPs' or 'Notochord' entries in the recorded output)
dict_GRN_24hpf.keys()

dict_keys(['Adaxial_Cells', 'Differentiating_Neurons', 'Endoderm', 'Epidermal', 'Muscle', 'Neural_Anterior', 'Neural_Crest', 'Neural_Posterior', 'PSM', 'Somites', 'unassigned'])

In [29]:
# Choose a cell-type of interest (that is present for all timepoints)
# Note that we will have to consider the edge case where the celltype is only transient for specific timepoints
ct = "PSM"

# Enforce the "present for all timepoints" requirement explicitly. The original cell
# only evaluated `dict_GRN_16hpf[ct]` and discarded the value, which checked 16hpf alone.
for _label, _links in (
    ("16hpf", dict_GRN_16hpf),
    ("19hpf", dict_GRN_19hpf),
    ("24hpf", dict_GRN_24hpf),
):
    if ct not in _links:
        raise KeyError(f"cell type {ct!r} has no filtered GRN at {_label}")

# Network scores of the 16hpf GRN restricted to the chosen cell type,
# ordered from the highest to the lowest `degree_centrality_all`.
(
    GRN_16hpf.merged_score
    .loc[lambda scores: scores["cluster"] == ct]
    .sort_values("degree_centrality_all", ascending=False)
)

Unnamed: 0,degree_all,degree_in,degree_out,clustering_coefficient,clustering_coefficient_weighted,degree_centrality_all,degree_centrality_in,degree_centrality_out,betweenness_centrality,closeness_centrality,eigenvector_centrality,page_rank,assortative_coefficient,average_path_length,community_random_walk,module,connectivity,participation,role,cluster
meox1,77,16,61,0.064688,0.069812,0.171875,0.035714,0.136161,10509,0.100778,1.000000,0.003327,-0.278757,0.089486,1,2,4.812401,0.611747,Connector Hub,PSM
hmga1a,66,47,19,0.040385,0.066425,0.147321,0.104911,0.042411,6481,0.051747,0.976142,0.012195,-0.278757,0.089486,4,0,4.061563,0.559053,Connector Hub,PSM
apoc1,65,65,0,0.019712,0.030708,0.145089,0.145089,0.000000,0,,0.773270,0.021161,-0.278757,0.089486,4,4,4.948343,0.701538,Connector Hub,PSM
hoxb3a,54,10,44,0.097518,0.105866,0.120536,0.022321,0.098214,3008,0.089543,0.629265,0.002715,-0.278757,0.089486,1,2,3.365062,0.576389,Connector Hub,PSM
apoeb,49,49,0,0.020408,0.025567,0.109375,0.109375,0.000000,0,,0.414441,0.014596,-0.278757,0.089486,4,4,4.755907,0.586422,Connector Hub,PSM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
tjp2b,1,1,0,0.000000,0.000000,0.002232,0.002232,0.000000,0,,0.039810,0.001475,-0.278757,0.089486,1,2,-0.687486,0.000000,Ultra peripheral,PSM
foxi3a,1,0,1,0.000000,0.000000,0.002232,0.000000,0.002232,0,47.713347,0.012040,0.001432,-0.278757,0.089486,4,3,-0.786730,0.000000,Ultra peripheral,PSM
gcm2,1,0,1,0.000000,0.000000,0.002232,0.000000,0.002232,0,49.841032,0.013915,0.001432,-0.278757,0.089486,4,4,-0.439853,0.000000,Ultra peripheral,PSM
onecut1,1,0,1,0.000000,0.000000,0.002232,0.000000,0.002232,0,0.067257,0.013537,0.001432,-0.278757,0.089486,5,3,-0.786730,0.000000,Ultra peripheral,PSM


In [31]:
ct = "PSM"

# Network scores of the 19hpf GRN for the chosen cell type,
# ordered from the highest to the lowest `degree_centrality_all`.
(
    GRN_19hpf.merged_score
    .loc[lambda scores: scores["cluster"] == ct]
    .sort_values("degree_centrality_all", ascending=False)
)

Unnamed: 0,degree_all,degree_in,degree_out,clustering_coefficient,clustering_coefficient_weighted,degree_centrality_all,degree_centrality_in,degree_centrality_out,betweenness_centrality,closeness_centrality,eigenvector_centrality,page_rank,assortative_coefficient,average_path_length,community_random_walk,module,connectivity,participation,role,cluster
meox1,66,10,56,0.047115,0.058276,0.141631,0.021459,0.120172,2006,0.110970,1.000000,0.004245,-0.244645,0.068915,2,0,3.531464,0.681657,Connector Hub,PSM
meis1b,66,17,49,0.092166,0.105778,0.141631,0.036481,0.105150,6910,0.119120,0.717997,0.003852,-0.244645,0.068915,2,6,3.621357,0.630385,Connector Hub,PSM
apoc1,63,63,0,0.014337,0.019224,0.135193,0.135193,0.000000,0,,0.771168,0.021658,-0.244645,0.068915,5,3,5.477357,0.690854,Connector Hub,PSM
hmga1a,61,30,31,0.093443,0.119594,0.130901,0.064378,0.066524,4550,0.063072,0.959630,0.007888,-0.244645,0.068915,4,5,4.384768,0.567589,Connector Hub,PSM
rxraa,53,15,38,0.111702,0.131984,0.113734,0.032189,0.081545,8084,0.097898,0.922249,0.004217,-0.244645,0.068915,2,0,2.538473,0.679688,Connector Hub,PSM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
hand2,1,0,1,0.000000,0.000000,0.002146,0.000000,0.002146,0,79.635835,0.008655,0.001426,-0.244645,0.068915,24,6,-0.840113,0.000000,Ultra peripheral,PSM
tp73,1,0,1,0.000000,0.000000,0.002146,0.000000,0.002146,0,0.088650,0.017790,0.001426,-0.244645,0.068915,2,6,-0.840113,0.000000,Ultra peripheral,PSM
dlx4a,1,0,1,0.000000,0.000000,0.002146,0.000000,0.002146,0,75.111678,0.005264,0.001426,-0.244645,0.068915,22,5,-0.839126,0.000000,Ultra peripheral,PSM
tp63,1,0,1,0.000000,0.000000,0.002146,0.000000,0.002146,0,0.057291,0.008221,0.001426,-0.244645,0.068915,21,5,-0.839126,0.000000,Ultra peripheral,PSM


In [32]:
ct = "PSM"

# NOTE(review): this cell queries GRN_19hpf again and yields the same table as the
# preceding 19hpf cell -- confirm whether a different timepoint was intended here.
(
    GRN_19hpf.merged_score
    .loc[lambda scores: scores["cluster"] == ct]
    .sort_values("degree_centrality_all", ascending=False)
)

Unnamed: 0,degree_all,degree_in,degree_out,clustering_coefficient,clustering_coefficient_weighted,degree_centrality_all,degree_centrality_in,degree_centrality_out,betweenness_centrality,closeness_centrality,eigenvector_centrality,page_rank,assortative_coefficient,average_path_length,community_random_walk,module,connectivity,participation,role,cluster
meox1,66,10,56,0.047115,0.058276,0.141631,0.021459,0.120172,2006,0.110970,1.000000,0.004245,-0.244645,0.068915,2,0,3.531464,0.681657,Connector Hub,PSM
meis1b,66,17,49,0.092166,0.105778,0.141631,0.036481,0.105150,6910,0.119120,0.717997,0.003852,-0.244645,0.068915,2,6,3.621357,0.630385,Connector Hub,PSM
apoc1,63,63,0,0.014337,0.019224,0.135193,0.135193,0.000000,0,,0.771168,0.021658,-0.244645,0.068915,5,3,5.477357,0.690854,Connector Hub,PSM
hmga1a,61,30,31,0.093443,0.119594,0.130901,0.064378,0.066524,4550,0.063072,0.959630,0.007888,-0.244645,0.068915,4,5,4.384768,0.567589,Connector Hub,PSM
rxraa,53,15,38,0.111702,0.131984,0.113734,0.032189,0.081545,8084,0.097898,0.922249,0.004217,-0.244645,0.068915,2,0,2.538473,0.679688,Connector Hub,PSM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
hand2,1,0,1,0.000000,0.000000,0.002146,0.000000,0.002146,0,79.635835,0.008655,0.001426,-0.244645,0.068915,24,6,-0.840113,0.000000,Ultra peripheral,PSM
tp73,1,0,1,0.000000,0.000000,0.002146,0.000000,0.002146,0,0.088650,0.017790,0.001426,-0.244645,0.068915,2,6,-0.840113,0.000000,Ultra peripheral,PSM
dlx4a,1,0,1,0.000000,0.000000,0.002146,0.000000,0.002146,0,75.111678,0.005264,0.001426,-0.244645,0.068915,22,5,-0.839126,0.000000,Ultra peripheral,PSM
tp63,1,0,1,0.000000,0.000000,0.002146,0.000000,0.002146,0,0.057291,0.008221,0.001426,-0.244645,0.068915,21,5,-0.839126,0.000000,Ultra peripheral,PSM


In [34]:
ct = "PSM"

# Network scores of the 24hpf GRN for the chosen cell type,
# ordered from the highest to the lowest `degree_centrality_all`.
(
    GRN_24hpf.merged_score
    .loc[lambda scores: scores["cluster"] == ct]
    .sort_values("degree_centrality_all", ascending=False)
)

Unnamed: 0,degree_all,degree_in,degree_out,clustering_coefficient,clustering_coefficient_weighted,degree_centrality_all,degree_centrality_in,degree_centrality_out,betweenness_centrality,closeness_centrality,eigenvector_centrality,page_rank,assortative_coefficient,average_path_length,community_random_walk,module,connectivity,participation,role,cluster
hmga1a,86,65,21,0.083447,0.093731,0.231183,0.174731,0.056452,1221,3.084511,1.000000,0.016021,-0.04954,0.020356,2,3,4.712182,0.681179,Connector Hub,PSM
actb1,70,70,0,0.014907,0.020163,0.188172,0.188172,0.000000,0,,0.491866,0.018411,-0.04954,0.020356,2,0,5.587235,0.729388,Connector Hub,PSM
hsp90ab1,68,68,0,0.021071,0.028409,0.182796,0.182796,0.000000,0,,0.632795,0.017102,-0.04954,0.020356,2,4,3.411550,0.643599,Connector Hub,PSM
ptmab,67,67,0,0.007237,0.008942,0.180108,0.180108,0.000000,0,,0.594568,0.011915,-0.04954,0.020356,2,4,2.929332,0.687458,Connector Hub,PSM
hmgn2,61,61,0,0.026230,0.039394,0.163978,0.163978,0.000000,0,,0.695886,0.009717,-0.04954,0.020356,2,4,2.808778,0.654125,Connector Hub,PSM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
cygb1,1,1,0,0.000000,0.000000,0.002688,0.002688,0.000000,0,,0.002228,0.001820,-0.04954,0.020356,2,0,-0.676481,0.000000,Ultra peripheral,PSM
traf4a,1,1,0,0.000000,0.000000,0.002688,0.002688,0.000000,0,,0.001738,0.001756,-0.04954,0.020356,1,0,-0.676481,0.000000,Ultra peripheral,PSM
colec12,1,1,0,0.000000,0.000000,0.002688,0.002688,0.000000,0,,0.002641,0.001740,-0.04954,0.020356,2,0,-0.676481,0.000000,Ultra peripheral,PSM
slc6a8,1,1,0,0.000000,0.000000,0.002688,0.002688,0.000000,0,,0.002183,0.001757,-0.04954,0.020356,1,0,-0.676481,0.000000,Ultra peripheral,PSM


In [35]:
# Which genes enter or leave the GRN of one cell type as development proceeds?
ct = "PSM"

# Edge tables of that cell type at 16hpf (df1), 19hpf (df2) and 24hpf (df3)
df1, df2, df3 = (
    links_by_celltype[ct]
    for links_by_celltype in (dict_GRN_16hpf, dict_GRN_19hpf, dict_GRN_24hpf)
)

# Gene set of each network: every gene listed in its `source` or `target` column
genes_16hpf = set(df1["source"]) | set(df1["target"])
genes_19hpf = set(df2["source"]) | set(df2["target"])
genes_24hpf = set(df3["source"]) | set(df3["target"])


In [37]:
# Finding genes that appear or disappear relative to the 16hpf network.
# appeared    : in the 19hpf and/or 24hpf network, but not in the 16hpf network
# disappeared : in the 16hpf network, but in neither the 19hpf nor the 24hpf network
genes_appeared = (genes_19hpf | genes_24hpf) - genes_16hpf
genes_disappeared = genes_16hpf - (genes_19hpf | genes_24hpf)

# A set of strings iterates in hash order, which differs between interpreter sessions.
# Print sorted lists so the output is the same after every kernel restart.
# (genes_appeared / genes_disappeared themselves remain sets.)
print("Genes appeared:", sorted(genes_appeared))
print("Genes disappeared:", sorted(genes_disappeared))

Genes appeared: {'ldb3a', 'morc3b', 'atp2a1', 'adamts3', 'ap2m1a', 'hs3st3b1a', 'bcas2', 'hoxc9a', 'hoxb8a', 'dbx1a', 'ipo7', 'mcm6', 'traf4a', 'fosl2', 'gadd45gb.1', 'foxa3', 'tmem88b', 'atoh1a', 'bsx', 'nusap1', 'hand2', 'efhd1', 'fsta', 'myl1', 'phlda2', 'isl1l', 'tmsb', 'myo1cb', 'dld', 'otpb', 'tbr1a', 'col11a1b', 'mfap2', 'ackr3b', 'si:ch73-21g5.7', 'cd63', 'spi2', 'lbx1b', 'slc6a8', 'bhlhe40', 'tp53', 'gfi1aa', 'sox1b', 'pdap1a', 'nkx6.2', 'noto', 'sfrp5', 'klf2a', 'rpsa', 'ppfia4', 'ek1', 'lrrc17', 'pmp22a', 'nkx6.3', 'glcci1a', 'ncam1b', 'pitx3', 'cdh6', 'mitfa', 'fosab', 'cdh15', 'elavl3', 'limch1a', 'nr5a1b', 'dlx2a', 'neurod6b', 'tbx21', 'neo1a', 'actc1b', 'hoxa13a', 'bhlhe23', 'foxa', 'colec12', 'rdh10a', 'grhl2b', 'sox14', 'nr4a2a', 'mylpfa', 'her15.2', 'foxg1d', 'mafb', 'prrx1b', 'mycn', 'midn', 'slc25a37', 'alas2', 'musk', 'ccng1', 'her7', 'col1a1a', 'nkx1.2lb', 'eif5a2', 'znrf3', 'col1a2', 'postnb', 'kpna2', 'sec61a1l', 'jam2a', 'emx2', 'ryr1b', 'hbae1.3', 'pax8', 'otx