# Figure 6, Panel G – Simulate E8.5 IFT Mef2c KD and compare to DEGs

In [3]:
# 0. Import
import os
import sys

import matplotlib.pyplot as plt
from matplotlib_venn import venn3, venn2
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

from scipy.io import mmread
from scipy.sparse import csr_matrix

In [4]:
import warnings
# Install a global filter that silences every Python warning from here on.
warnings.filterwarnings('ignore')
from celloracle import motif_analysis as ma
import celloracle as co
# Echo the installed CellOracle version as this cell's output.
co.__version__

which: no R in (/apps/software/standard/core/jupyterlab/3.6.3-py3.11:/apps/software/standard/core/jupyterlab/3.6.3-py3.11/sbin:/apps/software/standard/core/jupyterlab/3.6.3-py3.11/bin:/home/apc5un/bedtools2/bin:/opt/mam/9.1.2/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/opt/slurm/current/bin:/opt/singularity/current/bin:/opt/rci/bin:/share/rci_apps/common/bin:/share/resources/HPCtools:/opt/mam/current/bin:/opt/apptainer/current/bin)


'0.14.0'

In [5]:
# Visualization settings: inline figure rendering plus matplotlib defaults.
# Ask the inline backend for 'retina' figures and draw plots inside the notebook.
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

# Default figure size (width, height as interpreted by matplotlib) and the
# DPI used when figures are written with savefig.
plt.rcParams['figure.figsize'] = [6, 4.5]
plt.rcParams["savefig.dpi"] = 300

#### Load links

In [6]:
# Value passed as `threshold_number` to filter_links below.
thresh_num = 12000

# Load the saved CellOracle object from the .celloracle.links file.
links_wt_e85 = co.load_hdf5(file_path="./data/celloracle/e85/WT_cardiac-subset-links.celloracle.links")

# Filter the links with p=0.001, weight column "coef_abs" and thresh_num as
# the threshold number; the result is read later from `.filtered_links`.
# NOTE(review): presumably this keeps links with p below 0.001 and at most
# thresh_num strongest edges per cluster - confirm against the CellOracle docs.
links_wt_e85.filter_links(p=0.001, weight="coef_abs", threshold_number=thresh_num)

In [7]:
# Display the filtered links stored under 'IFT-CMs_WT', sorted by ascending p.
links_wt_e85.filtered_links['IFT-CMs_WT'].sort_values('p')

Unnamed: 0,source,target,coef_mean,coef_abs,p,-logp
75646,Nkx2-5,Hspd1,-0.021870,0.021870,6.048811e-27,26.218330
130224,Gli1,Prom1,0.027861,0.027861,8.110916e-27,26.090930
103690,Nkx2-5,Myh6,0.166351,0.166351,5.653225e-26,25.247704
193268,Klf12,Zfp503,0.017696,0.017696,4.921638e-25,24.307890
166675,Klf12,Svep1,0.035120,0.035120,5.317990e-25,24.274253
...,...,...,...,...,...,...
85430,E2f1,Kitl,-0.010865,0.010865,1.242831e-04,3.905588
51810,Klf4,Erbb4,0.010774,0.010774,1.403253e-04,3.852864
180463,Zeb1,Ttn,-0.015477,0.015477,3.115947e-04,3.506410
103702,Gata4,Myh6,-0.011761,0.011761,3.249560e-04,3.488175


#### Set timepoint and load the AnnData object with raw counts attached (required for DEG analysis)

In [8]:
def return_adata_raw(timepoint):
    """Load the subset AnnData for ``timepoint`` and attach raw counts as ``.raw``.

    Steps performed:
      1. Read ``data/adata_objects/{timepoint}_subset.h5ad``.
      2. Translate the integer codes in ``obs['cell_type_pool_x_genotype']``
         into cluster labels stored in ``obs['celltype_x_genotype']``.
      3. Read the raw matrix (``{timepoint}_matrix.mtx``) with its cell and
         gene name files, normalize each cell to a total of 1e4, apply
         ``log1p`` and store the result as ``adata.raw``.

    Parameters
    ----------
    timepoint : str
        ``'e85'`` or ``'e9'``; selects both the input files and the label list.

    Returns
    -------
    AnnData
        The subset object with ``obs['celltype_x_genotype']`` and ``.raw`` set.

    Raises
    ------
    ValueError
        If ``timepoint`` is not one of the supported values. This is checked
        before any file is read.
    """
    # Labels are listed in the order of the integer codes: code i -> names[i].
    if timepoint == 'e85':
        names = ['pSHF_WT','pSHF_KO', 'aSHF_WT', 'aSHF_KO', 'IFT-CMs_WT', 'IFT-CMs_KO', 'V-CMs_WT',
                 'V-CMs_KO', 'OFT-CMs_WT', 'OFT-CMs_KO', 'PhM_WT', 'PhM_KO', 'LPM_WT', 'LPM_KO',
                 'PostM_WT', 'PostM_KO', 'MixM_WT', 'MixM_KO', 'C16_WT', 'C16_KO']
    elif timepoint == 'e9':
        names = ['SHF_WT', 'SHF_KO', 'Pe_WT', 'Pe_KO', 'VP_WT', 'VP_KO', 'CMs-A_WT', 'CMs-A_KO',
                 'CMs-AVC_WT', 'CMs-AVC_KO', 'CMs-V_WT', 'CMs-V_KO', 'CMs-OFT_WT', 'CMs-OFT_KO',
                 'PhM_WT', 'PhM_KO', 'C11_WT', 'C11_KO']
    else:
        # Fail loudly instead of returning None, which would only surface
        # later as an AttributeError on the caller's side.
        raise ValueError(f"Unsupported timepoint {timepoint!r}; expected 'e85' or 'e9'.")

    adata = sc.read_h5ad(f'data/adata_objects/{timepoint}_subset.h5ad')

    mapping_dict = dict(enumerate(names))
    adata.obs['celltype_x_genotype'] = adata.obs['cell_type_pool_x_genotype'].map(mapping_dict)

    if timepoint == 'e9':
        # Fold the WT AVC cluster into the WT atrial cluster.
        # NOTE(review): 'CMs-AVC_KO' is not merged into 'CMs-A_KO' - confirm
        # that this asymmetry is intended.
        adata.obs.loc[adata.obs['celltype_x_genotype'] == 'CMs-AVC_WT', 'celltype_x_genotype'] = 'CMs-A_WT'

    raw_mtx = mmread(f"./data/adata_objects/{timepoint}_matrix.mtx")
    raw_cells = pd.read_csv(f"./data/adata_objects/{timepoint}_raw_cells.csv", header=None)
    raw_genes = pd.read_csv(f"./data/adata_objects/{timepoint}_raw_genes.csv", header=None)
    raw_cells.index = raw_cells.values.flatten()
    raw_genes.index = raw_genes.values.flatten()

    # The .mtx file is treated as genes x cells, so transpose to cells x genes.
    # The dense array is transposed directly rather than through a DataFrame.
    raw_counts = raw_mtx.toarray().T

    new_adata_raw = sc.AnnData(
        X=raw_counts,   # raw values from the .mtx file; normalized just below
        var=raw_genes,  # gene names read from the raw genes file
        obs=raw_cells   # cell names read from the raw cells file
        )

    sc.pp.normalize_total(new_adata_raw, target_sum=1e4)
    sc.pp.log1p(new_adata_raw)
    adata.raw = new_adata_raw
    return adata

In [9]:
# Timepoint key for this run; later cells branch on 'e85' vs 'e9'.
timepoint = 'e9'
# AnnData returned by return_adata_raw for that timepoint (it sets `.raw`).
adata = return_adata_raw(timepoint)

#### Calculate DEG and save results to `de_results`

In [10]:
# Chamber code used to build the two cluster labels compared below
# (with timepoint 'e9' and chamber 'A': 'CMs-A_KO' vs 'CMs-A_WT').
chamber = 'A'

# Cluster labels follow a different naming pattern at each timepoint.
if timepoint == 'e85':
    ko_group, wt_group = f'{chamber}-CMs_KO', f'{chamber}-CMs_WT'
elif timepoint == 'e9':
    ko_group, wt_group = f'CMs-{chamber}_KO', f'CMs-{chamber}_WT'
else:
    # Stop here rather than continuing with undefined variables.
    raise ValueError(f"Unsupported timepoint {timepoint!r}; expected 'e85' or 'e9'.")

# Keep only the KO and WT cells of the chosen chamber. Copy the slice so that
# rank_genes_groups stores its results on a real AnnData rather than a view.
adata_subset = adata[adata.obs['celltype_x_genotype'].isin([ko_group, wt_group])].copy()

# Both groups must be present, otherwise the KO-vs-WT test is meaningless.
missing_groups = {ko_group, wt_group} - set(adata_subset.obs['celltype_x_genotype'].unique())
if missing_groups:
    raise ValueError(f"No cells found for group(s) {sorted(missing_groups)} at timepoint {timepoint!r}.")

# Wilcoxon rank-sum test between the two remaining groups.
# NOTE(review): scanpy's default `use_raw` is expected to pick up `adata.raw`
# when it is set - confirm against the installed scanpy version.
sc.tl.rank_genes_groups(adata_subset, groupby='celltype_x_genotype', method='wilcoxon')

# Collect the KO-group statistics into a DataFrame.
ranked = adata_subset.uns['rank_genes_groups']
de_results = pd.DataFrame(
    {
        'genes': ranked['names'][ko_group],
        'logfoldchanges': ranked['logfoldchanges'][ko_group],
        'pvals': ranked['pvals'][ko_group],
        'pvals_adj': ranked['pvals_adj'][ko_group]
    }
)

# Unwrap gene names that come back as tuples.
de_results['genes'] = de_results['genes'].apply(lambda x: x[0] if isinstance(x, tuple) else x)

#### Function to get coefficient matrix for perturbation study

In [11]:
def get_coef_matrix(links_df):
    """Build a square source-by-target coefficient matrix from a links table.

    Parameters
    ----------
    links_df : pandas.DataFrame
        Must provide the columns ``source``, ``target`` and ``coef_mean``.

    Returns
    -------
    pandas.DataFrame
        Rows and columns are the sorted unique names found in ``source`` or
        ``target``. Entry ``[source, target]`` holds ``coef_mean`` for that
        link and 0 where no link is listed. If a (source, target) pair occurs
        more than once, the last row wins.
    """
    # Sorted union of every name that appears as a source or a target.
    gene_names = np.unique(links_df['source'].tolist() + links_df['target'].tolist())
    position = {gene: idx for idx, gene in enumerate(gene_names.tolist())}

    n_genes = len(gene_names)
    weights = np.zeros((n_genes, n_genes))

    # Write one coefficient per link, in table order.
    for src, tgt, coef in zip(links_df['source'], links_df['target'], links_df['coef_mean']):
        weights[position[src], position[tgt]] = coef

    return pd.DataFrame(weights, index=gene_names, columns=gene_names)

#### Function to calculate gene change after TF perturbation

In [12]:
def get_gene_change(genes_to_change, coef_matrix, iterations):
    """Propagate a -1 perturbation of the given genes through ``coef_matrix``.

    The perturbation vector starts at -1 for every gene in ``genes_to_change``
    and 0 elsewhere. Each iteration adds ``delta_x @ coef_matrix`` to the
    running change, then adds the running change to ``delta_x``.

    Parameters
    ----------
    genes_to_change : iterable
        Labels from ``coef_matrix.index`` to perturb.
    coef_matrix : pandas.DataFrame
        Square coefficient matrix (rows act as sources, columns as targets).
    iterations : int
        Number of propagation steps.

    Returns
    -------
    tuple
        ``(change_dict, pos_genes, neg_genes, change_df)``: a name-to-change
        dict, the names with change > 0, the names with change < 0, and a
        DataFrame with columns ``name`` and ``delta_x``.
    """
    gene_names = list(coef_matrix.index)
    weights = coef_matrix.values
    n_genes = coef_matrix.shape[0]

    # Initial perturbation: -1 at each selected gene.
    delta_x = np.zeros(n_genes)
    for gene in genes_to_change:
        delta_x[coef_matrix.index.get_loc(gene)] = -1

    change = np.zeros(n_genes)
    for _ in range(iterations):
        change = change + delta_x @ weights
        delta_x = delta_x + change

    change_dict = dict(zip(gene_names, change))
    change_df = pd.DataFrame({'name': gene_names, 'delta_x': list(change)})

    # Split by sign; genes with exactly zero change land in neither list.
    pos_genes = [gene for gene, val in change_dict.items() if val > 0]
    neg_genes = [gene for gene, val in change_dict.items() if val < 0]

    return change_dict, pos_genes, neg_genes, change_df

#### Compare DEG to network simulation of TF knockdown (Panel G Results)

In [18]:
tf = 'Mef2c'
# Absolute log-fold-change cut-off for calling a gene lost/gained in the KO.
lfc_cutoff = 0.75

# Simulate the knockdown of `tf` on the filtered 'IFT-CMs_WT' links (one step).
coef_matrix = get_coef_matrix(links_wt_e85.filtered_links['IFT-CMs_WT'])
gene_change, up_genes, down_genes, change_df = get_gene_change([tf], coef_matrix, iterations=1)

# NOTE(review): DEGs are selected on fold change only; pvals_adj is not used.
de_lost_in_ko = de_results[de_results.logfoldchanges < -lfc_cutoff]
de_gained_in_ko = de_results[de_results.logfoldchanges > lfc_cutoff]

# Predicted direction per gene, ordered by ascending simulated change.
predicted_down = change_df[change_df['delta_x'] < 0].sort_values('delta_x')['name'].values
predicted_up = change_df[change_df['delta_x'] > 0].sort_values('delta_x')['name'].values


def print_overlap(predicted_genes, observed_genes):
    """Print each predicted gene found in ``observed_genes``, then the count.

    Genes are printed in the order of ``predicted_genes``. Returns the count.
    """
    observed = set(observed_genes)  # set lookup instead of scanning an array
    hits = [g for g in predicted_genes if g in observed]
    for g in hits:
        print(g)
    print(len(hits))
    return len(hits)


# tp/tn count concordant predictions (down/down and up/up); fp/fn count
# discordant ones. "tn" is therefore concordant "up" calls, not a true
# negative in the usual sense; the names are kept for compatibility.
print('(POSITIVE RESULT) Genes our model predicts will decrease in KO and are actually downregulated in KO.')
tp = print_overlap(predicted_down, de_lost_in_ko['genes'])
print('\n')

print('(POSITIVE RESULT) Genes our model predicts will increase in KO and actually increase in KO')
tn = print_overlap(predicted_up, de_gained_in_ko['genes'])
print('\n')

print('(NEGATIVE RESULT) Genes our model predicts will decrease in KO, BUT actually INCREASE in KO.')
fp = print_overlap(predicted_down, de_gained_in_ko['genes'])

print('\n')
print('(NEGATIVE RESULT) Genes our model predicts will increase in KO, BUT actually DECREASE in KO.')
fn = print_overlap(predicted_up, de_lost_in_ko['genes'])

# Fraction of overlapping genes whose predicted direction matches the DEG
# direction; nan when no predicted gene overlaps the DEG lists.
n_scored = tp + tn + fp + fn
accuracy = (tp + tn) / n_scored if n_scored else float('nan')

print(f"{tf}: Accuracy of: {accuracy}, and num tp + tn = {tp+tn}, and num fp + fn = {fp + fn}")

(POSITIVE RESULT) Genes our model predicts will decrease in KO and are actually downregulated in KO.
Ttn
Actc1
Unc45b
Myh6
Ldb3
Tnni1
Ppp1r14c
Myl4
Tnnc1
Myom1
5430431A17Rik
Csrp3
Filip1
Cpeb2
Ctnna3
Hdac9
Mybpc3
Rrad
Fam49a
Homer2
Ntng1
Dusp27
Tnnt2
Vcan
Myo18b
Mat2a
Sptb
Adcy5
Tmem163
Mmd
Tmod1
Reep1
Actn2
Smyd1
Kank3
Hrc
Trim55
Grb14
Usp2
Thbs4
Arhgap23
Purb
Ptges3l
43


(POSITIVE RESULT) Genes our model predicts will increase in KO and actually increase in KO
Lamp2
Rcn3
Cnksr2
Cdh6
Arhgap29
Slc2a3
2610307P16Rik
Peg3
Hs6st2
Igf2
Apoe
F2r
Igfbp2
Pgrmc1
Ptgis
Phlda2
Sfrp1
Kcnab1
18


(NEGATIVE RESULT) Genes our model predicts will decrease in KO, BUT actually INCREASE in KO.
Igfbp5
Plcb1
Tenm4
Auts2
Esrrg
Fras1
Grb10
Unc5c
Lpp
Camk1d
Foxp1
Col25a1
Akap5
Ephb2
Syne1
Plcl1
16


(NEGATIVE RESULT) Genes our model predicts will increase in KO, BUT actually DECREASE in KO.
Dpysl3
Tgfbr3
Lypd6
Rspo3
Hand1
Ccnd2
6
Mef2c: Accuracy of: 0.7349397590361446, and num tf + tn = 61, and num fp + fn =