In [1]:
import pandas as pd
import numpy as np
import  os
from functools import reduce
from collections import Counter

In [2]:
# Library names; each one is used as a column prefix/suffix throughout
# (e.g. f"{lib}_padj", f"{lib}_peak", f"{lib}_abc_gene").
libs = ["ATF2", "CTCF", "FOXA1", "LEF1", "SCRT1", "TCF7L2", "16P12_1"]

In [3]:
# Per-enhancer table; later cells read `{lib}_padj`, `{lib}_log2FoldChange`,
# `{lib}_peak`, `CC_peak`, `chrom_coord`, `abc_gene` and `nearest_gene` from it.
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
meta_enhancer_gene_file = "/data5/deepro/starrseq/papers/results/6_link_da_enhancers_to_de_genes/data/meta_enhancer_gene.csv"

df = pd.read_csv(meta_enhancer_gene_file)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
def get_act_status(ser, lib, padj_threshold=0.01):
    """Classify one enhancer row's activity response for a library.

    Parameters
    ----------
    ser : pandas.Series or mapping
        Row providing ``{lib}_padj``, ``{lib}_log2FoldChange``, ``CC_peak``
        and ``{lib}_peak``.
    lib : str
        Library name used to build the column names.
    padj_threshold : float, default 0.01
        Rows with ``{lib}_padj`` strictly below this value are treated as
        significantly changed.

    Returns
    -------
    str
        Significant, positive fold change: "gained" if ``CC_peak == 0`` and
        ``{lib}_peak == 1``, otherwise "induced".
        Significant, non-positive fold change: "lost" if ``CC_peak == 1`` and
        ``{lib}_peak == 0``, otherwise "repressed".
        Not significant: "active" if both peak flags are 1, "inactive" if
        both are 0, otherwise "unresponsive".
    """
    cc_peak = ser["CC_peak"]
    lib_peak = ser[f"{lib}_peak"]

    if ser[f"{lib}_padj"] < padj_threshold:
        if ser[f"{lib}_log2FoldChange"] > 0:
            return "gained" if (cc_peak == 0) & (lib_peak == 1) else "induced"
        return "lost" if (cc_peak == 1) & (lib_peak == 0) else "repressed"

    # Not significant. A NaN padj also ends up here because NaN < x is False.
    if (cc_peak == 1) & (lib_peak == 1):
        return "active"
    if (cc_peak == 0) & (lib_peak == 0):
        return "inactive"
    return "unresponsive"


In [5]:
# Label every row with its activity status per library, stored in a column
# named after the library.
# NOTE(review): if `df` already has a column with that name it is overwritten — confirm intended.
for lib in libs:
    df[lib] = df.apply(get_act_status, args=(lib,), axis=1)

In [6]:
# Columns to keep: coordinate, one status column per library, and the two gene-link columns.
usecols = ["chrom_coord"] + libs + ["abc_gene", "nearest_gene"]

In [7]:
# Narrow the table to the selected columns; later cells add per-library gene-status columns to it.
meta_enhancer_gene_df = df.loc[:, usecols]

In [8]:
# Sanity check: distribution of activity-status labels for ATF2.
meta_enhancer_gene_df.ATF2.value_counts()

inactive        231920
repressed        11525
induced           8070
lost              1338
unresponsive       386
active             339
gained              54
Name: ATF2, dtype: int64

In [9]:
# Per-gene expression table; later cells read `gene_id`, `gene_name`,
# `{lib}_padj` and `{lib}_log2FoldChange` from it.
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
meta_exp_file = "/data5/deepro/starrseq/papers/results/5_compare_expression_ko_vs_wt/data/meta_exp.csv"
meta_exp_df = pd.read_csv(meta_exp_file)

In [10]:
def get_gene_status_helper(gene_id, libs, meta_exp_df):
    """Return the expression response of one gene in every library.

    Looks up `gene_id` in ``meta_exp_df.gene_id`` (at most one matching row
    is allowed). When the gene is absent, its id is printed and every
    library maps to "not_available". Otherwise each library maps to
    "upregulated" / "downregulated" when ``{lib}_padj`` < 0.05 (direction
    taken from the sign of ``{lib}_log2FoldChange``; a non-positive value
    counts as down), and to "not_significant" in all other cases, including
    a NaN padj.

    Returns
    -------
    dict
        ``{lib: status}`` in the order of `libs`.
    """
    matches = meta_exp_df.loc[meta_exp_df.gene_id == gene_id]
    n_matches = len(matches)
    assert n_matches < 2
    if n_matches == 0:
        print(gene_id)
        return dict.fromkeys(libs, "not_available")

    # Exactly one row: collapse it to a Series indexed by column name.
    record = matches.squeeze(axis=0)

    def classify(lib):
        # `not (x < 0.05)` is also True for NaN, so NaN stays "not_significant"
        if not record[f"{lib}_padj"] < 0.05:
            return "not_significant"
        if record[f"{lib}_log2FoldChange"] > 0:
            return "upregulated"
        return "downregulated"

    return {lib: classify(lib) for lib in libs}

def get_gene_status(gene, libs, meta_exp_df):
    """Summarise expression responses for a "|"-separated list of gene ids.

    Parameters
    ----------
    gene : str or missing
        One or more gene ids joined by "|"; a missing value yields an
        all-``pd.NA`` result.
    libs : list of str
        Libraries to report.
    meta_exp_df : pandas.DataFrame
        Expression table passed through to ``get_gene_status_helper``.

    Returns
    -------
    pandas.Series
        Indexed by library; each value is the per-gene statuses joined by
        "|" in the same order as the ids in `gene`.
    """
    if pd.isna(gene):
        return pd.Series({lib: pd.NA for lib in libs})

    per_gene_status = [
        get_gene_status_helper(gene_id, libs, meta_exp_df)
        for gene_id in gene.split("|")
    ]
    return pd.Series({
        lib: "|".join(status[lib] for status in per_gene_status)
        for lib in libs
    })


In [11]:
# get_gene_status returns a Series keyed by library, so the apply yields one
# column per library; these are stored as `{lib}_abc_gene`.
# NOTE(review): runs a lookup in meta_exp_df for every gene of every row — likely slow on the full table.
meta_enhancer_gene_df[[f"{lib}_abc_gene" for lib in libs]] = meta_enhancer_gene_df.abc_gene.apply(get_gene_status, args=(libs, meta_exp_df, ))

In [12]:
# Same as for abc_gene, but for the nearest_gene links; stored as `{lib}_nearest_gene`.
meta_enhancer_gene_df[[f"{lib}_nearest_gene" for lib in libs]] = meta_enhancer_gene_df.nearest_gene.apply(get_gene_status, args=(libs, meta_exp_df, ))

In [13]:
# Keep rows that are NOT uniformly 'inactive' and NOT uniformly 'active'
# across all libraries. Column names are backtick-quoted for DataFrame.query.
all_inactive = "&".join(f"(`{lib}`=='inactive')" for lib in libs)
all_active = "&".join(f"(`{lib}`=='active')" for lib in libs)
query = f"~(({all_inactive})|({all_active}))"
diff_meta_enhancer_gene = meta_enhancer_gene_df.query(query)

In [47]:
def get_per_lib_consistency_stats(lib, diff_meta_enhancer_gene):
    """Count enhancer states for one library and how many agree with gene expression.

    Parameters
    ----------
    lib : str
        Library name; the frame must contain the columns ``chrom_coord``,
        ``{lib}``, ``{lib}_abc_gene`` and ``{lib}_nearest_gene``.
    diff_meta_enhancer_gene : pandas.DataFrame
        Enhancer table with per-library status labels and "|"-joined gene
        expression labels.

    Returns
    -------
    pandas.Series
        ``lib``; ``mapped_target_genes`` (rows where at least one of the two
        gene columns is not missing); and for each of induced / repressed /
        gained / lost the total number of rows with that status plus a
        ``*_consistent`` count. A row is consistent when either gene column
        contains "upregulated" (induced, gained) or "downregulated"
        (repressed, lost).
    """
    abc_col = f"{lib}_abc_gene"
    nearest_col = f"{lib}_nearest_gene"
    use_cols = ["chrom_coord", lib, abc_col, nearest_col]
    lib_diff_meta = diff_meta_enhancer_gene.loc[:, use_cols]

    status = lib_diff_meta[lib]
    has_target = lib_diff_meta[abc_col].notna() | lib_diff_meta[nearest_col].notna()
    mapped_tg = int(has_target.sum())
    enhancer_dict = Counter(status)

    def _any_target(direction):
        # na=False makes missing annotations count as "no match" explicitly,
        # instead of depending on how NA values propagate through `|`.
        return (
            lib_diff_meta[abc_col].str.contains(direction, regex=False, na=False)
            | lib_diff_meta[nearest_col].str.contains(direction, regex=False, na=False)
        )

    target_up = _any_target("upregulated")
    target_down = _any_target("downregulated")

    def _n_consistent(state, target_mask):
        # Rows in the given enhancer state whose target genes move the expected way.
        return int(((status == state) & target_mask).sum())

    return pd.Series({
        "lib": lib, "mapped_target_genes": mapped_tg,
        "induced": enhancer_dict["induced"], "induced_consistent": _n_consistent("induced", target_up),
        "repressed": enhancer_dict["repressed"], "repressed_consistent": _n_consistent("repressed", target_down),
        "gained": enhancer_dict["gained"], "gained_consistent": _n_consistent("gained", target_up),
        "lost": enhancer_dict["lost"], "lost_consistent": _n_consistent("lost", target_down),
        })

In [48]:
# Build the per-library consistency table in one step: each returned Series
# becomes one row, so no incremental concat / transpose / reset_index is needed.
# NOTE: this rebinds `df`, which earlier in the notebook held the enhancer table.
df = pd.DataFrame([get_per_lib_consistency_stats(lib, diff_meta_enhancer_gene) for lib in libs])

In [51]:
# Total number of rows in the enhancer table (before the differential filter).
len(meta_enhancer_gene_df)

253632

In [45]:
# ATF2 rows with at least one linked gene annotation (abc or nearest).
# Selects straight from diff_meta_enhancer_gene so the cell does not depend on
# `lib_diff_meta`, which is only assigned in a cell further down the notebook.
diff_meta_enhancer_gene.loc[
    ~((diff_meta_enhancer_gene.ATF2_abc_gene.isna())& (diff_meta_enhancer_gene.ATF2_nearest_gene.isna())),
    ["chrom_coord", "ATF2", "ATF2_abc_gene", "ATF2_nearest_gene"]
    ]

Unnamed: 0,chrom_coord,ATF2,ATF2_abc_gene,ATF2_nearest_gene
1,chr19_41997247_41997747,induced,,downregulated
2,chr18_48106841_48107341,induced,,upregulated
6,chr18_48106891_48107391,induced,,upregulated
11,chr18_48106941_48107441,induced,,upregulated
14,chr18_48106991_48107491,induced,,upregulated
...,...,...,...,...
253627,chr8_46617217_46617717,repressed,downregulated|not_significant|not_significant|...,downregulated
253628,chr1_233179702_233180202,repressed,,upregulated
253629,chr1_222455392_222455885,repressed,,not_significant
253630,chr1_222455342_222455842,repressed,,not_significant


In [42]:
# Display `df`. NOTE(review): `df` is bound to more than one table in this
# notebook (enhancer table, then per-library stats), so what is shown depends on execution order.
df

Unnamed: 0,lib,induced,induced_consistent,repressed,repressed_consistent,gained,gained_consistent,lost,lost_consistent
0,ATF2,8070,3184,11525,2121,54,31,1338,238
1,CTCF,5517,1272,5040,1054,59,10,281,77
2,FOXA1,29184,5496,5282,1016,764,168,15,3
3,LEF1,3097,479,5955,1234,245,69,848,189
4,SCRT1,3717,792,256,10,303,52,12,2
5,TCF7L2,8604,1624,329,79,432,65,0,0
6,16P12_1,1885,266,3221,648,167,29,57,2


In [None]:
# Ad-hoc inspection of a single library: same column subset that
# get_per_lib_consistency_stats works on.
lib="ATF2"
use_cols = ["chrom_coord", f"{lib}", f"{lib}_abc_gene", f"{lib}_nearest_gene"]

lib_diff_meta = diff_meta_enhancer_gene.loc[:, use_cols]

In [None]:
# Rows labelled "induced" whose abc or nearest gene annotation contains "upregulated".
# NOTE(review): the recorded output below also lists "gained" rows, which this
# filter cannot return — the output looks stale; re-run to confirm.
lib_diff_meta.loc[(
    ((lib_diff_meta[lib]=="induced"))&
    ((lib_diff_meta[f"{lib}_abc_gene"].str.contains("upregulated")|lib_diff_meta[f"{lib}_nearest_gene"].str.contains("upregulated")))
    )]

Unnamed: 0,chrom_coord,ATF2,ATF2_abc_gene,ATF2_nearest_gene
2,chr18_48106841_48107341,induced,,upregulated
6,chr18_48106891_48107391,induced,,upregulated
11,chr18_48106941_48107441,induced,,upregulated
14,chr18_48106991_48107491,induced,,upregulated
28,chr17_75349988_75350488,induced,,upregulated|not_significant
...,...,...,...,...
252598,chr8_630925_631425,gained,,upregulated
252612,chr8_631025_631525,gained,,upregulated
252613,chr8_630875_631375,gained,,upregulated
252627,chr8_631075_631575,induced,,upregulated


In [70]:
# Enhancers "lost" in both ATF2 and LEF1 whose linked gene (nearest or abc) is
# downregulated in both. na=False makes missing gene annotations count as
# "no match" explicitly rather than relying on NA propagation through `|`.
diff_meta_enhancer_gene.loc[
    (diff_meta_enhancer_gene.ATF2=="lost")&
    (diff_meta_enhancer_gene.LEF1=="lost")&
    (diff_meta_enhancer_gene.ATF2_nearest_gene.str.contains("downregulated", na=False)|diff_meta_enhancer_gene.ATF2_abc_gene.str.contains("downregulated", na=False))&
    (diff_meta_enhancer_gene.LEF1_nearest_gene.str.contains("downregulated", na=False)|diff_meta_enhancer_gene.LEF1_abc_gene.str.contains("downregulated", na=False))
    ]

Unnamed: 0,chrom_coord,ATF2,CTCF,FOXA1,LEF1,SCRT1,TCF7L2,16P12_1,abc_gene,nearest_gene,...,SCRT1_abc_gene,TCF7L2_abc_gene,16P12_1_abc_gene,ATF2_nearest_gene,CTCF_nearest_gene,FOXA1_nearest_gene,LEF1_nearest_gene,SCRT1_nearest_gene,TCF7L2_nearest_gene,16P12_1_nearest_gene
232312,chr3_24487420_24487920,lost,induced,induced,lost,induced,active,induced,,ENSG00000151090.20,...,,,,downregulated,not_significant,not_significant,downregulated,downregulated,not_significant,not_significant
235755,chr3_24487520_24487990,lost,induced,induced,lost,induced,active,induced,,ENSG00000151090.20,...,,,,downregulated,not_significant,not_significant,downregulated,downregulated,not_significant,not_significant
236802,chr10_113067322_113067822,lost,active,induced,lost,induced,active,induced,,ENSG00000148737.18,...,,,,downregulated,upregulated,not_significant,downregulated,not_significant,downregulated,not_significant
237402,chr7_104943294_104943794,lost,lost,active,lost,active,active,lost,,ENSG00000005483.23,...,,,,downregulated,not_significant,upregulated,downregulated,not_significant,upregulated,not_significant
238623,chr7_104943194_104943694,lost,lost,active,lost,active,active,lost,,ENSG00000005483.23,...,,,,downregulated,not_significant,upregulated,downregulated,not_significant,upregulated,not_significant
243912,chr13_40660115_40660615,lost,active,induced,lost,induced,active,induced,,ENSG00000150907.10,...,,,,downregulated,upregulated,upregulated,downregulated,downregulated,upregulated,downregulated
245376,chr12_68787313_68787813,lost,active,induced,lost,induced,active,active,,ENSG00000175782.11,...,,,,downregulated,not_significant,not_significant,downregulated,not_significant,not_significant,not_significant
247056,chr22_32847220_32847720,lost,active,unresponsive,lost,active,unresponsive,active,,ENSG00000185666.15|ENSG00000100234.12,...,,,,downregulated|upregulated,downregulated|downregulated,not_significant|upregulated,downregulated|upregulated,not_significant|not_significant,downregulated|upregulated,downregulated|downregulated
247540,chr22_32847120_32847620,lost,unresponsive,unresponsive,lost,active,unresponsive,active,,ENSG00000185666.15|ENSG00000100234.12,...,,,,downregulated|upregulated,downregulated|downregulated,not_significant|upregulated,downregulated|upregulated,not_significant|not_significant,downregulated|upregulated,downregulated|downregulated
248291,chr16_85562401_85562901,lost,active,induced,lost,induced,unresponsive,induced,,ENSG00000131149.19,...,,,,downregulated,downregulated,downregulated,downregulated,upregulated,downregulated,downregulated


In [63]:
# Rows labelled 'inactive' in every library (backticks quote the column names for query()).
query = "(" + "&".join([f"(`{lib}`=='inactive')" for lib in libs]) + ")"

inactive_meta_enhancer_gene = meta_enhancer_gene_df.query(query)

In [64]:
# Rows labelled 'active' in every library (backticks quote the column names for query()).
query = "(" + "&".join([f"(`{lib}`=='active')" for lib in libs]) + ")"

active_meta_enhancer_gene = meta_enhancer_gene_df.query(query)

In [75]:
# Distribution of ATF2 status labels.
# NOTE(review): the recorded output below differs from the ATF2 counts shown
# earlier in this notebook — re-run from a fresh kernel to confirm which is current.
meta_enhancer_gene_df.ATF2.value_counts()

inactive        245977
repressed         3221
induced           1885
active            1853
unresponsive       472
gained             167
lost                57
Name: ATF2, dtype: int64

In [65]:
# Spot check: expression record for IFNB1, squeezed to a Series when exactly one row matches.
meta_exp_df.loc[meta_exp_df.gene_name=="IFNB1"].squeeze(axis=0)

gene_id                   ENSG00000171855.7
gene_name                             IFNB1
CC                                 0.074246
ATF2                               7.319063
ATF2_log2FoldChange                 5.50148
ATF2_pvalue                             0.0
ATF2_padj                               0.0
CTCF                               0.080469
CTCF_log2FoldChange                0.162862
CTCF_pvalue                        0.957295
CTCF_padj                               NaN
FOXA1                              0.138901
FOXA1_log2FoldChange               0.281232
FOXA1_pvalue                       0.923856
FOXA1_padj                              NaN
LEF1                               0.146257
LEF1_log2FoldChange                0.392038
LEF1_pvalue                        0.886307
LEF1_padj                               NaN
SCRT1                                   0.0
SCRT1_log2FoldChange              -0.174431
SCRT1_pvalue                       0.954265
SCRT1_padj                      

In [32]:
def get_genes(lib, status="lost"):
    """Return the set of gene names listed in a library's ``{status}_merged.csv``.

    Parameters
    ----------
    lib : str
        Library name (sub-directory of ``../data/da_enhancers_to_de_genes_links``).
    status : str, default "lost"
        File prefix selecting which ``{status}_merged.csv`` to read.

    Returns
    -------
    set
        Values of the file's ``gene_name`` column, or an empty set when the
        file does not exist.
    """
    filename = f"../data/da_enhancers_to_de_genes_links/{lib}/{status}_merged.csv"
    if not os.path.exists(filename):
        return set()
    return set(pd.read_csv(filename).gene_name.values)

In [33]:
# Gene sets per library.
# NOTE(review): get_genes reads `lost_merged.csv`, so despite the variable name
# these sets come from the "lost" files — confirm which category is intended.
gained_genes = {l:get_genes(l) for l in libs}

In [34]:
# Genes present in both the ATF2 and LEF1 sets.
gained_genes["ATF2"].intersection(gained_genes["LEF1"])

{'ACTN4',
 'ANKRD11',
 'AP1M1',
 'APBB2',
 'ARHGEF3',
 'ATF3',
 'ATG4A',
 'ATL1',
 'ATXN7L3',
 'BTG3',
 'CABIN1',
 'CAMK4',
 'CARMIL1',
 'CMAHP',
 'COL4A6',
 'CTNNA1',
 'DST',
 'ENSG00000259362',
 'ENSG00000290644',
 'ERI2',
 'FAM13C',
 'FAM174B',
 'FAT3',
 'FENDRR',
 'FOXO1',
 'G6PD',
 'GALNTL6',
 'GAS6',
 'GNAI1',
 'GPHN',
 'GSE1',
 'HDAC9',
 'KAT2B',
 'KAZN',
 'KITLG',
 'KMT2E',
 'L3MBTL2-AS1',
 'LIMK2',
 'LINC00870',
 'LTBP1',
 'MAT2B',
 'MBD3',
 'MBNL1',
 'MCC',
 'METTL7A',
 'MINK1',
 'MLLT3',
 'MR1',
 'MTG1',
 'MYH9',
 'NF2',
 'PGPEP1',
 'ROR2',
 'RPTOR',
 'SEMA6A',
 'SETBP1',
 'SHROOM3',
 'SLC35E3',
 'SMOC1',
 'SRC',
 'STK17A',
 'SUSD6',
 'SYN3',
 'SYNE1',
 'TANGO6',
 'TCF7L2',
 'THRB',
 'THSD4',
 'TIMP3',
 'TNFAIP8',
 'TRIM9',
 'TUBGCP2',
 'TXNRD2',
 'UQCC1',
 'USP36',
 'USP9X',
 'YBX3',
 'ZNF511'}

In [27]:
# For each library, list the genes that appear in no other library's set.
for lib in libs:
    # set().union(*...) yields an empty set when there are no other libraries,
    # whereas reduce() without an initializer raises on an empty sequence.
    other_lib_genes = set().union(*(gained_genes[l] for l in libs if l != lib))
    unique_lib_genes = gained_genes[lib].difference(other_lib_genes)
    print(lib)
    print(sorted(unique_lib_genes))

ATF2
['MYPN', 'PSEN1', 'RCSD1', 'SETX', 'SLC9A7', 'SMPX', 'STX8', 'TET2']
CTCF
['ENSG00000286407', 'KIF7', 'MERTK', 'RGS12', 'SAMD11', 'SPRING1', 'TICRR', 'TXNRD2']
FOXA1
['ACTB', 'ASAP1', 'ATF3', 'B3GNTL1', 'C19orf47', 'CDC25C', 'CLEC16A', 'DIP2C', 'DST', 'EBF3', 'ENSG00000242880', 'ERCC6L2-AS1', 'ESYT2', 'EYA2', 'FAM184A', 'FRS3', 'GNA12', 'IGFBP7', 'IGFL4', 'ITGA2', 'KAZN', 'KCNMA1', 'KLRK1-AS1', 'LIMK2', 'LINC00877', 'LINC01033', 'LRRFIP2', 'MBP', 'MFAP3', 'MIR34AHG', 'MTMR3', 'MYO1A', 'NCOA7', 'PCMTD1', 'PGPEP1', 'PPARGC1B', 'PPP1R14C', 'PPP1R37', 'PRKCE', 'PSMD12', 'RAD51B', 'RBM48', 'RGS9', 'RHBDL3', 'SCAMP1-AS1', 'SERTAD2', 'SFXN3', 'SLC2A12', 'SLC44A3-AS1', 'SMARCB1', 'SMOC1', 'SMU1', 'SRSF3', 'SSBP2', 'TAB2', 'TADA2A', 'TANC1', 'TRIM2', 'TRIO', 'UST', 'VPS52', 'WWC1', 'ZNF423', 'ZNF516', 'ZNF536', 'ZNF704', 'ZSWIM6']
LEF1
['AMBRA1', 'BACE2', 'INHBA', 'OTULIN', 'PLEK2', 'PREP', 'RPS6KA5', 'TFAP2A-AS2']
SCRT1
['ANK1', 'ARHGAP26', 'ARHGAP5', 'BMP7', 'DYNC2I1', 'ENSG00000188897',

In [6]:
# Load a saved activity-vs-expression table (path is relative to the notebook's working directory).
da_de_table = pd.read_csv("../data/activity_vs_expression_corr/nearest_da_sde_table.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [7]:
# Preview the loaded table. NOTE(review): prefer .head() to avoid rendering the full frame.
da_de_table

Unnamed: 0,chrom_coord,CC,ATF2,CTCF,FOXA1,LEF1,SCRT1,TCF7L2,16P12_1,CC_peak,...,16P12_1_padj,abc_gene,nearest_gene,ATF2_sde,CTCF_sde,FOXA1_sde,LEF1_sde,SCRT1_sde,TCF7L2_sde,16P12_1_sde
0,chr12_111558725_111559225,-4.440637,-3.418812,-4.051494,-3.812989,-4.347924,-4.347287,-3.908111,-4.258760,0,...,,,ENSG00000204842.18,,ENSG00000204842.18,,ENSG00000204842.18,,,ENSG00000204842.18
1,chr19_41997247_41997747,-4.219894,-2.625629,-3.541244,-3.722848,-4.030320,-3.911038,-4.425876,-4.230219,0,...,,,ENSG00000105409.19,ENSG00000105409.19,,ENSG00000105409.19,ENSG00000105409.19,,ENSG00000105409.19,
2,chr18_48106841_48107341,-4.219172,-2.299419,-3.408249,-3.516341,-3.953909,-3.336600,-4.251061,-3.811745,0,...,,,ENSG00000184828.10,ENSG00000184828.10,ENSG00000184828.10,,,,ENSG00000184828.10,ENSG00000184828.10
3,chrX_73848361_73848861,-4.169996,-3.227898,-3.806848,-4.115115,-3.909083,-3.946919,-3.999784,-3.585330,0,...,,,ENSG00000229807.13,ENSG00000229807.13,,,,ENSG00000229807.13,,
4,chrX_73848411_73848911,-4.168425,-3.197322,-3.764635,-4.054166,-3.838673,-3.828637,-3.882982,-3.571999,0,...,,,ENSG00000229807.13,ENSG00000229807.13,,,,ENSG00000229807.13,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253627,chr8_46617217_46617717,4.934769,2.795126,4.543962,5.647602,5.600322,4.669582,5.529039,4.901181,1,...,0.887568,ENSG00000253425.2|ENSG00000254348.1|ENSG000002...,ENSG00000290398.1,ENSG00000290398.1,,,,ENSG00000290398.1,ENSG00000290398.1,
253628,chr1_233179702_233180202,5.008113,2.933722,4.792427,5.845539,2.965903,5.713423,5.609818,5.282977,1,...,0.040872,,ENSG00000135749.19,ENSG00000135749.19,ENSG00000135749.19,ENSG00000135749.19,ENSG00000135749.19,ENSG00000135749.19,ENSG00000135749.19,
253629,chr1_222455392_222455885,5.044944,2.771727,4.297637,5.411504,5.410107,4.707708,5.773905,4.942738,1,...,0.668648,,ENSG00000226643.2,,,,,,,
253630,chr1_222455342_222455842,5.071564,2.807113,4.334105,5.461474,5.431616,4.735181,5.804198,4.979754,0,...,0.709149,,ENSG00000226643.2,,,,,,,


In [8]:
# List the available columns of the loaded table.
da_de_table.columns

Index(['chrom_coord', 'CC', 'ATF2', 'CTCF', 'FOXA1', 'LEF1', 'SCRT1', 'TCF7L2',
       '16P12_1', 'CC_peak', 'ATF2_peak', 'CTCF_peak', 'FOXA1_peak',
       'LEF1_peak', 'SCRT1_peak', 'TCF7L2_peak', '16P12_1_peak',
       'ATF2_log2FoldChange', 'CTCF_log2FoldChange', 'FOXA1_log2FoldChange',
       'LEF1_log2FoldChange', 'SCRT1_log2FoldChange', 'TCF7L2_log2FoldChange',
       '16P12_1_log2FoldChange', 'ATF2_pvalue', 'CTCF_pvalue', 'FOXA1_pvalue',
       'LEF1_pvalue', 'SCRT1_pvalue', 'TCF7L2_pvalue', '16P12_1_pvalue',
       'ATF2_padj', 'CTCF_padj', 'FOXA1_padj', 'LEF1_padj', 'SCRT1_padj',
       'TCF7L2_padj', '16P12_1_padj', 'abc_gene', 'nearest_gene', 'ATF2_sde',
       'CTCF_sde', 'FOXA1_sde', 'LEF1_sde', 'SCRT1_sde', 'TCF7L2_sde',
       '16P12_1_sde'],
      dtype='object')

In [17]:
# Enhancer activity table used by check_sde below.
# NOTE(review): presumably the same meta_enhancer_gene.csv loaded at the top via an absolute path — confirm.
df_act = pd.read_csv("../data/meta_enhancer_gene.csv")

  exec(code_obj, self.user_global_ns, self.user_ns)


In [18]:
# Gene expression table used by check_sde below.
# NOTE(review): presumably the same meta_exp.csv loaded earlier via an absolute path — confirm.
df_exp = pd.read_csv("../../5_compare_expression_ko_vs_wt/data/meta_exp.csv")

In [22]:
# Spot check: CTCF fold change and adjusted p-value for FIRRE.
df_exp.loc[df_exp.gene_name=="FIRRE", ["gene_name", "CTCF_log2FoldChange", "CTCF_padj"]]

Unnamed: 0,gene_name,CTCF_log2FoldChange,CTCF_padj
22538,FIRRE,-4.909028,2.182941e-198


In [25]:
def check_sde(ser, meta_exp, libs):
    """List, per library, the linked genes that are significantly differentially expressed.

    Parameters
    ----------
    ser : pandas.Series
        Enhancer row with ``nearest_gene`` and ``abc_gene`` fields, each a
        "|"-separated string of gene ids or a missing value.
    meta_exp : pandas.DataFrame
        Expression table with a ``gene_id`` column (expected to be unique)
        and one ``{lib}_padj`` column per library.
    libs : list of str
        Libraries to report.

    Returns
    -------
    pandas.Series
        Indexed by ``{lib}_sde``; each value is the "|"-joined ids of linked
        genes whose ``{lib}_padj`` is below 0.05 (empty string if none).
        Genes are listed nearest-gene ids first, then abc-gene ids, without
        duplicates.
    """
    nearest_genes = ser.nearest_gene.split("|") if not pd.isnull(ser.nearest_gene) else []
    abc_genes = ser.abc_gene.split("|") if not pd.isnull(ser.abc_gene) else []
    # De-duplicate while keeping a deterministic order.
    genes = list(dict.fromkeys(nearest_genes + abc_genes))

    padj_cols = [f"{lib}_padj" for lib in libs]
    # Index the padj values by gene id and reindex to `genes`, so every row is
    # guaranteed to belong to the gene it is reported for. Genes missing from
    # meta_exp become NaN rows and therefore never pass the threshold.
    padj = (
        meta_exp.loc[meta_exp.gene_id.isin(genes), ["gene_id"] + padj_cols]
        .set_index("gene_id")
        .reindex(index=genes)
    )

    data_dict = dict()
    for lib in libs:
        significant = (padj[f"{lib}_padj"] < 0.05).to_numpy()
        data_dict[f"{lib}_sde"] = "|".join(
            gene for gene, is_sig in zip(genes, significant) if is_sig
        )
    return pd.Series(data_dict)

In [44]:
# Enhancers whose nearest gene is ENSG00000213468.7 with a significant CTCF
# change and a peak present in CC but absent in CTCF. All conditions are taken
# from df_act so the mask never mixes two different frames.
# NOTE: `libraries` is assigned in a cell that appears later in this notebook;
# that cell has to be run first.
df_act.loc[
    (df_act.nearest_gene=="ENSG00000213468.7")&
    (df_act.CTCF_padj<0.05)&(df_act.CC_peak==1)&(df_act.CTCF_peak==0),
    ["chrom_coord"]+[f"{lib}_peak" for lib in libraries]+[f"{lib}" for lib in libraries]
    ]

Unnamed: 0,chrom_coord,CC_peak,ATF2_peak,CTCF_peak,FOXA1_peak,LEF1_peak,SCRT1_peak,TCF7L2_peak,16P12_1_peak,CC,ATF2,CTCF,FOXA1,LEF1,SCRT1,TCF7L2,16P12_1
240644,chrX_131703802_131704302,1,0,0,1,1,0,1,1,1.262257,1.083688,0.86694,1.453686,1.292792,1.038494,1.577585,1.371369
240839,chrX_131704325_131704825,1,0,0,1,1,0,1,1,1.271498,1.119667,0.92334,1.587309,1.366237,0.975813,1.582303,1.377468
244202,chrX_131822514_131823014,1,0,0,1,1,1,1,1,1.460028,1.109244,1.196619,1.580561,1.529601,1.376873,1.768949,1.433751
245385,chrX_131728298_131728798,1,0,0,1,1,0,1,1,1.535373,0.536143,1.086224,1.871537,1.459991,1.213892,1.940849,1.409376
247265,chrX_131728398_131728898,1,0,0,1,1,1,1,1,1.693851,0.693921,1.226222,1.99297,1.602585,1.355051,2.090141,1.559961
248969,chrX_131792876_131793376,1,0,0,1,1,1,1,1,1.905017,1.350224,1.667743,2.18456,1.886874,1.707768,2.146402,1.838355
249321,chrX_131728798_131729298,1,0,0,1,1,1,1,1,1.951021,1.139306,1.531183,2.174159,1.971471,1.686247,2.349366,1.88164
251582,chrX_131746689_131747189,1,0,0,1,1,1,1,1,2.450096,1.283471,1.919204,2.922126,2.705214,2.251622,2.983981,2.439295


In [27]:
# Same libraries as `libs`, with "CC" prepended.
# NOTE(review): a cell positioned above this one already uses `libraries`; move this
# assignment up so the notebook runs top to bottom.
libraries = ["CC", "ATF2", "CTCF", "FOXA1", "LEF1", "SCRT1", "TCF7L2", "16P12_1"]

In [31]:
# Full expression record for FIRRE.
df_exp.loc[df_exp.gene_name=="FIRRE"]

Unnamed: 0,gene_id,gene_name,CC,ATF2,ATF2_log2FoldChange,ATF2_pvalue,ATF2_padj,CTCF,CTCF_log2FoldChange,CTCF_pvalue,...,SCRT1_pvalue,SCRT1_padj,TCF7L2,TCF7L2_log2FoldChange,TCF7L2_pvalue,TCF7L2_padj,16P12_1,16P12_1_log2FoldChange,16P12_1_pvalue,16P12_1_padj
22538,ENSG00000213468.7,FIRRE,1.325087,2.132287,0.694393,6.998537000000001e-27,4.4456529999999996e-26,0.042714,-4.909028,6.3750599999999994e-201,...,0.003257,0.01595,1.273608,0.207807,4.5e-05,0.000197,1.222075,0.017342,0.776422,0.88225


In [30]:
# Try check_sde on the first rows only; libraries[1:] drops the leading "CC" entry.
df_act.head().apply(check_sde,  args=(df_exp, libraries[1:]), axis=1)

Unnamed: 0,ATF2_sde,CTCF_sde,FOXA1_sde,LEF1_sde,SCRT1_sde,TCF7L2_sde,16P12_1_sde
0,ENSG00000204842.18,ENSG00000204842.18,,ENSG00000204842.18,,,ENSG00000204842.18
1,ENSG00000105409.19,ENSG00000105409.19,ENSG00000105409.19,ENSG00000105409.19,,ENSG00000105409.19,
2,ENSG00000184828.10,ENSG00000184828.10,,,,ENSG00000184828.10,ENSG00000184828.10
3,ENSG00000229807.13,,,,ENSG00000229807.13,,
4,ENSG00000229807.13,,,,ENSG00000229807.13,,
