In [1]:
import os
import pandas as pd

In [2]:
# Root folder that the loader functions join library/method/comparison onto.
results_dir = "../data/"

# Library names; each one is used as a sub-directory of results_dir.
libs = [
    "CC",
    "ATF2",
    "CTCF",
    "FOXA1",
    "LEF1",
    "SCRT1",
    "TCF7L2",
    "16P12_1",
]

# Comparison names; each one is a sub-directory below a method folder.
c_types = [
    "peaks_vs_notpeaks",
    "responsive_vs_nonresponsive",
    "induced_vs_repressed",
]

# Enrichment methods understood by get_top_motifs.
methods = [
    "homer",
    "meme",
]


In [3]:
def get_homer_results(resdir, lib, ct):
    """Load the significant known-motif rows from one HOMER run.

    Reads ``<resdir>/<lib>/homer/<ct>/knownResults.txt`` as a tab-separated
    table, shortens every "Motif Name" to the text before its first "/",
    and keeps only rows whose "q-value (Benjamini)" is below 0.01.

    Parameters
    ----------
    resdir : str
        Root directory holding one sub-directory per library.
    lib : str
        Library sub-directory name.
    ct : str
        Comparison sub-directory name inside the ``homer`` folder.

    Returns
    -------
    pandas.DataFrame
        The retained rows in file order; the original row labels are kept.
        Empty (but with all columns) when nothing passes the threshold or
        the file has no data rows.
    """
    filename = os.path.join(resdir, lib, "homer", ct, "knownResults.txt")
    df = pd.read_csv(filename, sep="\t")
    # Split per element and take the first piece. The previous
    # ``expand=True)[0]`` form produced a zero-column frame for a table
    # without data rows, so selecting column 0 raised KeyError.
    df["Motif Name"] = df["Motif Name"].str.split("/", n=1).str[0]
    return df.loc[df["q-value (Benjamini)"] < 0.01]

def get_meme_results(resdir, lib, ct):
    """Load the significant motif rows from one MEME-suite AME run.

    Reads ``<resdir>/<lib>/meme/<ct>/ame.tsv`` as a tab-separated table,
    shortens every "motif_ID" to the text before its first ".", and keeps
    only rows whose "adj_p-value" is below 0.01.

    Parameters
    ----------
    resdir : str
        Root directory holding one sub-directory per library.
    lib : str
        Library sub-directory name.
    ct : str
        Comparison sub-directory name inside the ``meme`` folder.

    Returns
    -------
    pandas.DataFrame
        The retained rows in file order; the original row labels are kept.
        Empty (but with all columns) when nothing passes the threshold or
        the file has no data rows.
    """
    filename = os.path.join(resdir, lib, "meme", ct, "ame.tsv")
    df = pd.read_csv(filename, sep="\t")
    # Split per element and take the first piece. The previous
    # ``expand=True)[0]`` form produced a zero-column frame for a table
    # without data rows, so selecting column 0 raised KeyError.
    df["motif_ID"] = df["motif_ID"].str.split(".", n=1).str[0]
    return df.loc[df["adj_p-value"] < 0.01]

def get_top_motifs(resdir, libs, ct, method, N=30):
    """Build a side-by-side table of the first N significant motifs per library.

    Parameters
    ----------
    resdir : str
        Root directory passed through to the per-method loader.
    libs : iterable of str
        Library names; one output column is produced per entry, in order.
    ct : str
        Comparison name passed through to the per-method loader.
    method : {"homer", "meme"}
        Which loader (and which motif-name column) to use.
    N : int, default 30
        Maximum number of rows taken from each library's filtered table.

    Returns
    -------
    pandas.DataFrame
        One column per library, named after the library. Row ``i`` holds the
        ``i``-th retained motif of each library; libraries with fewer
        retained motifs are padded with NaN at the bottom.

    Raises
    ------
    KeyError
        If ``method`` is not one of the supported names.
    """
    method_func_dict = {"homer": get_homer_results, "meme": get_meme_results}
    method_colname_dict = {"homer": "Motif Name", "meme": "motif_ID"}
    load_results = method_func_dict[method]
    name_col = method_colname_dict[method]

    per_lib = []
    for lib in libs:
        top = load_results(resdir, lib, ct).iloc[:N][name_col]
        # The loaders keep the row labels of the unfiltered file. concat with
        # axis=1 aligns on those labels, so re-number every column 0..k-1
        # first; otherwise a library whose retained rows are not contiguous
        # from 0 would land on the wrong rank rows with NaN gaps between them.
        per_lib.append(top.reset_index(drop=True).rename(lib))

    return pd.concat(per_lib, axis=1)


In [6]:
# MEME results for the third comparison type ("induced_vs_repressed"),
# for every library except the first one ("CC").
get_top_motifs(
    resdir=results_dir,
    libs=libs[1:],
    ct=c_types[2],
    method="meme",
    N=30,
)

Unnamed: 0,ATF2,CTCF,FOXA1,LEF1,SCRT1,TCF7L2,16P12_1
0,TBX3_HUMAN,SP2_HUMAN,SP3_HUMAN,FOSL2_HUMAN,P53_HUMAN,CEBPG_HUMAN,P53_HUMAN
1,TBX21_HUMAN,SP3_HUMAN,SP2_HUMAN,FOS_HUMAN,P73_HUMAN,ATF4_HUMAN,P73_HUMAN
2,SUH_HUMAN,ZIC1_HUMAN,KLF6_HUMAN,JUNB_HUMAN,P63_HUMAN,JUN_HUMAN,P63_HUMAN
3,KLF5_HUMAN,AP2B_HUMAN,AP2B_HUMAN,FOSB_HUMAN,ZBT48_HUMAN,NF2L1_HUMAN,CDX1_HUMAN
4,KLF1_HUMAN,KLF3_HUMAN,KLF3_HUMAN,JUN_HUMAN,ZFX_HUMAN,BATF_HUMAN,SRY_HUMAN
5,SALL4_HUMAN,KLF6_HUMAN,SALL4_HUMAN,JUND_HUMAN,ZSC31_HUMAN,FOSL1_HUMAN,HXA9_HUMAN
6,EGR2_HUMAN,SALL4_HUMAN,ZIC1_HUMAN,FOSL1_HUMAN,ZN667_HUMAN,JUND_HUMAN,PRDM6_HUMAN
7,KLF9_HUMAN,KLF1_HUMAN,SRBP2_HUMAN,E2F4_HUMAN,COT1_HUMAN,FOSL2_HUMAN,HXB13_HUMAN
8,KLF6_HUMAN,KLF12_HUMAN,KLF5_HUMAN,NF2L1_HUMAN,PAX6_HUMAN,SRY_HUMAN,PIT1_HUMAN
9,ITF2_HUMAN,KLF5_HUMAN,SP4_HUMAN,ZBT14_HUMAN,E2F4_HUMAN,CDX1_HUMAN,ANDR_HUMAN
