In [3]:
import os
import pandas as pd

In [4]:
# Root folder of the enrichment results. The loaders below read
# <results_dir>/<library>/<method>/<comparison>/<results file>.
results_dir = "../data/"

# Library names; each one is a sub-folder of results_dir and a column label
# in the tables produced by get_top_motifs.
libs = [
    "CC",
    "ATF2",
    "CTCF",
    "FOXA1",
    "LEF1",
    "SCRT1",
    "TCF7L2",
    "16P12_1",
]

# Comparison names; each one is a sub-folder below the method folder.
c_types = [
    "peaks_vs_notpeaks",
    "responsive_vs_nonresponsive",
    "induced_vs_repressed",
]

# Method keys understood by get_top_motifs.
methods = [
    "homer",
    "meme",
]


In [5]:
def get_homer_results(resdir, lib, ct, alpha=0.01):
    """Load the significant rows of one HOMER known-motif table.

    Reads ``<resdir>/<lib>/homer/<ct>/knownResults.txt`` as a tab-separated
    table, shortens every "Motif Name" to the text before its first "/",
    and keeps the rows whose "q-value (Benjamini)" is strictly below
    ``alpha``.

    Parameters
    ----------
    resdir : str
        Root results directory.
    lib : str
        Library sub-directory name.
    ct : str
        Comparison sub-directory name.
    alpha : float, default 0.01
        Exclusive upper bound on "q-value (Benjamini)".

    Returns
    -------
    pandas.DataFrame
        The retained rows in file order, still carrying the row labels they
        had in the file (the index is not reset).
    """
    filename = os.path.join(resdir, lib, "homer", ct, "knownResults.txt")
    df = pd.read_csv(filename, sep="\t")
    # Only the first "/"-separated field is needed, so split once and pick it
    # through the list accessor. Unlike `expand=True` followed by `[0]`, this
    # does not depend on an expanded frame having a column 0, so a table with
    # a header but no rows is handled as well.
    df["Motif Name"] = df["Motif Name"].str.split("/", n=1).str[0]
    return df.loc[df["q-value (Benjamini)"] < alpha]

def get_meme_results(resdir, lib, ct, alpha=0.01):
    """Load the significant rows of one ``ame.tsv`` table.

    Reads ``<resdir>/<lib>/meme/<ct>/ame.tsv`` as a tab-separated table,
    shortens every "motif_ID" to the text before its first ".", and keeps
    the rows whose "adj_p-value" is strictly below ``alpha``. Rows without a
    numeric "adj_p-value" never satisfy that comparison and are dropped.

    Parameters
    ----------
    resdir : str
        Root results directory.
    lib : str
        Library sub-directory name.
    ct : str
        Comparison sub-directory name.
    alpha : float, default 0.01
        Exclusive upper bound on "adj_p-value".

    Returns
    -------
    pandas.DataFrame
        The retained rows in file order, still carrying the row labels they
        had in the file (the index is not reset).
    """
    filename = os.path.join(resdir, lib, "meme", ct, "ame.tsv")
    df = pd.read_csv(filename, sep="\t")
    # Only the first "."-separated field is needed, so split once and pick it
    # through the list accessor. Unlike `expand=True` followed by `[0]`, this
    # does not depend on an expanded frame having a column 0, so a table with
    # a header but no rows is handled as well.
    df["motif_ID"] = df["motif_ID"].str.split(".", n=1).str[0]
    return df.loc[df["adj_p-value"] < alpha]

def get_top_motifs(resdir, libs, ct, method, N=30):
    """Build a side-by-side table of the leading motif names per library.

    For every library the method's loader is called for comparison ``ct``,
    the motif-name column is taken, and its first ``N`` entries (in the order
    the loader returns them) become that library's column.

    Parameters
    ----------
    resdir : str
        Root results directory, forwarded to the loader.
    libs : iterable of str
        Library names; they become the column labels, in the given order.
    ct : str
        Comparison name, forwarded to the loader.
    method : {"homer", "meme"}
        Selects the loader and the motif-name column.
    N : int, default 30
        Maximum number of motifs kept per library.

    Returns
    -------
    pandas.DataFrame
        One column per library, indexed 0..k-1 by position. Libraries with
        fewer entries than the longest column are padded with NaN. An empty
        ``libs`` yields an empty table.

    Raises
    ------
    KeyError
        If ``method`` is not one of the supported keys (and ``libs`` is not
        empty).
    """
    libs = list(libs)
    if not libs:
        # pd.concat rejects an empty sequence; report "no libraries" as an
        # empty table instead of failing.
        return pd.DataFrame(columns=libs)

    method_func_dict = {"homer": get_homer_results, "meme": get_meme_results}
    method_colname_dict = {"homer": "Motif Name", "meme": "motif_ID"}
    # Resolve the loader and column once rather than once per library.
    load_results = method_func_dict[method]
    name_col = method_colname_dict[method]

    # The loaders keep the row labels from the file, and concat(axis=1)
    # aligns on those labels. Renumbering each column from 0 makes row i mean
    # "i-th retained motif" for every library, even when the rows that passed
    # the significance filter are not a contiguous prefix of the file.
    ranked = [
        load_results(resdir, lib, ct)[name_col].iloc[:N].reset_index(drop=True)
        for lib in libs
    ]
    table = pd.concat(ranked, axis=1)
    table.columns = libs
    return table


In [9]:
# "meme" results for the "responsive_vs_nonresponsive" comparison (c_types[1]),
# for every library except the first one ("CC"); at most 30 motifs each.
get_top_motifs(
    resdir=results_dir,
    libs=libs[1:],
    ct=c_types[1],
    method="meme",
    N=30,
)

Unnamed: 0,ATF2,CTCF,FOXA1,LEF1,SCRT1,TCF7L2,16P12_1
0,P53_HUMAN,THAP1_HUMAN,THAP1_HUMAN,P53_HUMAN,P53_HUMAN,CEBPG_HUMAN,ZFX_HUMAN
1,P63_HUMAN,SP2_HUMAN,SP2_HUMAN,P73_HUMAN,P63_HUMAN,ATF4_HUMAN,P53_HUMAN
2,P73_HUMAN,SP3_HUMAN,SP3_HUMAN,P63_HUMAN,P73_HUMAN,P53_HUMAN,P73_HUMAN
3,ATF4_HUMAN,AP2B_HUMAN,KLF3_HUMAN,CEBPG_HUMAN,ZN449_HUMAN,P63_HUMAN,P63_HUMAN
4,CEBPG_HUMAN,KLF3_HUMAN,E2F4_HUMAN,ATF4_HUMAN,SMAD3_HUMAN,JUN_HUMAN,AP2B_HUMAN
5,ZIC1_HUMAN,E2F4_HUMAN,AP2B_HUMAN,FOSL1_HUMAN,SRBP1_HUMAN,JUND_HUMAN,SP2_HUMAN
6,FEV_HUMAN,KLF6_HUMAN,MECP2_HUMAN,FOSL2_HUMAN,THAP1_HUMAN,P73_HUMAN,THAP1_HUMAN
7,SMAD3_HUMAN,KLF12_HUMAN,SP1_HUMAN,JUND_HUMAN,E2F3_HUMAN,FOSL2_HUMAN,SP3_HUMAN
8,KLF6_HUMAN,SP1_HUMAN,KLF6_HUMAN,JUN_HUMAN,SP3_HUMAN,FOSL1_HUMAN,ZIC1_HUMAN
9,SUH_HUMAN,MECP2_HUMAN,KLF12_HUMAN,JUNB_HUMAN,SALL4_HUMAN,FOSB_HUMAN,SRBP2_HUMAN
