In [1]:
import pandas as pd
import os

In [2]:
# Root directory that the loader functions join with a library name,
# a method name and a comparison type to locate each results file.
results_dir = "../data/"

# Library names; each one is used as a sub-directory of `results_dir`.
libs = [
    "CC",
    "ATF2",
    "CTCF",
    "FOXA1",
    "LEF1",
    "SCRT1",
    "TCF7L2",
    "16P12_1",
]

# Comparison types (sub-directory below the method directory).
c_types = [
    "peaks_vs_notpeaks",
]

# Motif-enrichment methods understood by `create_motif_table`.
methods = [
    "homer",
    "meme",
]


def get_homer_results(resdir, lib, ct, threshold=0.01):
    """Load the significant HOMER known-motif results for one library.

    Reads the tab-separated file
    ``<resdir>/<lib>/homer/<ct>/knownResults.txt``, shortens every
    "Motif Name" to the text before its first "/", and keeps only the rows
    whose "q-value (Benjamini)" is strictly below ``threshold``.

    Parameters
    ----------
    resdir : str
        Root results directory.
    lib : str
        Library name (sub-directory of ``resdir``).
    ct : str
        Comparison type (sub-directory below ``homer``).
    threshold : float, default 0.01
        Exclusive upper bound on "q-value (Benjamini)".

    Returns
    -------
    pandas.DataFrame
        The filtered rows with every column of the input file. Empty (but
        with the file's columns) when no row passes or the file has no rows.
    """
    filename = os.path.join(resdir, lib, "homer", ct, "knownResults.txt")
    df = pd.read_csv(filename, sep="\t")
    # Use `.str[0]` on the split lists rather than `expand=True)[0]`: for a
    # table without data rows the expanded frame has no column 0 and the
    # lookup raised KeyError.
    df["Motif Name"] = df["Motif Name"].str.split("/", n=1).str[0]
    return df.loc[df["q-value (Benjamini)"] < threshold]

def get_meme_results(resdir, lib, ct, threshold=0.01):
    """Load the significant MEME-suite AME results for one library.

    Reads the tab-separated file ``<resdir>/<lib>/meme/<ct>/ame.tsv``,
    shortens every "motif_ID" to the text before its first ".", and keeps
    only the rows whose "adj_p-value" is strictly below ``threshold``.

    Parameters
    ----------
    resdir : str
        Root results directory.
    lib : str
        Library name (sub-directory of ``resdir``).
    ct : str
        Comparison type (sub-directory below ``meme``).
    threshold : float, default 0.01
        Exclusive upper bound on "adj_p-value".

    Returns
    -------
    pandas.DataFrame
        The filtered rows with every column of the input file. Empty (but
        with the file's columns) when no row passes or the file has no rows.
    """
    filename = os.path.join(resdir, lib, "meme", ct, "ame.tsv")
    df = pd.read_csv(filename, sep="\t")
    # A one-character pattern is split literally, so "." is a plain dot here.
    # `.str[0]` replaces `expand=True)[0]`, which raised KeyError on a table
    # without data rows (the expanded frame then has no column 0).
    df["motif_ID"] = df["motif_ID"].str.split(".", n=1).str[0]
    # Rows with a missing adj_p-value compare False and are dropped here
    # (presumably this covers any non-table trailer lines in the file --
    # verify against a real ame.tsv).
    return df.loc[df["adj_p-value"] < threshold]

def create_motif_table(resdir, libs, ct, method):
    """Stack the per-library motif results of one method into a long table.

    For every library in ``libs`` the loader for ``method`` is called with
    ``(resdir, lib, ct)``; the method-specific columns listed below are kept
    and a "library" column holding the library name is appended. The
    per-library tables are concatenated row-wise (original row labels are
    kept, so the index may contain repeats).

    Parameters
    ----------
    resdir : str
        Root results directory, passed through to the loader.
    libs : iterable of str
        Library names to load.
    ct : str
        Comparison type, passed through to the loader.
    method : {"homer", "meme"}
        Which loader / column set to use.

    Returns
    -------
    pandas.DataFrame
        The selected columns plus "library". When ``libs`` is empty an empty
        frame with those columns is returned instead of letting
        ``pd.concat`` fail on an empty list.

    Raises
    ------
    KeyError
        If ``method`` is not one of the supported methods.
    """
    method_func_dict = {"homer": get_homer_results, "meme": get_meme_results}
    method_colname_dict = {
        "homer": ["Motif Name", "P-value", "q-value (Benjamini)", "% of Target Sequences with Motif", "% of Background Sequences with Motif"],
        "meme": ["motif_ID", "p-value", "adj_p-value", "%TP", "%FP"]}
    if method not in method_func_dict:
        # Still a KeyError (as the plain dict lookup raised), but with guidance.
        raise KeyError(
            f"unknown method {method!r}; expected one of {sorted(method_func_dict)}"
        )
    # Resolve the loader and column list once instead of on every iteration.
    load_results = method_func_dict[method]
    keep_cols = method_colname_dict[method]

    # `.assign` returns a new frame, so no column is written into the result
    # of an indexing operation.
    tables = [
        load_results(resdir, lib, ct).loc[:, keep_cols].assign(library=lib)
        for lib in libs
    ]
    if not tables:
        return pd.DataFrame(columns=[*keep_cols, "library"])
    return pd.concat(tables, axis=0)

In [5]:
# Wide motif-by-library tables of adjusted significance values, one per method.
# Motif/library pairs with no row in the long table come out of the pivot as
# NaN and are filled with 1.0.
homer_df = (
    create_motif_table(results_dir, libs, c_types[0], methods[0])
    .pivot_table(
        index="Motif Name",
        columns="library",
        values=["q-value (Benjamini)"],
    )
    .fillna(1.)
)
meme_df = (
    create_motif_table(results_dir, libs, c_types[0], methods[1])
    .pivot_table(
        index="motif_ID",
        columns="library",
        values=["adj_p-value"],
    )
    .fillna(1.)
)

In [14]:
# HOMER motifs with q < 0.05 in fewer than two libraries (first 40 rows).
homer_df[homer_df.lt(0.05).sum(axis=1).lt(2)].head(40)

Unnamed: 0_level_0,q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini)
library,16P12_1,ATF2,CC,CTCF,FOXA1,LEF1,SCRT1,TCF7L2
Motif Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
ABF2(bZIP),1.0,1.0,1.0,1.0,1.0,0.0016,1.0,1.0
AIL7(AP2EREBP),1.0,1.0,1.0,1.0,1.0,0.0003,1.0,1.0
AS2(LOBAS2),1.0,1.0,1.0,1.0,1.0,0.0002,1.0,1.0
ASL18(LOBAS2),1.0,1.0,1.0,1.0,1.0,0.0074,1.0,1.0
AT1G12630(AP2EREBP),1.0,1.0,1.0,1.0,1.0,0.0048,1.0,1.0
AT1G77200(AP2EREBP),1.0,1.0,1.0,1.0,1.0,0.0006,1.0,1.0
AT3G10030(Trihelix),1.0,1.0,1.0,1.0,1.0,0.0038,1.0,1.0
At1g19210(AP2EREBP),1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0
At3g04030(G2like),1.0,1.0,1.0,1.0,1.0,0.0059,1.0,1.0
At4g31060(AP2EREBP),1.0,1.0,1.0,1.0,1.0,0.0023,1.0,1.0


In [18]:
# AME motifs with adjusted p < 0.05 in every library. The count is compared
# with len(libs) rather than a literal 8 so it follows the library list.
meme_df.loc[(meme_df<0.05).sum(axis=1)==len(libs)]

Unnamed: 0_level_0,adj_p-value,adj_p-value,adj_p-value,adj_p-value,adj_p-value,adj_p-value,adj_p-value,adj_p-value
library,16P12_1,ATF2,CC,CTCF,FOXA1,LEF1,SCRT1,TCF7L2
motif_ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
AP2A_HUMAN,1.280000e-16,8.840000e-04,3.130000e-14,3.440000e-09,1.900000e-12,1.570000e-15,1.750000e-13,4.900000e-16
AP2B_HUMAN,3.560000e-20,5.460000e-11,3.590000e-19,3.110000e-12,7.800000e-17,5.050000e-59,2.710000e-13,1.860000e-24
AP2C_HUMAN,1.580000e-19,1.050000e-06,9.800000e-17,7.170000e-11,4.040000e-13,1.700000e-24,1.490000e-15,1.960000e-17
ATF1_HUMAN,1.860000e-43,5.010000e-35,1.860000e-48,9.600000e-18,1.210000e-74,1.020000e-114,3.260000e-26,4.120000e-73
ATF2_HUMAN,4.690000e-48,1.080000e-45,3.860000e-62,2.790000e-26,1.750000e-108,1.210000e-134,2.830000e-35,1.670000e-96
...,...,...,...,...,...,...,...,...
ZIC3_HUMAN,5.680000e-12,1.220000e-05,2.360000e-08,6.060000e-07,1.960000e-11,4.680000e-16,6.970000e-08,1.190000e-17
ZN335_HUMAN,9.110000e-15,6.440000e-08,1.250000e-13,9.320000e-14,1.390000e-11,4.490000e-16,1.740000e-12,2.910000e-14
ZN341_HUMAN,2.380000e-17,2.530000e-08,8.040000e-13,3.550000e-12,4.090000e-07,1.050000e-17,3.640000e-13,3.160000e-11
ZN554_HUMAN,1.320000e-14,1.920000e-05,1.720000e-15,1.610000e-11,1.560000e-13,3.520000e-17,1.780000e-11,1.030000e-17


In [17]:
# HOMER motifs with q < 0.05 in every library. The count is compared with
# len(libs) rather than a literal 8 so it follows the library list.
homer_df.loc[(homer_df<0.05).sum(axis=1)==len(libs)]

Unnamed: 0_level_0,q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini)
library,16P12_1,ATF2,CC,CTCF,FOXA1,LEF1,SCRT1,TCF7L2
Motif Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
AARE(HLH),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AP-1(bZIP),0.0,0.0,0.0,0.0003,0.0,0.0,0.0,0.0
AT4G18450(AP2EREBP),0.0,0.0016,0.0,0.0001,0.0004,0.0,0.0,0.0
Atf1(bZIP),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Atf2(bZIP),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Atf3(bZIP),0.0,0.0,0.0001,0.0009,0.0,0.0,0.0001,0.0
Atf4(bZIP),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Atf7(bZIP),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
BATF(bZIP),0.0,0.0,0.0001,0.008,0.0,0.0,0.0019,0.0
Bach1(bZIP),0.0,0.0,0.0,0.0019,0.0,0.0,0.0,0.0


In [15]:
# HOMER motifs with q < 0.05 in fewer than two libraries (last 40 rows).
homer_df[homer_df.lt(0.05).sum(axis=1).lt(2)].tail(40)

Unnamed: 0_level_0,q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini),q-value (Benjamini)
library,16P12_1,ATF2,CC,CTCF,FOXA1,LEF1,SCRT1,TCF7L2
Motif Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
HSFA1E(HSF),1.0,1.0,1.0,1.0,1.0,0.0002,1.0,1.0
HSFA6B(HSF),1.0,1.0,1.0,1.0,1.0,0.0007,1.0,1.0
HSFB3(HSF),1.0,1.0,1.0,1.0,1.0,0.0005,1.0,1.0
HSFB4(HSF),1.0,1.0,1.0,1.0,1.0,0.0095,1.0,1.0
HSFC1(HSF),1.0,1.0,1.0,1.0,1.0,0.0095,1.0,1.0
LBD13(LOBAS2),1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0
LBD2(LOBAS2),1.0,1.0,1.0,1.0,1.0,0.0008,1.0,1.0
LBD23(LOBAS2),1.0,1.0,1.0,1.0,1.0,0.0048,1.0,1.0
LHY1(MYBrelated),1.0,1.0,1.0,1.0,0.001,1.0,1.0,1.0
LOB(LOBAS2),1.0,1.0,1.0,1.0,1.0,0.0011,1.0,1.0


In [13]:
# AME motifs with adjusted p < 0.05 in fewer than two libraries.
meme_df[meme_df.lt(0.05).sum(axis=1).lt(2)]

Unnamed: 0_level_0,adj_p-value,adj_p-value,adj_p-value,adj_p-value,adj_p-value,adj_p-value,adj_p-value,adj_p-value
library,16P12_1,ATF2,CC,CTCF,FOXA1,LEF1,SCRT1,TCF7L2
motif_ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
BMAL1_HUMAN,1.0,1.0,1.0,1.0,1.0,1.0,0.00737,1.0
E2F2_HUMAN,1.0,1.0,1.0,1.0,1.0,1.78e-17,1.0,1.0
E2F5_HUMAN,1.0,1.0,1.0,1.0,1.0,4.52e-12,1.0,1.0
KLF8_HUMAN,1.0,1.0,1.0,1.0,1.0,8.39e-06,1.0,1.0
KLF9_HUMAN,1.0,1.0,1.0,1.0,1.0,3.24e-09,1.0,1.0
MEF2B_HUMAN,1.0,1.0,1.0,1.0,0.00179,1.0,1.0,1.0
MEF2C_HUMAN,1.0,1.0,8e-06,1.0,1.0,1.0,1.0,1.0
PDX1_HUMAN,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.00775
RARA_HUMAN,1.0,1.0,1.0,1.0,1.0,1.0,0.00567,1.0
RELB_HUMAN,1.0,1.0,1.0,1.0,1.0,0.00401,1.0,1.0
