## Load Modules

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import glob
import os

## Functions

In [2]:
def extract_table_from_html(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f, "html.parser")
    
    table = soup.find("table", class_="dataframe")
    if table is None:
        return None
    
    headers = [th.text for th in table.find("thead").find_all("th")]
    rows = []
    for tr in table.find("tbody").find_all("tr"):
        cells = [td.text.strip() for td in tr.find_all("td")]
        rows.append(cells)
    
    return pd.DataFrame(rows, columns=headers)

In [3]:
def extract_all_html(file_list, has_pcw=False):
    data = []

    for file in file_list:
        df = extract_table_from_html(file)
        if has_pcw:
            df['cluster'] = file.split('/')[9].split('_')[0]
            df['pcw'] = file.split('/')[9].split('_')[1]
            df['fold'] = file.split('/')[10]
            cols_left = ['cluster', 'pcw', 'fold']
            idx_cols = [c for c in df.columns if c in cols_left]
            idx_cols += [c for c in df.columns if c not in cols_left and 'logo' not in c and 'cwm' not in c]
        else:
            df['cluster'] = file.split('/')[9]
            df['fold'] = file.split('/')[10]
            cols_left = ['cluster', 'fold']
            idx_cols = [c for c in df.columns if c in cols_left]
            idx_cols += [c for c in df.columns if c not in cols_left and 'logo' not in c and 'cwm' not in c]
        df = df[idx_cols]
        
        if df is not None:
            data.append(df)
    
    # return concatenated data
    return pd.concat(data, ignore_index=True)


## Identify Files

In [4]:
# Process all HTML files in the current directory
html_str = "/work/aaa/projects/chrombpnet-devmult/pipeline/results/chrombpnet_nobias/pretrained_bias/*/*/modisco/counts_scores/motifs.html"
html_files_all = glob.glob(html_str)

In [5]:
folds = [f"fold_{i}" for i in range(5)] + ["mean"]
html_dict = {
    'celltype': {fold: [f for f in html_files_all if ("PCW" not in f) and (fold in f) and ("HSC-" not in f)] for fold in folds},
    'hscs': {fold: [f for f in html_files_all if ("PCW" not in f) and (fold in f) and ("HSC-" in f)] for fold in folds}
}

for k, fs in html_dict.items():
    for f, hs in fs.items():
        print(f'{k} - {f}: {len(hs)}')

celltype - fold_0: 36
celltype - fold_1: 36
celltype - fold_2: 36
celltype - fold_3: 36
celltype - fold_4: 36
celltype - mean: 36
hscs - fold_0: 5
hscs - fold_1: 5
hscs - fold_2: 5
hscs - fold_3: 5
hscs - fold_4: 5
hscs - mean: 5


# Load Data

In [6]:
df_dict = {k: {f: extract_all_html(hs) if len(hs) > 0 else None for f, hs in fs.items()} for k, fs in html_dict.items()}
for k, fs in df_dict.items():
    for f, df in fs.items():
        print(f'{k} - {f}: {0 if df is None else len(df)}')

celltype - fold_0: 1714
celltype - fold_1: 1727
celltype - fold_2: 1750
celltype - fold_3: 1687
celltype - fold_4: 1696
celltype - mean: 1926
hscs - fold_0: 248
hscs - fold_1: 243
hscs - fold_2: 261
hscs - fold_3: 277
hscs - fold_4: 257
hscs - mean: 257


In [7]:
for k, fs in df_dict.items():
    for f, df in fs.items():
        if df is not None:
            display(df)

Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,MEMP-t,fold_0,pos_patterns.pattern_0,36054,ETV6.H13CORE.1.P.B,1.531220e-03,SPI1.H13CORE.0.P.B,7.562100e-03,ERF.H13CORE.0.PS.A,7.562100e-03,...,SPIB.H13CORE.0.P.B,0.008507,ELF3.H13CORE.1.PM.A,0.016268,ELF5.H13CORE.0.PSM.A,0.016268,ETV7.H13CORE.1.P.C,0.029971,ETV2.H13CORE.1.PM.A,0.029971
1,MEMP-t,fold_0,pos_patterns.pattern_1,30433,CTCF.H13CORE.0.P.B,7.879600e-11,CTCFL.H13CORE.0.P.B,1.937770e-05,ZNF503.H13CORE.0.P.B,1.017400e-01,...,PRD13.H13CORE.0.P.B,0.135362,ZIC2.H13CORE.0.P.B,0.135362,ZIC3.H13CORE.0.P.B,0.135362,NDF2.H13CORE.0.P.B,0.161663,ZIC5.H13CORE.0.P.B,0.243004
2,MEMP-t,fold_0,pos_patterns.pattern_2,18623,ZFPM1.H13CORE.0.I.C,2.524650e-01,GATA1.H13CORE.1.PSM.A,2.524650e-01,GATA2.H13CORE.0.PSM.A,2.524650e-01,...,GATA6.H13CORE.0.PSM.A,0.252465,GATA3.H13CORE.0.PS.A,0.252465,GATA4.H13CORE.0.PSM.A,0.252465,ZNF234.H13CORE.0.PSG.A,0.252465,GATA1.H13CORE.0.P.B,0.252465
3,MEMP-t,fold_0,pos_patterns.pattern_3,9172,KLF8.H13CORE.0.P.C,4.953050e-06,SP3.H13CORE.0.P.B,5.448350e-06,SP1.H13CORE.0.P.B,2.833000e-05,...,KLF11.H13CORE.0.P.B,0.000059,SP4.H13CORE.0.P.C,0.000059,KLF9.H13CORE.1.P.B,0.000111,SP5.H13CORE.0.P.B,0.000498,SP2.H13CORE.0.P.D,0.000498
4,MEMP-t,fold_0,pos_patterns.pattern_4,4335,ELK4.H13CORE.0.PSM.A,9.038730e-03,ETV1.H13CORE.0.PSM.A,9.038730e-03,FEV.H13CORE.0.S.B,9.038730e-03,...,ELK1.H13CORE.0.PSM.A,0.016610,ELK3.H13CORE.0.PSM.A,0.016610,ETS1.H13CORE.0.S.B,0.016956,ELF3.H13CORE.0.S.B,0.017225,ETS2.H13CORE.0.S.C,0.027129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1709,MastP-t,fold_0,pos_patterns.pattern_41,29,RUNX2.H13CORE.0.P.B,1.000000e+00,RUNX2.H13CORE.2.M.C,1.000000e+00,RUNX1.H13CORE.0.P.B,1.000000e+00,...,RUNX3.H13CORE.2.S.B,1.000000,SRBP2.H13CORE.2.M.C,1.000000,MAX.H13CORE.2.S.C,1.000000,ZN771.H13CORE.0.SM.B,1.000000,,
1710,MastP-t,fold_0,pos_patterns.pattern_42,21,SPIB.H13CORE.2.SM.B,7.711480e-01,ERF.H13CORE.0.PS.A,7.711480e-01,SPI1.H13CORE.0.P.B,7.711480e-01,...,ELF4.H13CORE.1.M.B,0.943406,FEZF1.H13CORE.0.P.B,0.943406,IRF4.H13CORE.0.P.B,0.943406,ETV7.H13CORE.1.P.C,1.000000,ETV6.H13CORE.1.P.B,1.000000
1711,MastP-t,fold_0,neg_patterns.pattern_0,43,COE1.H13CORE.0.P.B,4.318570e-01,PRD16.H13CORE.0.P.B,4.318570e-01,COE2.H13CORE.0.P.B,4.318570e-01,...,ZN423.H13CORE.0.P.C,0.622087,GLIS1.H13CORE.1.P.B,0.638337,STAT6.H13CORE.0.P.B,0.887914,GLIS3.H13CORE.0.P.C,1.000000,ZN143.H13CORE.0.P.B,1.000000
1712,MastP-t,fold_0,neg_patterns.pattern_1,33,ZEB2.H13CORE.0.P.B,6.057930e-02,ZEB1.H13CORE.0.P.B,2.219090e-01,ESR1.H13CORE.1.P.B,8.090180e-01,...,BMAL1.H13CORE.0.PSM.A,0.809018,ESR2.H13CORE.1.P.B,1.000000,PRD16.H13CORE.0.P.B,1.000000,ZNF131.H13CORE.0.PSGI.A,1.000000,RARG.H13CORE.0.P.B,1.000000


Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,MEMP-t,fold_1,pos_patterns.pattern_0,33915,ETV6.H13CORE.1.P.B,1.685330e-03,SPI1.H13CORE.0.P.B,6.888640e-03,SPIB.H13CORE.2.SM.B,7.654590e-03,...,EHF.H13CORE.0.P.B,0.008422,ELF5.H13CORE.0.PSM.A,0.018781,ELF3.H13CORE.1.PM.A,0.018781,ETV7.H13CORE.1.P.C,0.030844,ETV1.H13CORE.1.PM.A,0.032839
1,MEMP-t,fold_1,pos_patterns.pattern_1,27250,CTCF.H13CORE.0.P.B,3.372100e-10,CTCFL.H13CORE.0.P.B,3.598730e-05,ZNF503.H13CORE.0.P.B,9.857520e-02,...,PRD13.H13CORE.0.P.B,0.132101,ZIC2.H13CORE.0.P.B,0.132101,ZIC3.H13CORE.0.P.B,0.132101,NDF2.H13CORE.0.P.B,0.134615,NR1H3.H13CORE.0.P.B,0.245146
2,MEMP-t,fold_1,pos_patterns.pattern_2,17055,GATA3.H13CORE.0.PS.A,2.725400e-01,GATA4.H13CORE.0.PSM.A,2.725400e-01,ZFPM1.H13CORE.0.I.C,2.725400e-01,...,GATA5.H13CORE.0.SM.B,0.272540,GATA3.H13CORE.1.SM.B,0.272540,GATA6.H13CORE.0.PSM.A,0.272540,ZNF234.H13CORE.0.PSG.A,0.308762,GATA1.H13CORE.0.P.B,0.308762
3,MEMP-t,fold_1,pos_patterns.pattern_3,6095,SP3.H13CORE.0.P.B,2.067700e-07,SP1.H13CORE.0.P.B,9.414710e-07,KLF9.H13CORE.1.P.B,1.757410e-06,...,SP1.H13CORE.2.P.B,0.000006,KLF11.H13CORE.0.P.B,0.000008,KLF12.H13CORE.0.P.C,0.000011,SP2.H13CORE.0.P.D,0.000042,KLF10.H13CORE.0.P.C,0.000076
4,MEMP-t,fold_1,pos_patterns.pattern_4,3951,ELK4.H13CORE.0.PSM.A,7.829380e-03,ELK3.H13CORE.0.PSM.A,7.829380e-03,ETV1.H13CORE.0.PSM.A,7.829380e-03,...,GABPA.H13CORE.0.PSM.A,0.007829,ELK1.H13CORE.0.PSM.A,0.024744,ETS1.H13CORE.0.S.B,0.024744,ELF3.H13CORE.0.S.B,0.024744,ETV4.H13CORE.1.SM.B,0.024744
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1722,MastP-t,fold_1,pos_patterns.pattern_29,41,RUNX3.H13CORE.1.S.B,1.258260e-01,RUNX2.H13CORE.2.M.C,1.664620e-01,RUNX3.H13CORE.0.P.B,2.675680e-01,...,RUNX2.H13CORE.0.P.B,0.625740,RUNX3.H13CORE.2.S.B,0.625740,ZN197.H13CORE.0.P.C,0.625740,RUNX2.H13CORE.1.S.B,0.625740,MEIS1.H13CORE.0.P.B,0.651156
1723,MastP-t,fold_1,pos_patterns.pattern_30,39,BACH1.H13CORE.0.P.B,4.474270e-03,BACH2.H13CORE.0.P.B,1.129180e-02,NFE2.H13CORE.0.P.B,1.129180e-02,...,NF2L2.H13CORE.1.P.B,0.059787,BATF.H13CORE.0.P.B,0.059787,NF2L1.H13CORE.0.P.B,0.061008,JUNB.H13CORE.0.PM.A,0.061008,FOSB.H13CORE.0.P.B,0.061008
1724,MastP-t,fold_1,pos_patterns.pattern_31,34,ERF.H13CORE.0.PS.A,2.069740e-02,SPI1.H13CORE.0.P.B,2.069740e-02,SPIB.H13CORE.1.S.C,2.069740e-02,...,ELF2.H13CORE.1.M.B,0.043327,ETV2.H13CORE.1.PM.A,0.043327,ETV4.H13CORE.0.P.B,0.047872,ELK1.H13CORE.0.PSM.A,0.117512,ETS2.H13CORE.1.P.B,0.117512
1725,MastP-t,fold_1,pos_patterns.pattern_32,34,SPIB.H13CORE.2.SM.B,4.973040e-01,SPI1.H13CORE.0.P.B,4.973040e-01,FEZF1.H13CORE.0.P.B,6.609540e-01,...,ERF.H13CORE.0.PS.A,0.660954,ZN875.H13CORE.0.P.C,0.830319,SPIB.H13CORE.0.P.B,1.000000,ETV6.H13CORE.1.P.B,1.000000,IRF8.H13CORE.0.P.B,1.000000


Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,MEMP-t,fold_2,pos_patterns.pattern_0,37298,ETV6.H13CORE.1.P.B,1.751880e-03,SPI1.H13CORE.0.P.B,2.017450e-03,SPIB.H13CORE.0.P.B,3.277030e-03,...,EHF.H13CORE.0.P.B,0.024735,ELF5.H13CORE.0.PSM.A,0.024735,ELF3.H13CORE.1.PM.A,0.031522,ETV7.H13CORE.1.P.C,0.044554,ETV1.H13CORE.1.PM.A,0.044554
1,MEMP-t,fold_2,pos_patterns.pattern_1,29718,CTCF.H13CORE.0.P.B,1.480020e-10,CTCFL.H13CORE.0.P.B,2.342470e-05,ZNF503.H13CORE.0.P.B,1.082150e-01,...,PRD13.H13CORE.0.P.B,0.123686,ZIC3.H13CORE.0.P.B,0.123686,ZIC2.H13CORE.0.P.B,0.123686,NDF2.H13CORE.0.P.B,0.157496,ZIC5.H13CORE.0.P.B,0.234859
2,MEMP-t,fold_2,pos_patterns.pattern_2,19214,GATA3.H13CORE.0.PS.A,2.635380e-01,GATA4.H13CORE.0.PSM.A,2.635380e-01,ZFPM1.H13CORE.0.I.C,2.635380e-01,...,GATA5.H13CORE.0.SM.B,0.263538,GATA3.H13CORE.1.SM.B,0.263538,GATA6.H13CORE.0.PSM.A,0.263538,ZNF234.H13CORE.0.PSG.A,0.298567,GATA1.H13CORE.0.P.B,0.298567
3,MEMP-t,fold_2,pos_patterns.pattern_3,6918,SP3.H13CORE.0.P.B,6.381520e-07,SP1.H13CORE.2.P.B,6.381520e-07,KLF11.H13CORE.0.P.B,1.352550e-06,...,SP1.H13CORE.0.P.B,0.000003,KLF12.H13CORE.0.P.C,0.000005,KLF9.H13CORE.1.P.B,0.000007,SP2.H13CORE.0.P.D,0.000055,KLF10.H13CORE.0.P.C,0.000100
4,MEMP-t,fold_2,pos_patterns.pattern_4,5092,ETV6.H13CORE.0.PS.A,3.343690e-04,ELF2.H13CORE.0.PS.A,3.343690e-04,ELF1.H13CORE.0.PSM.A,3.343690e-04,...,ELF5.H13CORE.1.S.B,0.000820,ELK3.H13CORE.0.PSM.A,0.000851,FEV.H13CORE.0.S.B,0.000851,ERG.H13CORE.1.SM.B,0.000851,GABPA.H13CORE.0.PSM.A,0.000851
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1745,MastP-t,fold_2,pos_patterns.pattern_37,30,KLF8.H13CORE.1.P.C,6.931380e-04,KLF12.H13CORE.0.P.C,6.931380e-04,SP4.H13CORE.2.P.B,6.931380e-04,...,KLF9.H13CORE.1.P.B,0.001092,MAZ.H13CORE.1.P.B,0.001521,KLF13.H13CORE.1.P.C,0.001953,KLF6.H13CORE.0.P.B,0.001953,SP4.H13CORE.0.P.C,0.004095
1746,MastP-t,fold_2,pos_patterns.pattern_38,30,NFKB2.H13CORE.0.PSM.A,3.425230e-01,SPIB.H13CORE.1.S.C,5.340680e-01,IRF2.H13CORE.0.PSM.A,5.340680e-01,...,ZN701.H13CORE.0.P.B,0.534068,ZN510.H13CORE.1.P.C,0.534068,IRF3.H13CORE.0.PS.A,0.534068,ERG.H13CORE.1.SM.B,0.534068,ZBED5.H13CORE.0.PSGIB.A,0.534068
1747,MastP-t,fold_2,pos_patterns.pattern_39,30,Z585B.H13CORE.0.P.C,6.316740e-01,ZN675.H13CORE.0.P.C,6.316740e-01,RUNX1.H13CORE.0.P.B,6.601890e-01,...,OLIG1.H13CORE.0.S.B,1.000000,NPAS2.H13CORE.1.M.C,1.000000,ZN586.H13CORE.0.P.C,1.000000,FIGLA.H13CORE.0.SM.B,1.000000,RUNX3.H13CORE.0.P.B,1.000000
1748,MastP-t,fold_2,pos_patterns.pattern_40,26,ETV6.H13CORE.1.P.B,2.787260e-01,ETS1.H13CORE.1.P.B,2.787260e-01,ELF2.H13CORE.0.PS.A,2.787260e-01,...,ETV2.H13CORE.1.PM.A,0.307294,ETV1.H13CORE.1.PM.A,0.394123,ETV4.H13CORE.0.P.B,0.394123,PBX1.H13CORE.1.P.B,0.394123,ZIC1.H13CORE.1.M.B,0.446596


Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,MEMP-t,fold_3,pos_patterns.pattern_0,31257,ETV6.H13CORE.1.P.B,2.278800e-03,SPI1.H13CORE.0.P.B,4.753870e-03,SPIB.H13CORE.2.SM.B,5.736670e-03,...,EHF.H13CORE.0.P.B,0.012970,ELF3.H13CORE.1.PM.A,0.022613,ELF5.H13CORE.0.PSM.A,0.022613,ETV7.H13CORE.1.P.C,0.036942,ETV1.H13CORE.1.PM.A,0.036942
1,MEMP-t,fold_3,pos_patterns.pattern_1,29783,CTCF.H13CORE.0.P.B,6.292980e-11,CTCFL.H13CORE.0.P.B,1.509570e-05,ZNF503.H13CORE.0.P.B,9.954820e-02,...,PRD13.H13CORE.0.P.B,0.142382,ZIC2.H13CORE.0.P.B,0.142382,ZIC3.H13CORE.0.P.B,0.142382,NDF2.H13CORE.0.P.B,0.145628,ZIC5.H13CORE.0.P.B,0.234377
2,MEMP-t,fold_3,pos_patterns.pattern_2,16491,GATA2.H13CORE.1.P.B,4.338750e-03,TAL1.H13CORE.0.P.B,4.523850e-03,GATA1.H13CORE.1.PSM.A,1.236050e-02,...,GATA1.H13CORE.0.P.B,0.030122,GATA4.H13CORE.0.PSM.A,0.031075,GATA3.H13CORE.0.PS.A,0.037888,TAL1.H13CORE.1.P.B,0.041878,GATAD2A.H13CORE.0.SG.A,0.048478
3,MEMP-t,fold_3,pos_patterns.pattern_3,8142,SP3.H13CORE.0.P.B,6.816990e-08,SP1.H13CORE.2.P.B,7.897920e-07,KLF8.H13CORE.0.P.C,1.428830e-06,...,KLF9.H13CORE.1.P.B,0.000003,KLF11.H13CORE.0.P.B,0.000005,SP4.H13CORE.0.P.C,0.000011,SP2.H13CORE.0.P.D,0.000048,KLF10.H13CORE.0.P.C,0.000085
4,MEMP-t,fold_3,pos_patterns.pattern_4,4256,FEV.H13CORE.0.S.B,1.478690e-02,GABPA.H13CORE.0.PSM.A,1.478690e-02,ELK4.H13CORE.0.PSM.A,1.478690e-02,...,ETV1.H13CORE.0.PSM.A,0.014787,ERG.H13CORE.1.SM.B,0.014787,ETS1.H13CORE.0.S.B,0.014787,ELF3.H13CORE.0.S.B,0.014787,ETV4.H13CORE.1.SM.B,0.014787
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1682,MastP-t,fold_3,pos_patterns.pattern_27,31,RFX5.H13CORE.0.PSG.A,6.489030e-06,RFX3.H13CORE.0.PSM.A,6.489030e-06,RFX1.H13CORE.1.PSM.A,3.053980e-05,...,RFX2.H13CORE.0.PS.A,0.001275,RFX1.H13CORE.0.PM.A,0.005208,RFX3.H13CORE.1.PS.A,0.007037,RFX6.H13CORE.0.P.C,0.167710,RFX7.H13CORE.0.SM.B,0.196580
1683,MastP-t,fold_3,pos_patterns.pattern_28,23,ZNF234.H13CORE.0.PSG.A,8.050890e-01,ZNF226.H13CORE.0.PSGI.A,8.050890e-01,ZN529.H13CORE.0.P.B,8.050890e-01,...,ZNF888.H13CORE.0.P.B,0.920516,IRF4.H13CORE.0.P.B,0.920516,FEZF1.H13CORE.0.P.B,0.920516,SPIB.H13CORE.0.P.B,0.920516,ANDR.H13CORE.2.P.B,1.000000
1684,MastP-t,fold_3,pos_patterns.pattern_29,23,E2F2.H13CORE.0.S.B,4.422890e-01,ZBT14.H13CORE.0.P.C,4.422890e-01,ZBT14.H13CORE.1.S.B,4.422890e-01,...,GMEB2.H13CORE.0.P.C,1.000000,,,,,,,,
1685,MastP-t,fold_3,pos_patterns.pattern_30,22,PBX3.H13CORE.1.P.B,1.222880e-03,PBX2.H13CORE.0.P.B,1.222880e-03,PKNX1.H13CORE.1.P.B,1.222880e-03,...,PBX2.H13CORE.1.P.C,0.054846,PKNX1.H13CORE.0.P.B,0.054846,PBX1.H13CORE.1.P.B,0.115321,PBX1.H13CORE.2.P.B,0.278668,PKNX1.H13CORE.2.SM.B,0.281168


Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,MEMP-t,fold_4,pos_patterns.pattern_0,39040,ETV6.H13CORE.1.P.B,3.129340e-04,EHF.H13CORE.0.P.B,4.901750e-03,ERF.H13CORE.0.PS.A,6.362890e-03,...,SPIB.H13CORE.0.P.B,0.006576,SPIB.H13CORE.2.SM.B,0.006576,ELF3.H13CORE.1.PM.A,0.008994,ETV7.H13CORE.1.P.C,0.015817,ETV1.H13CORE.1.PM.A,0.015817
1,MEMP-t,fold_4,pos_patterns.pattern_1,29500,CTCF.H13CORE.0.P.B,3.205030e-12,CTCFL.H13CORE.0.P.B,2.746590e-06,ZNF503.H13CORE.0.P.B,1.609990e-01,...,PRD13.H13CORE.0.P.B,0.168165,NDF2.H13CORE.0.P.B,0.216753,ZIC2.H13CORE.0.P.B,0.216753,ZIC3.H13CORE.0.P.B,0.216753,ZIC5.H13CORE.0.P.B,0.415779
2,MEMP-t,fold_4,pos_patterns.pattern_2,18016,ZFPM1.H13CORE.0.I.C,1.779370e-01,GATA3.H13CORE.0.PS.A,1.779370e-01,GATA4.H13CORE.0.PSM.A,1.779370e-01,...,GATA5.H13CORE.0.SM.B,0.177937,GATA3.H13CORE.1.SM.B,0.177937,GATA6.H13CORE.0.PSM.A,0.177937,ZNF234.H13CORE.0.PSG.A,0.216012,GATA1.H13CORE.0.P.B,0.216012
3,MEMP-t,fold_4,pos_patterns.pattern_3,7222,KLF12.H13CORE.0.P.C,1.778210e-05,SP3.H13CORE.0.P.B,1.778210e-05,SP1.H13CORE.2.P.B,1.778210e-05,...,KLF11.H13CORE.0.P.B,0.000099,SP4.H13CORE.0.P.C,0.000099,KLF9.H13CORE.1.P.B,0.000186,SP2.H13CORE.0.P.D,0.000486,SP5.H13CORE.0.P.B,0.000808
4,MEMP-t,fold_4,pos_patterns.pattern_4,3959,RUNX3.H13CORE.0.P.B,3.469130e-02,RUNX2.H13CORE.0.P.B,3.469130e-02,RUNX1.H13CORE.0.P.B,3.469130e-02,...,RUNX3.H13CORE.2.S.B,0.444399,RUNX2.H13CORE.1.S.B,0.444399,FOXH1.H13CORE.0.P.B,0.667703,TCF7.H13CORE.0.PSM.A,0.832816,RUNX3.H13CORE.1.S.B,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1691,MastP-t,fold_4,pos_patterns.pattern_39,33,ZN285.H13CORE.0.P.C,4.539030e-01,HLF.H13CORE.0.P.B,4.539030e-01,ZN112.H13CORE.0.P.C,4.539030e-01,...,CEBPA.H13CORE.0.P.B,0.510396,ERF.H13CORE.0.PS.A,0.510396,ZN547.H13CORE.0.P.B,0.510396,CEBPG.H13CORE.1.SM.B,0.510396,DBP.H13CORE.0.SM.B,0.510396
1692,MastP-t,fold_4,pos_patterns.pattern_40,32,SPI1.H13CORE.0.P.B,9.511450e-02,IRF7.H13CORE.1.SM.B,9.511450e-02,SPIB.H13CORE.1.S.C,1.163400e-01,...,IRF4.H13CORE.0.P.B,0.116340,TAL1.H13CORE.1.P.B,0.116340,IRF8.H13CORE.1.SM.B,0.116340,ZNF160.H13CORE.0.SG.A,0.116340,STAT2.H13CORE.0.P.B,0.116340
1693,MastP-t,fold_4,pos_patterns.pattern_41,25,Z280A.H13CORE.0.P.B,4.959940e-01,SPIB.H13CORE.1.S.C,4.959940e-01,ZN124.H13CORE.0.P.C,4.959940e-01,...,ZNF233.H13CORE.0.PG.A,0.689873,TCF21.H13CORE.0.PS.A,0.689873,ELF2.H13CORE.1.M.B,0.689873,NDF2.H13CORE.0.P.B,0.689873,SPIB.H13CORE.0.P.B,0.689873
1694,MastP-t,fold_4,pos_patterns.pattern_42,24,SPI1.H13CORE.0.P.B,1.247500e-02,ZN341.H13CORE.1.P.B,1.904310e-01,ZN770.H13CORE.1.P.B,2.062420e-01,...,ERF.H13CORE.0.PS.A,0.206242,ZN267.H13CORE.0.P.C,0.206242,ZN816.H13CORE.1.P.C,0.227568,IRF4.H13CORE.0.P.B,0.227568,ETS2.H13CORE.1.P.B,0.227568


Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,MEMP-t,mean,pos_patterns.pattern_0,39590,ETV6.H13CORE.1.P.B,1.654940e-03,SPI1.H13CORE.0.P.B,7.230640e-03,SPIB.H13CORE.2.SM.B,8.695780e-03,...,EHF.H13CORE.0.P.B,0.011099,ELF5.H13CORE.0.PSM.A,0.019182,ELF3.H13CORE.1.PM.A,0.019182,ETV7.H13CORE.1.P.C,0.032950,ETV1.H13CORE.1.PM.A,0.032950
1,MEMP-t,mean,pos_patterns.pattern_1,30153,CTCF.H13CORE.0.P.B,3.828630e-11,CTCFL.H13CORE.0.P.B,2.115010e-05,ZNF503.H13CORE.0.P.B,9.681310e-02,...,PRD13.H13CORE.0.P.B,0.139079,ZIC2.H13CORE.0.P.B,0.139079,ZIC3.H13CORE.0.P.B,0.139079,NDF2.H13CORE.0.P.B,0.165836,ZIC5.H13CORE.0.P.B,0.249148
2,MEMP-t,mean,pos_patterns.pattern_2,20135,GATA1.H13CORE.1.PSM.A,7.044860e-03,GATA6.H13CORE.0.PSM.A,7.044860e-03,ZNF226.H13CORE.0.PSGI.A,9.980200e-03,...,GATA3.H13CORE.1.SM.B,0.204958,GATA3.H13CORE.0.PS.A,0.219550,GATA4.H13CORE.0.PSM.A,0.219550,ZNF234.H13CORE.0.PSG.A,0.219550,GATA2.H13CORE.0.PSM.A,0.219550
3,MEMP-t,mean,pos_patterns.pattern_3,8978,SP3.H13CORE.0.P.B,1.181480e-06,SP1.H13CORE.2.P.B,1.181480e-06,KLF8.H13CORE.0.P.C,5.654250e-06,...,KLF11.H13CORE.0.P.B,0.000031,SP4.H13CORE.0.P.C,0.000080,KLF9.H13CORE.1.P.B,0.000150,SP2.H13CORE.0.P.D,0.000562,SP5.H13CORE.0.P.B,0.000611
4,MEMP-t,mean,pos_patterns.pattern_4,4819,FEV.H13CORE.0.S.B,9.734570e-03,ELK4.H13CORE.0.PSM.A,9.734570e-03,ELK3.H13CORE.0.PSM.A,9.734570e-03,...,ETS1.H13CORE.0.S.B,0.009735,GABPA.H13CORE.0.PSM.A,0.009735,ELF3.H13CORE.0.S.B,0.009735,ELK1.H13CORE.0.PSM.A,0.020104,ELF2.H13CORE.0.PS.A,0.021109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1921,MastP-t,mean,pos_patterns.pattern_36,49,SPIB.H13CORE.2.SM.B,9.177580e-01,SPI1.H13CORE.1.S.B,9.177580e-01,ERF.H13CORE.0.PS.A,9.177580e-01,...,PO6F1.H13CORE.0.SM.B,0.917758,ETV2.H13CORE.1.PM.A,0.917758,ZN550.H13CORE.0.P.C,0.917758,ZN132.H13CORE.0.P.C,0.998785,ELF2.H13CORE.1.M.B,0.998785
1922,MastP-t,mean,pos_patterns.pattern_37,43,ZN440.H13CORE.0.P.C,1.228330e-04,SPIB.H13CORE.1.S.C,7.202570e-01,ZN567.H13CORE.0.P.C,9.435580e-01,...,BACH2.H13CORE.1.SM.B,1.000000,PO4F1.H13CORE.1.SM.B,1.000000,ZFP82.H13CORE.0.P.C,1.000000,ELF2.H13CORE.1.M.B,1.000000,,
1923,MastP-t,mean,pos_patterns.pattern_38,41,CEBPB.H13CORE.0.P.B,3.356280e-01,CEBPA.H13CORE.0.P.B,3.356280e-01,CEBPG.H13CORE.1.SM.B,3.499340e-01,...,CEBPD.H13CORE.0.P.B,0.508493,CEBPE.H13CORE.1.SM.B,0.564369,TBR1.H13CORE.0.PS.A,0.699713,CEBPE.H13CORE.0.P.B,0.699713,PTF1A.H13CORE.0.P.B,0.699713
1924,MastP-t,mean,pos_patterns.pattern_39,40,TAL1.H13CORE.2.P.B,6.453470e-03,LYL1.H13CORE.1.P.C,6.453470e-03,LYL1.H13CORE.0.P.C,1.875660e-01,...,ETV7.H13CORE.1.P.C,0.274459,ZFP28.H13CORE.0.P.B,0.334062,TAL1.H13CORE.1.P.B,0.334062,ZNF497.H13CORE.0.SG.A,0.493740,SMAD3.H13CORE.2.P.C,0.493740


Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,HSC-2,fold_0,pos_patterns.pattern_0,38404,ETV6.H13CORE.1.P.B,2.518300e-03,ERF.H13CORE.0.PS.A,3.561390e-03,EHF.H13CORE.0.P.B,4.748520e-03,...,SPIB.H13CORE.0.P.B,0.010129,ELF3.H13CORE.1.PM.A,0.010129,ELF5.H13CORE.0.PSM.A,0.015960,ETV7.H13CORE.1.P.C,0.015960,ETV2.H13CORE.1.PM.A,0.017937
1,HSC-2,fold_0,pos_patterns.pattern_1,27238,CTCF.H13CORE.0.P.B,6.466410e-11,CTCFL.H13CORE.0.P.B,8.724240e-06,ZNF503.H13CORE.0.P.B,1.000490e-01,...,PRD13.H13CORE.0.P.B,0.125249,ZIC2.H13CORE.0.P.B,0.125249,ZIC3.H13CORE.0.P.B,0.125249,NDF2.H13CORE.0.P.B,0.147023,ZIC5.H13CORE.0.P.B,0.215206
2,HSC-2,fold_0,pos_patterns.pattern_2,7811,SP3.H13CORE.0.P.B,5.674990e-07,SP1.H13CORE.2.P.B,5.674990e-07,SP4.H13CORE.0.P.C,2.418920e-06,...,SP1.H13CORE.0.P.B,0.000005,KLF9.H13CORE.1.P.B,0.000011,KLF11.H13CORE.0.P.B,0.000011,SP2.H13CORE.0.P.D,0.000036,SP5.H13CORE.0.P.B,0.000072
3,HSC-2,fold_0,pos_patterns.pattern_3,6124,RUNX3.H13CORE.0.P.B,3.702540e-02,RUNX2.H13CORE.0.P.B,3.702540e-02,RUNX1.H13CORE.0.P.B,3.702540e-02,...,RUNX3.H13CORE.2.S.B,0.429572,RUNX2.H13CORE.1.S.B,0.429572,FOXH1.H13CORE.0.P.B,0.690516,TCF7.H13CORE.0.PSM.A,0.804756,RUNX3.H13CORE.1.S.B,1.000000
4,HSC-2,fold_0,pos_patterns.pattern_4,4389,NFYB.H13CORE.0.P.B,8.819830e-01,NFYA.H13CORE.0.P.B,8.819830e-01,NFYC.H13CORE.0.P.B,8.819830e-01,...,PBX2.H13CORE.1.P.C,1.000000,MSX2.H13CORE.0.SM.B,1.000000,ZNF516.H13CORE.0.S.C,1.000000,PBX3.H13CORE.0.P.B,1.000000,ZN133.H13CORE.0.P.B,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
243,HSC-1,fold_0,pos_patterns.pattern_45,33,IRF4.H13CORE.2.SM.B,8.538310e-04,IRF5.H13CORE.0.SM.B,6.680420e-03,ZN362.H13CORE.0.P.C,6.680420e-03,...,IRF6.H13CORE.1.S.C,0.007888,IRF7.H13CORE.1.SM.B,0.009771,IRF8.H13CORE.2.SM.B,0.010804,IRF7.H13CORE.0.S.B,0.013292,ZBED4.H13CORE.0.SGIB.A,0.015780
244,HSC-1,fold_0,pos_patterns.pattern_46,31,SPI1.H13CORE.0.P.B,5.892910e-02,SPIB.H13CORE.1.S.C,5.892910e-02,FEZF1.H13CORE.0.P.B,3.851510e-01,...,SPIB.H13CORE.0.P.B,0.504884,ERF.H13CORE.0.PS.A,0.507106,ZN701.H13CORE.0.P.B,0.689958,ZN302.H13CORE.0.P.B,0.689958,ELF2.H13CORE.1.M.B,0.717337
245,HSC-1,fold_0,pos_patterns.pattern_47,31,ZSCAN2.H13CORE.0.PG.A,3.870130e-01,SPI1.H13CORE.0.P.B,9.077290e-01,NFKB2.H13CORE.0.PSM.A,9.077290e-01,...,IRF4.H13CORE.0.P.B,0.907729,SPIB.H13CORE.2.SM.B,0.907729,ZN816.H13CORE.1.P.C,0.907729,SPIB.H13CORE.1.S.C,0.907729,ETV6.H13CORE.1.P.B,0.941883
246,HSC-1,fold_0,pos_patterns.pattern_48,28,FOXE1.H13CORE.0.SM.B,1.444560e-01,FOXE1.H13CORE.1.S.C,1.444560e-01,ERF.H13CORE.0.PS.A,2.235090e-01,...,HMGA1.H13CORE.0.P.B,0.223509,ZNF362.H13CORE.0.PSGI.A,0.223509,ZFP28.H13CORE.0.P.B,0.223509,CPEB1.H13CORE.0.S.B,0.223509,ZN384.H13CORE.0.PSM.A,0.336263


Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,HSC-2,fold_1,pos_patterns.pattern_0,37036,ETV6.H13CORE.1.P.B,3.077820e-03,SPI1.H13CORE.0.P.B,7.115280e-03,ERF.H13CORE.0.PS.A,7.115280e-03,...,EHF.H13CORE.0.P.B,7.281460e-03,ELF5.H13CORE.0.PSM.A,0.022144,ELF3.H13CORE.1.PM.A,0.022144,ETV7.H13CORE.1.P.C,0.027537,ETV2.H13CORE.1.PM.A,0.032777
1,HSC-2,fold_1,pos_patterns.pattern_1,27351,CTCF.H13CORE.0.P.B,4.579390e-10,CTCFL.H13CORE.0.P.B,1.099290e-05,ZNF503.H13CORE.0.P.B,9.378760e-02,...,PRD13.H13CORE.0.P.B,1.096220e-01,ZIC2.H13CORE.0.P.B,0.109622,ZIC3.H13CORE.0.P.B,0.109622,NDF2.H13CORE.0.P.B,0.149306,ZIC5.H13CORE.0.P.B,0.217534
2,HSC-2,fold_1,pos_patterns.pattern_2,8494,SP3.H13CORE.0.P.B,3.154070e-08,SP4.H13CORE.0.P.C,2.218070e-07,SP1.H13CORE.2.P.B,2.957430e-07,...,KLF11.H13CORE.0.P.B,7.037290e-07,KLF9.H13CORE.1.P.B,0.000001,KLF12.H13CORE.0.P.C,0.000004,SP2.H13CORE.0.P.D,0.000045,KLF10.H13CORE.0.P.C,0.000100
3,HSC-2,fold_1,pos_patterns.pattern_3,4906,ELK4.H13CORE.0.PSM.A,1.250890e-02,FEV.H13CORE.0.S.B,1.250890e-02,ERG.H13CORE.1.SM.B,1.250890e-02,...,ELK3.H13CORE.0.PSM.A,1.523970e-02,ETV1.H13CORE.0.PSM.A,0.015240,GABPA.H13CORE.0.PSM.A,0.015557,ELF3.H13CORE.0.S.B,0.015804,ELK1.H13CORE.0.PSM.A,0.025429
4,HSC-2,fold_1,pos_patterns.pattern_4,4475,NFYB.H13CORE.0.P.B,3.893550e-03,NFYA.H13CORE.0.P.B,3.893550e-03,NFYC.H13CORE.0.P.B,5.773210e-03,...,PBX3.H13CORE.0.P.B,2.856190e-02,PBX1.H13CORE.1.P.B,0.103088,HNF6.H13CORE.1.S.B,0.576392,ZN799.H13CORE.0.P.C,0.576392,PBX3.H13CORE.1.P.B,0.667892
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,HSC-1,fold_1,pos_patterns.pattern_44,26,KLF7.H13CORE.0.P.B,3.210470e-02,VEZF1.H13CORE.0.P.C,7.454430e-02,SP2.H13CORE.0.P.D,7.454430e-02,...,PATZ1.H13CORE.1.P.C,0.091102,SALL4.H13CORE.0.P.B,0.091102,KMT2A.H13CORE.0.P.B,0.094692,ZNF493.H13CORE.0.PSGI.A,0.113843,SP1.H13CORE.0.P.B,0.113843
239,HSC-1,fold_1,neg_patterns.pattern_0,43,ZEB2.H13CORE.0.P.B,3.325080e-01,ZEB1.H13CORE.0.P.B,7.327290e-01,ZBT49.H13CORE.0.P.C,7.947230e-01,...,ZN816.H13CORE.0.P.B,1.000000,DNTTIP1.H13CORE.0.PSG.A,1.000000,ITF2.H13CORE.1.PSM.A,1.000000,ZN124.H13CORE.0.P.C,1.000000,ZN132.H13CORE.0.P.C,1.000000
240,HSC-1,fold_1,neg_patterns.pattern_1,30,SOX30.H13CORE.1.SM.B,2.911350e-01,SOX7.H13CORE.0.PS.A,2.911350e-01,SOX3.H13CORE.0.PM.A,2.911350e-01,...,SOX5.H13CORE.0.P.B,0.291135,NR2F6.H13CORE.2.SM.B,0.355482,ERR3.H13CORE.0.PSM.A,0.355482,MITF.H13CORE.0.P.B,0.355482,ZN317.H13CORE.1.P.B,0.355482
241,HSC-1,fold_1,neg_patterns.pattern_2,27,ZN410.H13CORE.0.SM.B,1.000000e+00,ZNF90.H13CORE.0.P.C,1.000000e+00,TIGD4.H13CORE.0.PSGI.A,1.000000e+00,...,,,,,,,,,,


Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,HSC-2,fold_2,pos_patterns.pattern_0,42238,ETV6.H13CORE.1.P.B,4.371040e-04,EHF.H13CORE.0.P.B,2.362700e-03,ERF.H13CORE.0.PS.A,2.362700e-03,...,ELF3.H13CORE.1.PM.A,0.008221,SPIB.H13CORE.0.P.B,0.008221,SPIB.H13CORE.2.SM.B,0.009587,ETV7.H13CORE.1.P.C,0.014513,ETS2.H13CORE.1.P.B,0.014513
1,HSC-2,fold_2,pos_patterns.pattern_1,26732,CTCF.H13CORE.0.P.B,4.017000e-12,CTCFL.H13CORE.0.P.B,1.227610e-06,ZNF503.H13CORE.0.P.B,1.304890e-01,...,PRD13.H13CORE.0.P.B,0.134001,NDF2.H13CORE.0.P.B,0.168502,ZIC2.H13CORE.0.P.B,0.168502,ZIC3.H13CORE.0.P.B,0.168502,ZIC5.H13CORE.0.P.B,0.327587
2,HSC-2,fold_2,pos_patterns.pattern_2,9000,SP1.H13CORE.0.P.B,1.571430e-05,KLF12.H13CORE.0.P.C,1.571430e-05,SP3.H13CORE.0.P.B,1.571430e-05,...,KLF11.H13CORE.0.P.B,0.000034,KLF9.H13CORE.1.P.B,0.000062,SP4.H13CORE.0.P.C,0.000076,SP5.H13CORE.0.P.B,0.000527,SP2.H13CORE.0.P.D,0.000527
3,HSC-2,fold_2,pos_patterns.pattern_3,5519,RUNX3.H13CORE.0.P.B,2.276600e-02,RUNX1.H13CORE.0.P.B,2.276600e-02,RUNX2.H13CORE.0.P.B,2.276600e-02,...,RUNX3.H13CORE.2.S.B,0.415835,RUNX2.H13CORE.1.S.B,0.415835,FOXH1.H13CORE.0.P.B,0.775282,TCF7.H13CORE.0.PSM.A,0.779941,RUNX3.H13CORE.1.S.B,0.964610
4,HSC-2,fold_2,pos_patterns.pattern_4,4494,NFYB.H13CORE.0.P.B,3.248420e-03,NFYA.H13CORE.0.P.B,3.248420e-03,NFYC.H13CORE.0.P.B,5.023370e-03,...,PBX3.H13CORE.0.P.B,0.027814,PBX1.H13CORE.1.P.B,0.099582,ZN799.H13CORE.0.P.C,0.544277,HNF6.H13CORE.1.S.B,0.595253,PBX3.H13CORE.1.P.B,0.657856
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
256,HSC-1,fold_2,pos_patterns.pattern_35,56,HXC9.H13CORE.0.P.B,4.676300e-01,ZNF703.H13CORE.0.P.C,5.251750e-01,HXC10.H13CORE.0.P.B,5.251750e-01,...,ZN248.H13CORE.0.P.B,0.717047,MEIS2.H13CORE.0.P.C,0.717047,HXD13.H13CORE.0.PS.A,0.717047,HXA9.H13CORE.0.P.B,0.717047,ZFP82.H13CORE.0.P.C,0.717047
257,HSC-1,fold_2,pos_patterns.pattern_36,33,FOXE1.H13CORE.0.SM.B,1.847200e-01,FOXE1.H13CORE.1.S.C,1.847200e-01,ERF.H13CORE.0.PS.A,1.847200e-01,...,CPEB1.H13CORE.0.S.B,0.248253,ZN613.H13CORE.0.P.C,0.248253,FOXI1.H13CORE.0.SM.B,0.248253,ETV7.H13CORE.1.P.C,0.248253,NFAC2.H13CORE.4.M.B,0.248253
258,HSC-1,fold_2,neg_patterns.pattern_0,31,ZBT26.H13CORE.1.SM.B,2.218430e-01,GLIS3.H13CORE.0.P.C,1.000000e+00,NR1I3.H13CORE.1.PSM.A,1.000000e+00,...,LEUTX.H13CORE.0.PSGB.A,1.000000,GLIS1.H13CORE.1.P.B,1.000000,PITX3.H13CORE.0.SM.B,1.000000,SOX2.H13CORE.1.P.B,1.000000,ZN248.H13CORE.0.P.B,1.000000
259,HSC-1,fold_2,neg_patterns.pattern_1,28,ZN214.H13CORE.0.P.B,1.000000e+00,XBP1.H13CORE.0.PS.A,1.000000e+00,PO3F2.H13CORE.2.SM.B,1.000000e+00,...,SOX30.H13CORE.0.P.C,1.000000,NR1H4.H13CORE.1.P.B,1.000000,HSF5.H13CORE.0.M.C,1.000000,,,,


Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,HSC-2,fold_3,pos_patterns.pattern_0,42572,ETV6.H13CORE.1.P.B,1.163640e-03,ERF.H13CORE.0.PS.A,4.039230e-03,EHF.H13CORE.0.P.B,4.039230e-03,...,ELF5.H13CORE.0.PSM.A,0.006528,SPIB.H13CORE.0.P.B,0.006528,SPIB.H13CORE.2.SM.B,0.006528,ETV7.H13CORE.1.P.C,0.015886,ETV2.H13CORE.1.PM.A,0.015886
1,HSC-2,fold_3,pos_patterns.pattern_1,25854,CTCF.H13CORE.0.P.B,5.322150e-12,CTCFL.H13CORE.0.P.B,1.338810e-06,ZNF503.H13CORE.0.P.B,1.254000e-01,...,MUSC.H13CORE.0.SM.B,0.148837,NDF2.H13CORE.0.P.B,0.173978,ZIC3.H13CORE.0.P.B,0.173978,ZIC2.H13CORE.0.P.B,0.173978,ZIC5.H13CORE.0.P.B,0.359794
2,HSC-2,fold_3,pos_patterns.pattern_2,6619,SP1.H13CORE.0.P.B,1.608310e-06,KLF12.H13CORE.0.P.C,1.608310e-06,SP3.H13CORE.0.P.B,1.608310e-06,...,SP4.H13CORE.0.P.C,0.000003,KLF11.H13CORE.0.P.B,0.000008,KLF9.H13CORE.1.P.B,0.000016,SP2.H13CORE.0.P.D,0.000040,SP5.H13CORE.0.P.B,0.000101
3,HSC-2,fold_3,pos_patterns.pattern_3,5981,RUNX3.H13CORE.0.P.B,2.267310e-02,RUNX2.H13CORE.0.P.B,3.337300e-02,RUNX1.H13CORE.0.P.B,3.772170e-02,...,RUNX3.H13CORE.2.S.B,0.433868,RUNX2.H13CORE.1.S.B,0.433868,FOXH1.H13CORE.0.P.B,0.771254,TCF7.H13CORE.0.PSM.A,0.774489,RUNX3.H13CORE.1.S.B,1.000000
4,HSC-2,fold_3,pos_patterns.pattern_4,4148,NFYA.H13CORE.0.P.B,2.397190e-03,NFYB.H13CORE.0.P.B,2.397190e-03,NFYC.H13CORE.0.P.B,3.678280e-03,...,PBX3.H13CORE.0.P.B,0.025880,PBX1.H13CORE.1.P.B,0.106769,ZN799.H13CORE.0.P.C,0.527204,HNF6.H13CORE.1.S.B,0.545118,PBX3.H13CORE.1.P.B,0.631247
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
272,HSC-1,fold_3,neg_patterns.pattern_0,73,ZN75A.H13CORE.0.SM.B,4.261350e-01,SUH.H13CORE.0.P.B,4.261350e-01,ZN611.H13CORE.1.P.C,4.261350e-01,...,ZN75D.H13CORE.1.P.B,0.426135,TF65.H13CORE.0.P.B,0.526093,CREM.H13CORE.1.P.C,0.866724,PTF1A.H13CORE.0.P.B,0.866724,ZNF76.H13CORE.1.SM.B,0.924649
273,HSC-1,fold_3,neg_patterns.pattern_1,32,ZEB1.H13CORE.0.P.B,1.487870e-01,ZEB2.H13CORE.0.P.B,4.470060e-01,TBX19.H13CORE.0.PS.A,5.495420e-01,...,,,,,,,,,,
274,HSC-1,fold_3,neg_patterns.pattern_2,28,TYY1.H13CORE.0.PSM.A,2.284980e-01,TYY2.H13CORE.0.PS.A,2.284980e-01,TYY2.H13CORE.1.SM.B,8.927860e-01,...,ZN528.H13CORE.0.P.B,1.000000,ZBTB41.H13CORE.0.PSG.A,1.000000,USF3.H13CORE.0.PSGIB.A,1.000000,ZN560.H13CORE.0.P.C,1.000000,HXD8.H13CORE.0.SM.B,1.000000
275,HSC-1,fold_3,neg_patterns.pattern_3,24,ETV6.H13CORE.1.P.B,6.172390e-04,ERF.H13CORE.0.PS.A,6.172390e-04,ELF2.H13CORE.1.M.B,2.323200e-02,...,ETV3.H13CORE.0.SM.B,0.026033,ETV7.H13CORE.1.P.C,0.026033,E4F1.H13CORE.0.P.B,0.026033,ETV1.H13CORE.1.PM.A,0.026033,SPI1.H13CORE.0.P.B,0.027704


Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,HSC-2,fold_4,pos_patterns.pattern_0,35527,ETV6.H13CORE.1.P.B,2.476960e-03,ERF.H13CORE.0.PS.A,5.152620e-03,SPI1.H13CORE.0.P.B,5.152620e-03,...,SPIB.H13CORE.0.P.B,0.007875,ELF3.H13CORE.1.PM.A,0.012990,ELF5.H13CORE.0.PSM.A,0.017049,ETV7.H13CORE.1.P.C,0.020803,ETV2.H13CORE.1.PM.A,0.020803
1,HSC-2,fold_4,pos_patterns.pattern_1,27003,CTCF.H13CORE.0.P.B,2.736410e-10,CTCFL.H13CORE.0.P.B,1.244630e-05,ZNF503.H13CORE.0.P.B,9.604380e-02,...,MUSC.H13CORE.0.SM.B,0.112049,ZIC2.H13CORE.0.P.B,0.112049,ZIC3.H13CORE.0.P.B,0.112049,NDF2.H13CORE.0.P.B,0.152271,ZIC5.H13CORE.0.P.B,0.207216
2,HSC-2,fold_4,pos_patterns.pattern_2,8164,SP3.H13CORE.0.P.B,1.897970e-06,KLF11.H13CORE.0.P.B,4.203720e-06,SP4.H13CORE.0.P.C,4.203720e-06,...,SP1.H13CORE.0.P.B,0.000007,KLF12.H13CORE.0.P.C,0.000011,KLF9.H13CORE.1.P.B,0.000015,SP2.H13CORE.0.P.D,0.000054,KLF10.H13CORE.0.P.C,0.000124
3,HSC-2,fold_4,pos_patterns.pattern_3,5479,ELK4.H13CORE.0.PSM.A,1.066200e-02,FEV.H13CORE.0.S.B,1.066200e-02,ERG.H13CORE.1.SM.B,1.066200e-02,...,ELK3.H13CORE.0.PSM.A,0.014911,ETV1.H13CORE.0.PSM.A,0.014911,ETS1.H13CORE.0.S.B,0.014911,ELF3.H13CORE.0.S.B,0.014911,ETV4.H13CORE.1.SM.B,0.014911
4,HSC-2,fold_4,pos_patterns.pattern_4,4579,RUNX3.H13CORE.0.P.B,3.712160e-02,RUNX2.H13CORE.0.P.B,4.304240e-02,RUNX1.H13CORE.0.P.B,4.304240e-02,...,RUNX3.H13CORE.2.S.B,0.441967,RUNX2.H13CORE.1.S.B,0.441967,FOXH1.H13CORE.0.P.B,0.661810,TCF7.H13CORE.0.PSM.A,0.828322,RUNX3.H13CORE.1.S.B,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
252,HSC-1,fold_4,pos_patterns.pattern_48,25,IRF4.H13CORE.2.SM.B,4.923830e-04,IRF5.H13CORE.0.SM.B,2.130560e-03,ZN197.H13CORE.0.P.C,6.213380e-03,...,IRF4.H13CORE.1.S.B,0.008521,ZN362.H13CORE.0.P.C,0.011409,IRF8.H13CORE.2.SM.B,0.011409,ZIK1.H13CORE.0.P.C,0.014984,IRF7.H13CORE.0.S.B,0.014984
253,HSC-1,fold_4,pos_patterns.pattern_49,23,TRPS1.H13CORE.0.P.B,3.222900e-01,GATA4.H13CORE.0.PSM.A,3.222900e-01,GATA3.H13CORE.1.SM.B,5.207140e-01,...,GATA2.H13CORE.0.PSM.A,0.669193,SPIB.H13CORE.1.S.C,0.679635,GATA6.H13CORE.0.PSM.A,0.679635,SPI1.H13CORE.0.P.B,0.762873,GATA2.H13CORE.1.P.B,0.762873
254,HSC-1,fold_4,pos_patterns.pattern_50,23,IRF8.H13CORE.0.P.B,9.847310e-05,PRDM1.H13CORE.1.PS.A,1.270220e-03,IRF1.H13CORE.0.P.B,1.530890e-03,...,IRF2.H13CORE.0.PSM.A,0.001782,IRF9.H13CORE.0.PSM.A,0.001855,SPIB.H13CORE.0.P.B,0.001855,STAT2.H13CORE.0.P.B,0.006226,IRF8.H13CORE.1.SM.B,0.006226
255,HSC-1,fold_4,pos_patterns.pattern_51,20,TRPS1.H13CORE.0.P.B,1.427330e-02,GATA2.H13CORE.1.P.B,1.854520e-02,GATA1.H13CORE.0.P.B,1.854520e-02,...,GATA3.H13CORE.1.SM.B,0.064257,GATA3.H13CORE.0.PS.A,0.068033,GATA2.H13CORE.0.PSM.A,0.108912,GATA5.H13CORE.0.SM.B,0.108912,GATA1.H13CORE.1.PSM.A,0.127396


Unnamed: 0,cluster,fold,pattern,num_seqlets,match0,qval0,match1,qval1,match2,qval2,...,match5,qval5,match6,qval6,match7,qval7,match8,qval8,match9,qval9
0,HSC-2,mean,pos_patterns.pattern_0,40574,ETV6.H13CORE.1.P.B,3.035110e-03,SPI1.H13CORE.0.P.B,7.295150e-03,ERF.H13CORE.0.PS.A,7.295150e-03,...,SPIB.H13CORE.2.SM.B,0.007295,ELF3.H13CORE.1.PM.A,0.016600,ELF5.H13CORE.0.PSM.A,0.016600,ETV7.H13CORE.1.P.C,0.019499,ETV2.H13CORE.1.PM.A,0.026426
1,HSC-2,mean,pos_patterns.pattern_1,27100,CTCF.H13CORE.0.P.B,6.986220e-12,CTCFL.H13CORE.0.P.B,9.271440e-07,ZNF503.H13CORE.0.P.B,1.171670e-01,...,ZN383.H13CORE.0.P.C,0.129514,ZIC2.H13CORE.0.P.B,0.175320,ZIC3.H13CORE.0.P.B,0.175320,NDF2.H13CORE.0.P.B,0.175320,ZIC5.H13CORE.0.P.B,0.340209
2,HSC-2,mean,pos_patterns.pattern_2,8482,SP3.H13CORE.0.P.B,2.778820e-07,SP1.H13CORE.2.P.B,2.778820e-07,SP4.H13CORE.0.P.C,1.311410e-06,...,KLF11.H13CORE.0.P.B,0.000004,KLF12.H13CORE.0.P.C,0.000005,KLF9.H13CORE.1.P.B,0.000020,SP2.H13CORE.0.P.D,0.000044,KLF10.H13CORE.0.P.C,0.000099
3,HSC-2,mean,pos_patterns.pattern_3,5383,RUNX3.H13CORE.0.P.B,3.445700e-02,RUNX1.H13CORE.0.P.B,4.208530e-02,RUNX2.H13CORE.0.P.B,4.208530e-02,...,RUNX3.H13CORE.2.S.B,0.439177,RUNX2.H13CORE.1.S.B,0.439177,FOXH1.H13CORE.0.P.B,0.655356,TCF7.H13CORE.0.PSM.A,0.823109,RUNX3.H13CORE.1.S.B,1.000000
4,HSC-2,mean,pos_patterns.pattern_4,5030,FEV.H13CORE.0.S.B,1.409830e-02,ERG.H13CORE.1.SM.B,1.409830e-02,ELK4.H13CORE.0.PSM.A,1.409830e-02,...,ETS1.H13CORE.0.S.B,0.014098,GABPA.H13CORE.0.PSM.A,0.014098,ELF3.H13CORE.0.S.B,0.014098,ETS2.H13CORE.0.S.C,0.021930,ELK1.H13CORE.0.PSM.A,0.022965
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
252,HSC-1,mean,pos_patterns.pattern_46,31,BATF.H13CORE.0.P.B,2.005020e-02,NFE2.H13CORE.0.P.B,2.005020e-02,FOSB.H13CORE.0.P.B,2.528690e-02,...,JUND.H13CORE.0.PM.A,0.030860,BATF.H13CORE.1.P.B,0.030860,BACH2.H13CORE.0.P.B,0.030860,BATF3.H13CORE.0.P.B,0.047029,BATF3.H13CORE.1.P.B,0.047029
253,HSC-1,mean,pos_patterns.pattern_47,27,SP4.H13CORE.2.P.B,1.597320e-04,KLF8.H13CORE.1.P.C,1.597320e-04,KLF12.H13CORE.0.P.C,1.597320e-04,...,KLF9.H13CORE.1.P.B,0.000335,MAZ.H13CORE.1.P.B,0.000452,KLF13.H13CORE.1.P.C,0.000452,SP4.H13CORE.0.P.C,0.000452,SP1.H13CORE.2.P.B,0.001110
254,HSC-1,mean,pos_patterns.pattern_48,26,SPI1.H13CORE.0.P.B,3.770870e-03,SPI1.H13CORE.1.S.B,1.450420e-02,SPIB.H13CORE.1.S.C,1.527940e-02,...,PRDM1.H13CORE.1.PS.A,0.062392,SPIB.H13CORE.0.P.B,0.090110,ELF2.H13CORE.1.M.B,0.123089,ZN816.H13CORE.1.P.C,0.129315,ELF4.H13CORE.1.M.B,0.129315
255,HSC-1,mean,pos_patterns.pattern_49,24,GATA1.H13CORE.0.P.B,7.621920e-06,GATA2.H13CORE.1.P.B,4.072380e-05,TAL1.H13CORE.0.P.B,4.072380e-05,...,GATA3.H13CORE.0.PS.A,0.002746,TRPS1.H13CORE.0.P.B,0.002800,GATA4.H13CORE.0.PSM.A,0.004816,GATA6.H13CORE.0.PSM.A,0.013097,GATA3.H13CORE.1.SM.B,0.020150


# Export Files

In [8]:
# Save to TSV and XLSX
os.makedirs("motifs", exist_ok=True)

for k, fs in df_dict.items():
    for f, df in fs.items():
        if df is not None:
            df.to_csv(f"motifs/df_counts_motifs_{k}_{f}.tsv.gz", sep="\t", index=False)
            df.to_excel(f"motifs/df_counts_motifs_{k}_{f}.xlsx", index=False)