In [52]:
import numpy as np
import pandas as pd
import os
import seqlogo
import logomaker
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from PIL import Image
import io



In [53]:

def parse_meme_file(file_path):
    """Extract the letter-probability matrices from a MEME-format motif file.

    Parameters
    ----------
    file_path : str or path-like
        Path of the text file to read.

    Returns
    -------
    list of (str, pandas.DataFrame)
        One ``(motif_name, matrix)`` tuple per motif that has at least one
        matrix row, in file order. ``motif_name`` is everything that follows
        the ``MOTIF`` keyword, stripped of surrounding whitespace (empty if
        the header carries no name). ``matrix`` holds one row per parsed
        matrix line under the columns ``A``, ``C``, ``G``, ``T``.
        Motifs for which no matrix row was read are not returned.

    Notes
    -----
    A matrix starts after a ``letter-probability matrix`` header line and
    ends at the first non-blank line that is not purely numeric (for example
    the ``URL ...`` line MEME writes after a motif). Blank lines and lines
    starting with ``---`` are skipped.
    """
    columns = ['A', 'C', 'G', 'T']
    motifs = []
    current_motif = []
    reading_motif = False
    motif_name = ""

    with open(file_path, 'r') as file:
        for line in file:
            if line.startswith('MOTIF'):
                # A new MOTIF header closes the previous motif, if it had rows.
                if current_motif:
                    motifs.append((motif_name.strip(), pd.DataFrame(current_motif, columns=columns)))
                    current_motif = []
                # Everything after the MOTIF keyword is the name; a bare
                # "MOTIF" line yields an empty name instead of an IndexError.
                header_fields = line.split(None, 1)
                motif_name = header_fields[1] if len(header_fields) > 1 else ""
                reading_motif = False
            elif line.startswith('letter-probability matrix'):
                reading_motif = True
            elif reading_motif and line.strip() and not line.startswith('---'):
                try:
                    values = [float(num) for num in line.split()]
                except ValueError:
                    # Non-numeric text (e.g. "URL ...") marks the end of the
                    # matrix; ignore everything up to the next matrix header.
                    reading_motif = False
                    continue
                current_motif.append(values)

    # Store the last motif, which has no following MOTIF header to close it.
    if current_motif:
        motifs.append((motif_name.strip(), pd.DataFrame(current_motif, columns=columns)))

    return motifs


In [54]:

# Input motif file and the folder that will receive the rendered logos.
# Despite its name, pdf_dir is the folder the rendering loop writes PNG files into.
file_path = 'meme_file_multitask_TRASH_X.txt'
pdf_dir = 'tfmodisco_seqlogo_X'

# Parse the motifs first, then make sure the output folder exists
# (exist_ok=True makes re-running this cell harmless).
motifs = parse_meme_file(file_path)
os.makedirs(pdf_dir, exist_ok=True)

In [55]:
# Write one PNG logo per parsed motif into pdf_dir, named after the motif.
for motif_name, motif_df in motifs:
    # Nothing to draw for a motif without matrix rows; report it and move on.
    if motif_df.empty:
        print(f"{motif_name} is empty.")
        continue

    # motif_df has the columns A/C/G/T; it is transposed before being handed
    # to seqlogo. NOTE(review): confirm this is the orientation seqlogo.Ppm expects.
    ppm = seqlogo.Ppm(motif_df.T)
    png_path = os.path.join(pdf_dir, f'{motif_name}.png')
    seqlogo.seqlogo(
        ppm,
        ic_scale=True,
        format='png',
        size='medium',
        filename=png_path,
    )