In [1]:
import subprocess
import pysam
import shutil
import os

# 设置BAM文件目录
bam_dir = "/data/haocheng/data/bam/GM/"
bam_files = sorted([f for f in os.listdir(bam_dir) if f.endswith('.bam')])

# 获取第一个和第二个BAM文件的路径
bam_file1 = os.path.join(bam_dir, bam_files[0])
bam_file2 = os.path.join(bam_dir, bam_files[1])

# 定义输出BigWig文件的路径
bw_file1 = bam_file1.replace('.bam', '.bw')
bw_file2 = bam_file2.replace('.bam', '.bw')
merged_bw_file = os.path.join(bam_dir, 'merged_output.bw')
temp_file = '/data/haocheng/data/bam/GM/temp.bw'
temp_dir = "/data/haocheng/data/bam/GM/temp/"
os.makedirs(temp_dir, exist_ok=True)

def get_chromosome_lengths(bam_file):
    samfile = pysam.AlignmentFile(bam_file, "rb")
    chrom_lengths = {chrom: samfile.get_reference_length(chrom) for chrom in samfile.references if chrom.startswith('chr') and chrom != 'chrM'}
    samfile.close()
    return chrom_lengths

def convert_bam_to_bw(bam_file, bw_file):
    chrom_lengths = get_chromosome_lengths(bam_file)
    temp_bw_files = []

    regions = [f"{chrom}:1:{length}" for chrom, length in chrom_lengths.items() if chrom in [f"chr{i}" for i in range(1, 23)] + ["chrX", "chrY"]]
    for region in regions:
        temp_bw_file = os.path.join(temp_dir, f"{os.path.basename(bam_file).replace('.bam', '')}_{region.replace(':', '-')}.bw")
        temp_bw_files.append(temp_bw_file)
        command = [
            "bamCoverage",
            "-b", bam_file,
            "-o", temp_bw_file,
            "--binSize", "1",
            "--region", region,
            "--outFileFormat", "bigwig" 
        ]
        print(f"正在转换区域: {region}...")
        subprocess.run(command, check=True)
        print(f"{region} 转换完成。")
    
    # 合并所有小的BigWig文件为一个大文件
    merge_command = ["wiggletools", "write", bw_file, "sum"] + temp_bw_files
    subprocess.run(merge_command, check=True)
    print(f"所有区域合并完成: {bw_file}")


In [None]:
import subprocess
import os

# 设置BAM文件目录
bam_dir = "/data/haocheng/data/bam/GM/"
bam_files = sorted([f for f in os.listdir(bam_dir) if f.endswith('.bam')])

def index_bam(bam_file):
    bai_file = bam_file + ".bai"
    if not os.path.exists(bai_file):
        print(f"Indexing {bam_file}...")
        subprocess.run(["samtools", "index", bam_file], check=True)
        print(f"Indexing complete: {bam_file}")
    else:
        print(f"Index already exists for {bam_file}")

# 遍历目录中的所有BAM文件，生成索引
for bam_file in bam_files:
    bam_file_path = os.path.join(bam_dir, bam_file)
    index_bam(bam_file_path)


In [2]:
# 获取所有临时 BigWig 文件的路径
temp_bw_files = [os.path.join(temp_dir, f) for f in os.listdir(temp_dir) if f.endswith('.bw')]

# 合并所有小的 BigWig 文件为一个大文件
merge_command = ["wiggletools", "write", bw_file2, "sum"] + temp_bw_files
print("正在合并所有区域...")
subprocess.run(merge_command, check=True)
print(f"所有区域合并完成: {bw_file2}")

正在合并所有区域...


File /data/haocheng/data/bam/GM/ENCFF159TDB.bw already exists, please delete it if you want to overwrite it.


CalledProcessError: Command '['wiggletools', 'write', '/data/haocheng/data/bam/GM/ENCFF159TDB.bw', 'sum', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr1-1-248956422.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr2-1-242193529.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr3-1-198295559.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr4-1-190214555.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr5-1-181538259.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr6-1-170805979.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr7-1-159345973.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr8-1-145138636.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr9-1-138394717.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr10-1-133797422.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr11-1-135086622.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr12-1-133275309.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr13-1-114364328.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr14-1-107043718.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr15-1-101991189.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr16-1-90338345.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr17-1-83257441.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr18-1-80373285.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr19-1-58617616.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr20-1-64444167.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr21-1-46709983.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chr22-1-50818468.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chrX-1-156040895.bw', '/data/haocheng/data/bam/GM/temp/ENCFF123IWQ_chrY-1-57227415.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr1-1-248956422.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr2-1-242193529.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr3-1-198295559.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr4-1-190214555.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr5-1-181538259.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr6-1-170805979.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr7-1-159345973.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr8-1-145138636.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr9-1-138394717.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr10-1-133797422.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr11-1-135086622.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr12-1-133275309.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr13-1-114364328.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr14-1-107043718.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr15-1-101991189.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr16-1-90338345.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr17-1-83257441.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr18-1-80373285.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr19-1-58617616.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr20-1-64444167.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr21-1-46709983.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chr22-1-50818468.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chrX-1-156040895.bw', '/data/haocheng/data/bam/GM/temp/ENCFF159TDB_chrY-1-57227415.bw']' returned non-zero exit status 1.

In [None]:

# 转换两个BAM文件
print(f"Converting {bam_file1} to {bw_file1}...")
convert_bam_to_bw(bam_file1, bw_file1)
print(f"Conversion complete: {bw_file1}")

print(f"Converting {bam_file2} to {bw_file2}...")
convert_bam_to_bw(bam_file2, bw_file2)
print(f"Conversion complete: {bw_file2}")


In [10]:
# 使用bigwigMerge叠加BigWig文件
def merge_bigwig_files(bw_file1, bw_file2, merged_bw_file):
    command = [
        'bigWigMerge',
        bw_file1,
        bw_file2,
        merged_bw_file
    ]
    subprocess.run(command, check=True)
    shutil.copy(merged_bw_file, temp_file)

# 叠加BigWig文件
print(f"Merging {bw_file1} and {bw_file2} into {merged_bw_file}...")
merge_bigwig_files(bw_file1, bw_file2, merged_bw_file)
print(f"Merged BigWig file saved as '{merged_bw_file}'.")


Merging /data/haocheng/data/bam/GM/ENCFF123IWQ.bw and /data/haocheng/data/bam/GM/ENCFF159TDB.bw into /data/haocheng/data/bam/GM/merged_output.bw...


/data/haocheng/data/bam/GM/ENCFF123IWQ.bw is not a big wig file


CalledProcessError: Command '['bigWigMerge', '/data/haocheng/data/bam/GM/ENCFF123IWQ.bw', '/data/haocheng/data/bam/GM/ENCFF159TDB.bw', '/data/haocheng/data/bam/GM/merged_output.bw']' returned non-zero exit status 255.

In [None]:

# 从数组中移除已使用的 BAM 文件
bam_files = bam_files[2:]

while bam_files:
    next_bam = os.path.join(bam_dir, bam_files[0])
    next_bw_file = next_bam.replace('.bam', '.bw')  # 新的 BigWig 文件名
    print(f"Converting {next_bam} to {next_bw_file}...")
    convert_bam_to_bw(next_bam, next_bw_file)
    print(f"Conversion complete: {next_bw_file}")

    print(f"Merging {next_bw_file} with {temp_file} into {merged_bw_file}...")
    merge_bigwig_files(next_bw_file, temp_file, merged_bw_file)
    print(f"Merged BigWig file saved as '{merged_bw_file}'.")

    bam_files = bam_files[1:]  # 删除第一个元素
