In [19]:
# Preview the first lines of the BED file currently on disk (shell escape).
!head hg38_Arima2.bed

chr1	11160	11164	RestrictionType
chr1	12411	12415	RestrictionType
chr1	12461	12465	RestrictionType
chr1	12686	12690	RestrictionType
chr1	12829	12833	RestrictionType
chr1	13315	13319	RestrictionType
chr1	13420	13424	RestrictionType
chr1	13566	13570	RestrictionType
chr1	13698	13702	RestrictionType
chr1	13915	13919	RestrictionType


In [25]:
from Bio import SeqIO
from Bio.Restriction import RestrictionBatch
from Bio.Seq import Seq
import sys
from itertools import pairwise

def get_cut_sites(seq_record, enzymes):
    cut_sites = []
    analysis = enzymes.search(seq_record.seq)
    for enzyme in analysis:
        positions = [0] + analysis[enzyme]
        for pos in pairwise(positions):
            cut_sites.append((seq_record.id, *pos))
    return cut_sites

def write_bed_file(cut_sites, output_file):
    """Write intervals to ``output_file`` as tab-separated three-column lines.

    Args:
        cut_sites: Iterable of indexable records; items 0, 1 and 2 of each
            record are written as the three columns, anything beyond index 2
            is ignored.
        output_file: Path of the file to create; it is opened in ``"w"`` mode,
            so existing content is replaced.
    """
    with open(output_file, "w") as handle:
        handle.writelines(
            f"{record[0]}\t{record[1]}\t{record[2]}\n" for record in cut_sites
        )

def main(reference_genome, enzyme_names, output_file):
    """Digest a FASTA reference in silico and write the fragments to a BED file.

    Args:
        reference_genome: Path to the FASTA file to digest.
        enzyme_names: Enzyme names passed to ``RestrictionBatch``.
        output_file: Path of the BED file to write (overwritten if present).

    Raises:
        ValueError: If two FASTA records share the same id.
    """
    # Build the enzyme batch first so an invalid enzyme name fails before any
    # sequence data is read.
    enzymes = RestrictionBatch(enzyme_names)

    # Stream records one at a time instead of materialising the whole genome
    # with SeqIO.to_dict; each record is only needed while it is being digested.
    # The duplicate-id check that to_dict performed is kept explicitly.
    seen_ids = set()
    cut_sites = []
    for seq_record in SeqIO.parse(reference_genome, "fasta"):
        if seq_record.id in seen_ids:
            raise ValueError(f"Duplicate key '{seq_record.id}'")
        seen_ids.add(seq_record.id)
        cut_sites.extend(get_cut_sites(seq_record, enzymes))

    # Write only after every record was processed, so a failure above never
    # leaves a partially written BED file behind.
    write_bed_file(cut_sites, output_file)

if __name__ == "__main__":
    # Run configuration: the enzymes to digest with, the FASTA input and the
    # BED output path.
    enzyme_names = ["DpnII", "HinfI"]
    reference_genome, output_file = "hg38.fna", "hg38_Arima2.bed"

    main(
        reference_genome=reference_genome,
        enzyme_names=enzyme_names,
        output_file=output_file,
    )

BED file created: hg38_Arima2.bed


In [28]:
# Preview the first lines of the BED file on disk (shell escape).
!head hg38_Arima2.bed

chr1	0	11160
chr1	11160	12411
chr1	12411	12461
chr1	12461	12686
chr1	12686	12829
chr1	12829	13315
chr1	13315	13420
chr1	13420	13566
chr1	13566	13698
chr1	13698	13915


In [24]:
v = [1, 2]


def fun(a, b, c):
    """Print the three arguments on one line, separated by single spaces."""
    values = (a, b, c)
    print(*values)


# Star-unpacking fills the remaining positional parameters from the list,
# so this call is equivalent to fun(1, 1, 2).
fun(1, *v)

1 1 2
