In [1]:
import pysam

# Output paths. Only `bam_out` is written below; to inspect the records as
# text instead, open `sam_out` with mode "w" in place of (bam_out, "wb").
bam_out = "synthetic_test.bam"
sam_out = "synthetic_test.sam"

# Minimal header: a single 1000 bp reference sequence named chr1.
header = {
    'HD': {'VN': '1.0'},
    'SQ': [{'LN': 1000, 'SN': 'chr1'}]
}

# One tuple per synthetic read:
#   (name, SAM flag, chrom, 1-based start, value of the 'ts' tag, sequence)
# Flag 0 is a forward-strand alignment, flag 16 a reverse-strand alignment.
reads = [
    ('read1', 0, 'chr1', 101, '+', 'A'*50),
    ('read2', 0, 'chr1', 201, '+', 'T'*50),
    ('read3', 16, 'chr1', 252, '-', 'C'*50),
    ('read4', 16, 'chr1', 352, '-', 'G'*50),
]

with pysam.AlignmentFile(bam_out, "wb", header=header) as outf:
    for qname, flag, rname, pos, ts_strand, seq in reads:
        # Derive CIGAR and quality length from the sequence itself so the
        # record stays self-consistent if a read length is ever changed.
        read_len = len(seq)

        a = pysam.AlignedSegment()
        a.query_name = qname
        a.flag = flag
        a.reference_id = outf.get_tid(rname)
        a.reference_start = pos - 1  # convert 1-based start to 0-based
        a.mapping_quality = 60
        a.cigarstring = f'{read_len}M'  # full-length match, no indels/clipping
        # Assign the sequence before the qualities: in pysam, setting
        # query_sequence discards any previously assigned qualities.
        a.query_sequence = seq
        a.query_qualities = pysam.qualitystring_to_array('I' * read_len)

        # Strand annotation stored as a single-character ('A') 'ts' tag.
        a.set_tag('ts', ts_strand, value_type='A')

        outf.write(a)

In [2]:
# Sort the BAM written above into a new file, then index the sorted copy.
unsorted_bam_path = "synthetic_test.bam"
sorted_bam_path = "synthetic_test.sorted.bam"
pysam.sort("-o", sorted_bam_path, unsorted_bam_path)
pysam.index(sorted_bam_path)

''

In [3]:
# Sanity check: list every read with its optional tags. The file is opened in
# a `with` block so the handle is closed once the loop finishes.
with pysam.AlignmentFile("synthetic_test.sorted.bam", "rb") as sorted_bam:
    for read in sorted_bam:
        # get_tags() is the non-deprecated spelling of the `tags` property and
        # returns the same list of (tag, value) tuples.
        print(read.query_name, read.get_tags())

read1 [('ts', '+')]
read2 [('ts', '+')]
read3 [('ts', '-')]
read4 [('ts', '-')]


In [4]:
# first flip reads
import pysam
# Flag swap used to flip an alignment's strand. Only the two plain flag values
# are covered: 0 (forward) and 16 (reverse).
reverse_strand = {0: 16, 16: 0}

with pysam.AlignmentFile("synthetic_test.sorted.bam", "rb",
                         threads=8) as input_bam:
    # template=input_bam makes the output reuse the input's header.
    with pysam.AlignmentFile("synthetic_test.flipped.bam", "wb",
                             template=input_bam,
                             threads=8) as output_bam:
        for read in input_bam:
            # Reads lacking a 'ts' tag, or whose flag is anything other than
            # 0 or 16, are not written to the output at all.
            # NOTE(review): confirm that dropping them (rather than passing
            # them through unchanged) is the intended behaviour.
            if not (read.has_tag('ts') and read.flag in reverse_strand):
                continue

            if read.get_tag('ts') == '-':
                # Flip the alignment strand and mark the read as '+'.
                read.flag = reverse_strand[read.flag]
                # Keep the tag a single-character 'A' value, matching how it
                # was written originally; without value_type pysam infers the
                # type from the Python value, which need not be 'A'.
                read.set_tag('ts', '+', value_type='A')

            output_bam.write(read)

In [5]:
# Sort the strand-flipped BAM into a new file, then index the sorted copy.
flipped_bam_path = "synthetic_test.flipped.bam"
flipped_sorted_bam_path = "synthetic_test.flipped.sorted.bam"
pysam.sort("-o", flipped_sorted_bam_path, flipped_bam_path)
pysam.index(flipped_sorted_bam_path)

''

In [None]:
%%bash
# Run this cell through bash: the commands below are shell, not Python, and
# would be a SyntaxError if executed by the Python kernel.
#
# Converts the flipped, sorted BAM to BED and collapses every record to a
# 1 bp interval at its start coordinate: columns 1, 2, (2 + 1), 4, 5, 6.

# Fail the cell if bedtools fails, instead of letting awk's exit status hide it.
set -o pipefail

# NOTE(review): assumes the environment-modules `module` function is available
# in the bash subprocess started by %%bash -- confirm on the target system.
module load bedtools

bedtools bamtobed -i synthetic_test.flipped.sorted.bam | awk '{
    print $1"\t"$2"\t"($2+1)"\t"$4"\t"$5"\t"$6
}' > synthetic_test.bed