In [None]:
!pip install bionumpy
!wget https://raw.githubusercontent.com/bionumpy/bionumpy/dev/example_data/ctcf_chr21-22.bed.gz
!wget https://raw.githubusercontent.com/bionumpy/bionumpy/dev/example_data/CTCF_chr21-22.wig.gz
!wget https://raw.githubusercontent.com/bionumpy/bionumpy/dev/example_data/chr21-22.chrom.sizes
!wget https://raw.githubusercontent.com/bionumpy/bionumpy/dev/example_data/ctcf_chr21-22.bam
!wget https://raw.githubusercontent.com/bionumpy/bionumpy/dev/example_data/chr21a22.gtf

In [None]:
import plotly.express as px
import numpy as np
import bionumpy as bnp

## Example 1: Getting a read pileup from a BAM file

In [None]:
# Load the genome description from the chrom.sizes file and read the
# alignments in the BAM file as intervals on that genome.
genome = bnp.Genome.from_file("chr21-22.chrom.sizes")
reads = genome.read_intervals("ctcf_chr21-22.bam")
print(reads)

# Getting read pileup (stored efficiently as a RunLengthArray)
pileup = reads.get_pileup()

# Region to inspect, named once so the slice and the plot axis cannot drift apart.
region_chromosome = "chr22"
region_start, region_end = 19970400, 19970800

# We can index any region: first by chromosome name, then by a position slice.
region = pileup[region_chromosome][region_start:region_end]

# Plot against genomic coordinates (not the 0-based array index) and label the
# figure so it can be read on its own.
px.line(x=np.arange(region_start, region_end), y=region.to_array(),
        title=f"Read pileup at {region_chromosome}:{region_start}-{region_end}",
        labels={"x": f"Position on {region_chromosome}", "y": "Read pileup"}).show()

## Example 2: Finding mean read pileup value around transcription start sites

In [None]:
# Number of positions on each side of a transcription start site (TSS) to
# include. Used for both the windows and the plot's x-axis so they stay in sync.
FLANK = 500

# Read genome, a wig read pileup and transcripts.
# NOTE(review): sort_names=True presumably orders chromosomes by name to match
# the wig file's ordering — confirm against the BioNumPy documentation.
genome = bnp.Genome.from_file("chr21-22.chrom.sizes", sort_names=True)
# stream=True: the track is not loaded eagerly; the result is materialized
# later with bnp.compute (presumably chunk-by-chunk — confirm in BioNumPy docs).
track = genome.read_track("CTCF_chr21-22.wig.gz", stream=True)
annotation = genome.read_annotation("chr21a22.gtf")
transcripts = annotation.transcripts

# Get transcript start locations and make windows around them
tss = transcripts.get_location('start')
windows = tss.get_windows(flank=FLANK)

# Get the pileup values inside every window and average across windows
# (axis 0), giving one mean value per position relative to the TSS.
signals = track[windows]
mean_signal = signals.mean(axis=0)

# Evaluate the deferred computation, then convert the result to a plain array.
computed_mean_signal, = bnp.compute(mean_signal)
signal = computed_mean_signal.to_array()

# The x-axis spans the same 2 * FLANK positions that the windows cover.
px.line(x=np.arange(-FLANK, FLANK), y=signal, title="Read pileup relative to TSS start",
        labels={"x": "Position relative to TSS start", "y": "Mean read pileup"}).show()