In [None]:
!pip install bionumpy
!wget https://raw.githubusercontent.com/bionumpy/bionumpy/dev/example_data/ctcf_chr21-22.bed.gz
!wget https://raw.githubusercontent.com/bionumpy/bionumpy/dev/example_data/CTCF_chr21-22.wig.gz
!wget https://raw.githubusercontent.com/bionumpy/bionumpy/dev/example_data/chr21-22.chrom.sizes
!wget https://raw.githubusercontent.com/bionumpy/bionumpy/dev/example_data/ctcf_chr21-22.bam
!wget https://raw.githubusercontent.com/bionumpy/bionumpy/dev/example_data/chr21a22.gtf
!wget https://raw.githubusercontent.com/bionumpy/bionumpy/dev/example_data/1000Genomes_chr21-22.vcf.gz

In [None]:
import plotly.express as px
import numpy as np
import bionumpy as bnp

In [None]:
import bionumpy as bnp
import plotly.express as px
import numpy as np

# Load the genome from the chromosome sizes file, then the reads from the BAM file
genome = bnp.Genome.from_file("chr21-22.chrom.sizes")
reads = genome.read_intervals("ctcf_chr21-22.bam")

# Build the read pileup (stored efficiently as a RunLengthArray)
pileup = reads.get_pileup()

# Any region can be indexed: pick the chromosome first, then slice by position
chr22_pileup = pileup["chr22"]
region = chr22_pileup[19970400:19970800]

# Convert the selected region to an array and draw it as a line plot
region_figure = px.line(region.to_array())
region_figure.show()

In [None]:
# Read genome, a wig read pileup and transcripts
genome = bnp.Genome.from_file("chr21-22.chrom.sizes", sort_names=True)
annotation = genome.read_annotation("chr21a22.gtf")
transcripts = annotation.transcripts

# Half-width of the window around each TSS. Defined once so the windows and the
# x-axis of the plot below cannot drift apart.
flank = 500

# Get transcript start locations and make windows around them.
# NOTE(review): .sorted() presumably puts the locations in the name-sorted
# chromosome order requested with sort_names=True above - confirm.
tss = transcripts.get_location('start').sorted()
windows = tss.get_windows(flank=flank)

# Get mean read pileup within these windows (mean over axis 0 of the windowed track)
track = genome.read_track("CTCF_chr21-22.wig.gz", stream=True)
signals = track[windows]
mean_signal = signals.mean(axis=0)
signal = bnp.compute(mean_signal)  # Compute the actual value

# Plot the mean signal against the position relative to the TSS
px.line(x=np.arange(-flank, flank), y=signal.to_array(),
        title="Read pileup relative to TSS start",
        labels={"x": "Position relative to TSS start", "y": "Mean read pileup"}).show()


In [None]:
import plotly.graph_objs as go

# Read genome (ignoring chrEBV) and the narrowPeak-formatted peaks
genome = bnp.Genome.from_file("chr21-22.chrom.sizes").with_ignored_added(['chrEBV'])
peaks = bnp.open("ctcf_chr21-22.bed.gz", buffer_type=bnp.NarrowPeakBuffer).read()
# The summit field is added to the peak start to get the summit position
location_entries = bnp.LocationEntry(peaks.chromosome, peaks.start+peaks.summit)

# Create locations of peaks summits
summits = genome.get_locations(location_entries).sorted()

# Half-width of the window around each summit. Defined once so the windows and
# the x-axis of the plot below cannot drift apart.
flank = 200

# Create windows around summits and extract read pileup
windows = summits.get_windows(flank=flank)
reads = genome.read_intervals("ctcf_chr21-22.bam", stream=False, stranded=True)

# Get mean pileup for reads with negative and positive strand
signals_dict = {strand: reads[reads.strand == strand].get_pileup()[windows].mean(axis=0)
                for strand in '+-'}
signals_dict = bnp.compute(signals_dict)

# One trace per strand, plotted against the distance from the summit
go.Figure(
    [go.Scatter(x=np.arange(-flank, flank), y=signal.to_array(), name=f'{strand} Strand')
     for strand, signal in signals_dict.items()],
    layout={'title': 'Summit plot',
            'xaxis_title': 'Distance from peak summit',
            'yaxis_title': 'Read coverage'}).show()

In [None]:
# Load the genome and the variant locations from the VCF file
genome = bnp.Genome.from_file("chr21-22.chrom.sizes")
variants = genome.read_locations("1000Genomes_chr21-22.vcf.gz", has_numeric_chromosomes=True)

# Build windows extending `flank` positions to each side of every variant
flank = 100
windows = variants.get_windows(flank=flank)

# Read the stranded reads as a stream and take their pileup as the track
reads = genome.read_intervals("ctcf_chr21-22.bam", stream=True, stranded=True)
track = reads.get_pileup()

# Extract the track inside the windows and take the mean over axis 0
signals = track[windows]
mean_signal = signals.mean(axis=0)

# Compute the actual value and convert it to an array for plotting
signal = bnp.compute(mean_signal).to_array()

# Plot the mean signal against the position relative to the variant
relative_positions = np.arange(-flank, flank)
variant_figure = px.line(
    x=relative_positions,
    y=signal,
    title="Read pileup relative to common variants",
    labels={"x": "Position relative to variant location", "y": "Mean read pileup"},
)
variant_figure.show()
