# This notebook lets you overlap positive- and negative-strand bigwigs (containing RPM-normalized densities) with any region, returning the stranded values across that region.

### Note: positions with zero coverage will return nans. 
### Note: Be aware that this is stranded, so negative-strand regions return values in the opposite order to the genomic coordinates (i.e. reversed, so that they read upstream -> downstream). If you get confused, it is always a safe bet to check a genome browser!


In [1]:
import pandas as pd
import numpy as np
import pyBigWig
import pysam
import os
import pybedtools
from tqdm import tnrange, tqdm_notebook

In [2]:
class Density:
    """Minimal base interface for strand-aware density lookups."""

    def values(self, chrom, start, end, strand):
        """Placeholder lookup; this base implementation always returns 0."""
        return 0


class ReadDensity(Density):
    """
    ReadDensity class. See
    https://github.com/byee4/rbp-maps/maps/density/ReadDensity.py
    for a real example

    Attributes:
        self.pos(positive *.bw file handle, or None if opening failed)
        self.neg(negative *.bw file handle, or None if opening failed)
    """

    def __init__(self, pos, neg, name=None, bam=None):
        """
        Open the positive- and negative-strand bigwig files.

        Parameters
        ----------
        pos : str
            path to the positive-strand *.bw file
        neg : str
            path to the negative-strand *.bw file
        name : optional
            accepted for interface compatibility; not used here
        bam : optional
            accepted for interface compatibility; not used here

        Notes
        -----
        Errors raised while opening are printed rather than propagated.
        In that case any handle that was not opened stays None.
        """
        # Define both attributes before attempting to open anything, so a
        # failed open leaves the object in a known state instead of with
        # missing attributes.
        self.pos = None
        self.neg = None
        try:
            self.pos = pyBigWig.open(pos)
            self.neg = pyBigWig.open(neg)
        except Exception as e:
            print("couldn't open the bigwig files!")
            print(e)

    def values(self, chrom, start, end, strand):
        """
        Parameters
        ----------
        chrom : str
            (eg. chr1)
        start : int
            0-based start (first position in chromosome is 0)
        end : int
            1-based end (last position is not included)
        strand : str
            either '+' or '-'
        Returns
        -------
        densities : list
            values corresponding to density over specified positions.
            For '-' the list is reversed relative to coordinate order.
            If the lookup raises RuntimeError, a list of NaN with
            abs(start - end) entries is returned instead.
            If strand is neither '+' nor '-', a message is printed and
            the integer 1 is returned (not a list).
        """

        try:
            if strand == "+":
                return self.pos.values(chrom, start, end)
            elif strand == "-":
                # Reverse so the result does not follow coordinate order
                # for negative-strand regions.
                return list(reversed(self.neg.values(chrom, start, end)))
            else:
                print("Strand neither + or -")
                return 1
        except RuntimeError:
            # usually occurs when no chromosome exists in the bigwig file.
            # np.nan (lowercase) is used because the np.NaN alias was
            # removed in NumPy 2.0.
            return [np.nan] * abs(start - end)

# For the negative strand, values will be negative! Also, this function reverses the values so that reported densities always run from upstream -> downstream.

In [3]:

# Open the positive/negative bigwig pair for one sample.
rdd = ReadDensity(
    '/projects/ps-yeolab3/encode/analysis/encode_master/218_01_TRA2A.merged.r2.norm.pos.bw',
    '/projects/ps-yeolab3/encode/analysis/encode_master/218_01_TRA2A.merged.r2.norm.neg.bw',
)
# Look up chr7:23,552,262-23,552,812 on the negative strand.
values = rdd.values('chr7', 23552262, 23552812, '-')
# Display only the first ten entries.
values[:10]

[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]

# "pileup" densities across TX regions
- just the first five as an example

In [4]:
# Column names for the six-column, tab-separated BED file read below.
bed_head = ['chrom', 'start', 'end', 'name', 'score', 'strand']
# Read the regions, order them by position, and keep the first five rows.
example_region = (
    pd.read_csv(
        '/projects/ps-yeolab3/bay001/maps/current_annotations/all_txStart_k562.bed',
        sep='\t',
        names=bed_head,
    )
    .sort_values(by=['chrom', 'start', 'end'])
    .head()
)
# Build a BedTool from the same rows so they can be iterated as intervals.
bedtool = pybedtools.BedTool.from_dataframe(example_region)
example_region

Unnamed: 0,chrom,start,end,name,score,strand
74513,chr1,11569,12170,chr1_11869_+_ENSG00000223972.4,0,+
74514,chr1,11572,12173,chr1_11872_+_ENSG00000223972.4,0,+
74516,chr1,11574,12175,chr1_11874_+_ENSG00000223972.4,0,+
74587,chr1,11710,12311,chr1_12010_+_ENSG00000223972.4,0,+
83076,chr1,24586,25187,chr1_24886_-_ENSG00000227232.4,0,-


In [5]:
# Collect one density lookup per interval, in the order the intervals are
# iterated.
densities = []
# The bar is advanced by hand, so wrap it in a `with` block: that closes it
# when the loop finishes, or if a lookup raises part-way through.
with tnrange(len(bedtool)) as progress:
    for interval in bedtool:
        densities.append(
            rdd.values(
                chrom=interval.chrom,
                start=interval.start,
                end=interval.end,
                strand=interval.strand
            )
        )
        progress.update(1)

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

In [6]:
# Tabulate the collected densities: one row per entry in `densities`.
pd.DataFrame(data=densities)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,591,592,593,594,595,596,597,598,599,600
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
