# Setup

In [7]:
from scripting import *
from scripting import carryover
import pandas as pd
from functools import reduce

In [8]:
# Build the TracksDict from every file matching the glob below.
# NOTE(review): this is an absolute, machine-specific path; the notebook cannot
# be re-run elsewhere without editing it. Consider a configurable data directory.
TRACKS = TracksDict("/home/david/work/scripts/sequencing/RNA/Remi_2018_03_12/tracks/*.pk")
# Bead ids handed to `_createtrack` further down.
GOODS = [14, 36, 41, 45, 61, 71, 72]
# Display the keys under which the tracks were registered.
print(TRACKS.keys())

dict_keys(['CGT', 'GTG', 'TCA', 'GTC', 'AGC', 'GAG', 'GGT', 'TCG', 'CGG', 'GCA', 'GCG', 'TGG', 'GGA', 'TGA', 'AGG', 'CAG', 'OR3', 'GGC'])


# Alignment

In [9]:
from peakcalling.toreference import CorrectedHistogramFit, ChiSquareHistogramFit
from peakcalling import Range

def _createtrack(beads):
    """
    Build the peak dataframe for the requested beads.

    `beads` is an iterable of bead ids; a falsy value selects everything
    (`...`). The frame produced by `TRACKS.peaks[...]` has its own
    "modification" column dropped and replaced by the one found in
    `TRACKS.dataframe()`, joined on the track key. The result is sorted by
    "modification" and has "track" as a regular column.
    """
    TRACKS.load()
    # Disabled experiments, kept for reference:
    #Tasks.peakselector.let(align=None)
    #Tasks.peakselector.let(finder=ByHistogram())
    selection = list(beads) if beads else ...
    peaks     = TRACKS.peaks[selection].dataframe(Tasks.singlestrand(),
                                                  events     = {'std': 'std'},
                                                  resolution = 'resolution')

    # discard the per-peak "modification" column: it is re-created by the join
    keep    = peaks.columns != "modification"
    bytrack = peaks.loc[:, keep].reset_index().set_index('track')

    # one row per track: its key (renamed to "track") and its modification
    modifs = TRACKS.dataframe()[['key', 'modification']]
    modifs = modifs.rename(columns = {'key': 'track'}).set_index('track')

    joined = bytrack.join(modifs).reset_index('track')
    return joined.sort_values(['modification'])
def alignpeaks(data, bead, normalize = False, **kwa):
    """
    Align the peaks of every track of a single bead on a common origin.

    Parameters
    ----------
    data:
        DataFrame with at least the columns "bead", "track", "peakposition"
        and "avg" (plus "resolution" when `normalize` is true).
    bead:
        The bead id to extract from `data`.
    normalize:
        When true, each track is fitted against the 'OR3' track with a
        `CorrectedHistogramFit` and rescaled accordingly. The 'OR3' track must
        then be present for this bead, otherwise a `KeyError` is raised.
    **kwa:
        * "stretch" / "bias": `Range` objects forwarded to the fit (only read
          when `normalize` is true).
        * any key equal to a track name: an integer index `n`; only peaks
          strictly below the n-th entry of that track's
          `peakposition.unique()` are kept.
          NOTE(review): `unique()` keeps order of appearance, so this assumes
          the peaks are already ordered as intended -- confirm.

    Returns
    -------
    A new DataFrame, one block of rows per track, where "peakposition" and
    "avg" have been shifted (and optionally rescaled) identically.
    """
    flat   = data[data.bead == bead].reset_index()
    tracks = [(name, flat[flat.track == name]) for name in flat.track.unique()]

    if kwa:
        # try to remove peaks corresponding to hairpin still open in phase 4
        tracks = [(name,
                   frame if name not in kwa
                   else frame[frame.peakposition < frame.peakposition.unique()[kwa[name]]])
                  for name, frame in tracks]

    # Put the highest peak of each track at zero. The maximum is computed once,
    # up front: `DataFrame.assign` evaluates keyword callables in order against
    # the already-updated frame, so computing it lazily for `avg` would see the
    # shifted positions (max == 0) and leave `avg` unshifted.
    shifted = []
    for name, frame in tracks:
        top = frame.peakposition.max()
        shifted.append((name, frame.assign(peakposition = frame.peakposition - top,
                                           avg          = frame.avg - top)))
    tracks = shifted

    if normalize:
        stretch = kwa.get("stretch", Range(1, .05, .02))
        bias    = kwa.get("bias", Range(0, .1, .01))
        fit     = CorrectedHistogramFit(stretch = stretch, bias = bias)
        #fit     = ChiSquareHistogramFit(stretch=stretch,bias=bias)

        # one (peakposition, resolution) row per distinct peak position
        pks  = {name: fit.frompeaks(frame.groupby('peakposition').resolution.first()
                                    .reset_index().values)
                for name, frame in tracks}
        corr = {name: fit.optimize(pks['OR3'], peaks) for name, peaks in pks.items()}

        # presumably corr[name][1] is the stretch and corr[name][2] the bias
        # found by the fit -- TODO confirm against peakcalling.toreference
        tracks = [(name,
                   frame.assign(peakposition = (frame.peakposition - corr[name][2]) * corr[name][1],
                                avg          = (frame.avg - corr[name][2]) * corr[name][1]))
                  for name, frame in tracks]

    out  = pd.concat([frame for _, frame in tracks])
    # common origin: the median over tracks of their lowest peak position
    zero = np.nanmedian(out.groupby('track').peakposition.min())
    return out.assign(peakposition = out.peakposition - zero,
                      avg          = out.avg - zero)
def showpeaks(data, bead, **kwa):
    """
    Plot the aligned peaks of one bead, track by track.

    The data is first passed through `alignpeaks` (all keyword arguments are
    forwarded) and sorted by "modification". The result overlays a jittered,
    semi-transparent scatter of "avg" with a scatter of "peakposition", both
    against "track".
    """
    aligned = alignpeaks(data, bead, **kwa).sort_values(['modification'])
    events  = hv.Scatter(aligned, "track", "avg")(plot  = dict(jitter = .75),
                                                  style = dict(alpha = .3))
    peaks   = hv.Scatter(aligned, "track", "peakposition")
    return events * peaks
    

In [10]:
# Peak dataframe restricted to the beads listed in GOODS.
DATA = _createtrack(GOODS)
# Align the tracks of bead 14 with normalization enabled, overriding the
# default `stretch` and `bias` Range values used by `alignpeaks`.
ALIGNEDDATA = alignpeaks(DATA,14,normalize=True,bias=Range(0,0.01,0.001),stretch = Range(1,0.01,0.005))

# Carry Over
Check whether there is any probable carry-over.

In [11]:
# Select the region of interest to sequence: bead 14 rows of the aligned data
# with 0.25 < peakposition < 0.36.
# NOTE(review): ALIGNEDDATA was built for bead 14 only, so the `bead == 14`
# clause looks redundant -- confirm before removing.
subdata = ALIGNEDDATA.query("peakposition> 0.25 and peakposition <0.36 and bead == 14")

In [12]:

# NOTE(review): presumably `match` extracts the 3-character track key from the
# file names found under `path` -- confirm against scripting.carryover.
duplicate=carryover.DuplicateData(path = "./initialtracks/*.trk",match = r".*040_(\w{3}).*.trk")
# Carry-over finder, built with its default settings.
carry = carryover.CarryOver()

In [13]:
# Run `duplicate` on the selected region, feed the result to `carry.find`
# and materialize what it yields into a list.
pairs = list(carry.find((duplicate(subdata))))

In [14]:
# Number of entries returned by `carry.find`.
print(len(pairs))

11


## Plot possible carry-over

In [15]:
# Plot element(s) built by `carry.overlaypairs` from the pairs found above.
hvpairs = carry.overlaypairs(pairs)

# Same overlay as `showpeaks` builds, but over the already aligned data:
# jittered, semi-transparent "avg" scatter times the "peakposition" scatter.
align = hv.Scatter(ALIGNEDDATA, "track", "avg")(plot=dict(jitter=.75), style = dict(alpha=.3))*hv.Scatter(ALIGNEDDATA, "track", "peakposition") 

# Overlay the pairs on top of the aligned peaks; last expression of the cell,
# so it is what gets displayed.
align*hvpairs