# Initialize

## From the original track files (`.ana`)

In [None]:
# Mount the data share through a shell call before any file below is accessed.
%sx mount /media/data
from trackanalysis import *
from anastore.api import __TASKS__, modifyclasses, DELETE, CNT
# One entry per .ana file found in the sub-directories.
# NOTE(review): `match` presumably makes the 3 characters captured after
# "OR3_mix_" the dictionary key (see how `_remfits` indexes TRACKS) — confirm
# against TracksDict.
TRACKS = TracksDict("/media/data/sirius/Andreas/ssHP6_Sequencing_20180102/*/*.ana", match = ".*OR3_mix_(...)_.*")
# The reference-oligo experiment is added by hand under the 'ref' key.
TRACKS['ref'] = "/media/data/sirius/Andreas/ssHP6_Sequencing_20180102/Reference_oligo/Test_020_044_ssHP6_OR3-10_100nM_OR3-11_20nM_after_saturation.ana"
# Captures the text between "_OR3_mix_" and the next "_" in a path; used by
# `_remfits` to find which TRACKS entry a stored path belongs to.
patt       = re.compile(".*_OR3_mix_(.*?)_.*")
def _remfits(data):
    """
    Rewrite stored task data so that it reads the raw track and holds no fits.

    Through `modifyclasses`, the `path` attribute of every `TrackReaderTask`
    goes through `_mod`, while `FitToReferenceTask` and `FitToHairpinTask`
    are marked with `DELETE`. The same `data` object is returned.
    """
    def _mod(x):
        # Paths matching `patt` belong to the track named by the captured
        # group, anything else is attributed to the reference track.
        found = patt.match(x[CNT][0])
        key   = found.group(1) if found else 'ref'
        # Swap the last 3 characters of that track's first path for "trk".
        x[CNT] = [TRACKS[key].path[0][:-3]+"trk"]
        return x

    modifyclasses(
        data,
        "model.task.track.TrackReaderTask",                        {'path': _mod},
        "peakcalling.processor.fittoreference.FitToReferenceTask", DELETE,
        "peakcalling.processor.fittohairpin.FitToHairpinTask",     DELETE,
    )
    return data

# Hand `_remfits` over to the anastore task patches.
# NOTE(review): presumably it is then applied to the task data read from the
# .ana files — confirm against anastore.
__TASKS__.patch(_remfits)
# Save the tracks to a local directory; a later cell reloads `*.pk` files from
# this same directory.
TRACKS.save("/home/pol/Documents/tracks/rnasequencing/ssHP6_Sequencing_20180102/")

## From the locally saved `.pk` files

In [None]:
from trackanalysis import *

# Reload the tracks from the .pk files stored in the local directory.
TRACKS = TracksDict("/home/pol/Documents/tracks/rnasequencing/ssHP6_Sequencing_20180102/*.pk")
# Task settings specific to the 'GCA' track.
# NOTE(review): the tuple presumably lists the beads used for subtraction and
# `maxsaturation` looks like a percentage — confirm against the task definitions.
TRACKS['GCA'].tasks.subtraction            = 0, 1, 4, 6, 8, 9
TRACKS['GCA'].tasks.cleaning.maxsaturation = 100.

# Viewing Data

In [None]:
%%opts Curve(alpha=0.5)
# Display the `cleancycles` view of the tracks; the cell magic above sets the
# Curve style option alpha to 0.5 for this cell.
TRACKS.cleancycles

# Alignments

In [None]:
from peakcalling.toreference import CorrectedHistogramFit, Pivot

def _createtrack(beads):
    """
    Return the peaks of all tracks as a single dataframe sorted by `modification`.

    *beads* is an iterable of beads to keep; a falsy value selects
    everything (`...`). The `modification` column comes from
    `TRACKS.dataframe()` and is joined on the track key.
    """
    TRACKS.load()
    if beads:
        selection = list(beads)
    else:
        selection = ...

    peaks   = TRACKS.peaks[selection].dataframe(Tasks.singlestrand(),
                                                events     = {'std': 'std'},
                                                resolution = 'resolution')
    bytrack = peaks.reset_index().set_index('track')

    # per-track information, indexed by the same track key as the peaks
    info    = TRACKS.dataframe()[['key', 'modification']]
    info    = info.rename(columns = {'key': 'track'}).set_index('track')

    joined  = bytrack.join(info)
    return joined.reset_index('track').sort_values(['modification'])

def alignpeaks(data, bead, normalize = False, **kwa):
    """
    Align the peaks of every track of one bead on a common origin.

    Arguments:

    * *data*: dataframe with at least the `bead`, `track`, `peakposition` and
      `avg` columns, plus `resolution` when *normalize* is set.
    * *bead*: the bead to keep; all other rows are discarded.
    * *normalize*: when true, the peaks of every track are fitted to those of
      the track named `'ref'` with a `CorrectedHistogramFit` and both
      `peakposition` and `avg` are corrected with the result.
    * *kwa*: `trackname = index` pairs. For such a track, only the rows lying
      strictly below the unique peak position found at `index` (in order of
      appearance) are kept. Keywords which do not name a track of this bead
      have no effect: beware of misspelling `normalize`.

    The steps are:

    1. each track is shifted such that its highest peak is at zero,
    2. the optional normalization to `'ref'` is applied,
    3. the median over tracks of their lowest peak position is subtracted
       from all tracks.

    Returns the concatenation of the per-track dataframes, in order of first
    appearance of each track.
    """
    def _shifted(frame):
        # The shift is computed *before* calling `assign`: recent pandas
        # versions apply the keywords in order, so a callable for `avg` would
        # see the already shifted `peakposition` (maximum 0) and leave `avg`
        # untouched.
        top = frame.peakposition.max()
        return frame.assign(peakposition = frame.peakposition - top,
                            avg          = frame.avg - top)

    def _peaksarray(frame):
        # one (peakposition, resolution) row per unique peak position
        return (frame.groupby('peakposition').resolution.first()
                .reset_index().values)

    data   = data[data.bead == bead]
    flat   = data.reset_index()
    tracks = [(i, flat[flat.track == i]) for i in data.track.unique()]
    if kwa:
        tracks = [(i, j[j.peakposition < j.peakposition.unique()[kwa[i]]] if i in kwa else j)
                  for i, j in tracks]
    tracks = [(i, _shifted(j)) for i, j in tracks]

    if normalize:
        fit    = CorrectedHistogramFit(pivot = Pivot.absolute)
        pks    = {i: fit.frompeaks(_peaksarray(j)) for i, j in tracks}
        corr   = {i: fit.optimize(pks['ref'], j) for i, j in pks.items()}
        # item 2 of the fit result is subtracted, item 1 is then used as a
        # multiplicative factor
        tracks = [(i, j.assign(peakposition = (j.peakposition-corr[i][2])*corr[i][1],
                               avg          = (j.avg-corr[i][2])*corr[i][1]))
                  for i, j in tracks]

    out  = pd.concat([i for _, i in tracks])
    zero = np.nanmedian(out.groupby('track').peakposition.min())
    return out.assign(peakposition = out.peakposition - zero,
                      avg          = out.avg - zero)
def showpeaks(data, bead, **kwa):
    """
    Plot the aligned peaks of one bead, track by track.

    The arguments are those of `alignpeaks`. The result overlays a jittered,
    transparent scatter of the `avg` values with a scatter of the
    `peakposition` values, both against `track`, with rows sorted by
    `modification`.
    """
    aligned = alignpeaks(data, bead, **kwa).sort_values(['modification'])
    events  = hv.Scatter(aligned, "track", "avg")
    events  = events(plot = {'jitter': .75}, style = {'alpha': .3})
    peaks   = hv.Scatter(aligned, "track", "peakposition")
    return events * peaks

# Shelf stored next to the locally saved tracks.
# NOTE(review): LazyShelf presumably only calls the lambda when the key has
# not been computed and stored yet — confirm.
SHELF = LazyShelf("/home/pol/Documents/tracks/rnasequencing/ssHP6_Sequencing_20180102/shelf")
# Peaks dataframe restricted to bead 18, as built by `_createtrack`.
SHELF["PEAKS"] = lambda: _createtrack([18])

In [None]:
# Bead 18, with the peaks at or above the last unique peak position removed
# from the AGC, GGC and GCA tracks. The flag is spelled `normalize`: any other
# keyword is taken for a track name and silently ignored by `alignpeaks`.
showpeaks(SHELF['PEAKS'], 18, AGC = -1, GGC = -1, GCA = -1, normalize = True)

In [None]:
# Aligned peaks of bead 18. The flag is spelled `normalize`: any other keyword
# is taken for a track name and silently ignored by `alignpeaks`.
DATA = alignpeaks(SHELF['PEAKS'], 18, AGC = -1, GGC = -1, GCA = -1, normalize = True)
# Window between the unique 'ref' peak positions found at indexes 3 and 4,
# shrunk by 3e-3 on both sides so that these two peaks are excluded.
BARR = DATA[lambda x: x.track=='ref'].peakposition.unique()[[3,4]] + [3e-3, -3e-3]
# Rows lying strictly inside that window.
GOOD = (DATA
        [lambda x: x.peakposition > BARR[0]]
        [lambda x: x.peakposition < BARR[1]])
# One column per track other than 'ref', holding its unique peak positions
# inside the window. Columns are named after the lower-cased result of
# `reversecomplement` applied to the track name; shorter columns are padded
# with NaN by pandas.
pd.DataFrame({sequences.Translator.reversecomplement(i).lower(): pd.Series(GOOD[lambda x: x.track == i].peakposition.unique())
              for i in set(GOOD.track.unique()) - {'ref'}}).to_csv("/tmp/data.csv")