In [6]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from time import time
from functions import utils
from functions import utils_dtw
import os.path as path

# User selects a pacbio dataset
# The chosen file is split into directory and file-name prefix; the prefix is
# what the other files of the same dataset are located with later on.
# NOTE(review): .decode() presumes utils.gui_fname() returns bytes - confirm.
sample_file = utils.gui_fname().decode("utf-8")
tokens = sample_file.split('/')  # not read by the visible cells; kept in case another cell relies on it
base = path.basename(sample_file)
abs_dir = path.dirname(sample_file)
# Everything before the first '.' of the file name ('a.mcd.h5' -> 'a')
prefix = base.split('.')[0]
# path.join rather than abs_dir + '/' + prefix: with an empty dirname the
# concatenation produced the root-level path '/<prefix>' (and '//<prefix>'
# when the directory is '/').
sample_prefix = path.join(abs_dir, prefix)
print("Sample: \n%s" %sample_file)

# User input
N_CLASS = 3
CLS_EXAMPLE = [160027, 64050, 33104] # indices of manually selected class examples
# Placeholder only; replaced by an (N_CLASS, n_frames) zero array once the
# number of frames is known.
init_class = []

Sample: 
/Users/ashleefeng/OneDrive - Johns Hopkins/PacBio/181207_Ashlee/30s_43N43/m181207_211324_42131_c000468682559900001500000112312060_s1_p0.mcd.h5


## Load data

In [2]:
# File names of the dataset, all derived from the shared sample prefix.
trc_filename, mcd_filename, upd_filename = (
    sample_prefix + suffix for suffix in ('.trc.h5', '.mcd.h5', '.upd.h5')
)
meta_filename = sample_prefix + '.metadata.xml'

# Open the three HDF5 files read-only. The handles are left open on purpose:
# objects taken from them are still accessed by later statements.
trc_file = h5py.File(trc_filename, mode='r')
mcd_file = h5py.File(mcd_filename, mode='r')
upd_file = h5py.File(upd_filename, mode='r')

# Trace file: grab the 'Traces' dataset and the 'Decode' entry of the codec group
dset = trc_file['TraceData']
raw_traces, decode = dset['Traces'], dset['Codec']['Decode']
# Axis 0 is used as the number of traces, axis 2 as the number of frames
n_traces, n_frames = raw_traces.shape[0], raw_traces.shape[2]
# In-memory copy of the decode entry
decode_array = np.array(decode)

# upd file: per-hole entries of its 'TraceData' group
upd_TD = upd_file['TraceData']
# hole_xy_plot: (x,y) coordinates for each well
# hole_status:  fiducials
hole_xy_plot, hole_status = (upd_TD[key] for key in ('HoleXYPlot', 'HoleStatus'))

# Extract frame rate from metadata
# Scan the metadata XML for the KeyValue element whose 'key' attribute is
# 'MovieFPS' and derive the duration of one frame from it.
meta_tree = ET.parse(meta_filename)
root = meta_tree.getroot()
for keyval in root.iter('{http://pacificbiosciences.com/PAP/Metadata.xsd}KeyValue'):
    # .get() tolerates KeyValue elements that carry no 'key' attribute
    if keyval.get('key') == 'MovieFPS':
        fps = int(keyval.text)
        frame_rate = 1.0/fps # seconds (per frame, i.e. the frame period)
        break
else:
    # Without this guard a missing entry left frame_rate undefined - or, in a
    # long-lived kernel, silently reused the value of a previously loaded
    # dataset.
    raise ValueError("No 'MovieFPS' entry found in %s" % meta_filename)

# Plotting helpers: one time stamp per frame (frame index times the
# seconds-per-frame value held in frame_rate) ...
time_axis = frame_rate * np.arange(n_frames)
# ... and a name / colour list of four entries each (presumably one per channel)
lasers = ['Cy3', 'Cy3.5', 'Cy5', 'Cy5.5']
colors = ['green', (0, 1, 0), 'red', 'orange']

# One all-zero row of n_frames samples per class
init_class = np.zeros(shape=(N_CLASS, n_frames))

# Skip decode
# Read the dataset into memory once. Indexing the h5py dataset element by
# element inside a double loop issued one separate read per (trace, channel)
# pair; np.mean below needs the full array in memory anyway.
traces = raw_traces[...]

# Mean intensity of every (trace, channel) pair over the frame axis
avg_intens = np.mean(traces, axis=2)
# zero-center the traces
# Broadcasting the means over the frame axis replaces the loop and its
# hard-coded channel count of 4. The float64 cast keeps the dtype the
# original np.zeros-allocated result array had.
centered_traces = (traces - avg_intens[:, :, np.newaxis]).astype(np.float64, copy=False)

# Compute cross-correlation
# One value per trace: the correlation of channel 0 (cy3) with channel 2 (cy5)
# of the zero-centred traces.
corr_list = np.zeros(n_traces)
for i in range(n_traces):
    cy3 = centered_traces[i, 0]
    cy5 = centered_traces[i, 2]
    # With equal-length inputs the default 'valid' mode returns a length-1
    # array (the zero-lag term). Take the element explicitly: storing a 1-d
    # array into a scalar slot relies on a conversion deprecated since
    # NumPy 1.25.
    corr_list[i] = np.correlate(cy3, cy5)[0]

# Trace indices ordered by ascending correlation value
sorting_index = np.argsort(corr_list)

# Min-max scale channel 2 (cy5) of every zero-centred trace to [0, 1].
# The output starts out as all ones, so a flat trace (min == max) keeps the
# value 1 everywhere and only traces with a non-zero range are overwritten.
scaled_data_cy5 = np.ones((n_traces, n_frames))

for scaled_row, cy5_row in zip(scaled_data_cy5, centered_traces[:, 2, :]):
    lowest, highest = cy5_row.min(), cy5_row.max()
    if lowest != highest:
        # scaled_row is a view into scaled_data_cy5, so this writes in place
        scaled_row[:] = (cy5_row - lowest) / (highest - lowest)

In [7]:
# Classify the scaled cy5 traces with the DTW helper, using the manually
# selected example traces listed in CLS_EXAMPLE.
# The literal 3 is replaced by the N_CLASS setting it duplicated (same value).
# NOTE(review): the second argument is presumably the number of classes - the
# (N_CLASS, n_frames) pair also sizes init_class - confirm against utils_dtw.
# NOTE(review): the saved output of this cell is a NameError for 'init_class'
# although the notebook defines that name; the lookup presumably happens
# inside utils_dtw - check there.
labels_p = utils_dtw.dtw_classification_parallel(scaled_data_cy5, N_CLASS, n_frames, CLS_EXAMPLE)

NameError: name 'init_class' is not defined

In [None]:
# Histogram of the labels returned by the DTW classification.
# Explicit figure/axes with axis labels and a title so the plot can be read
# on its own; the trailing ';' keeps the return value out of the cell output.
fig, ax = plt.subplots()
ax.hist(labels_p)
ax.set(xlabel='Label', ylabel='Number of traces', title='DTW classification labels');

In [None]:
# Group the labels with the class_extractor helper.
# NOTE(review): the meaning of the second argument (4) is not visible here;
# N_CLASS is 3 - confirm against utils_dtw.
classes = utils_dtw.class_extractor(labels_p, 4)

# Draw 10 entries of classes[0] at random and plot them. np.random.choice
# samples with replacement by default, so an entry can show up twice, and no
# seed is set, so the selection changes on every run.
class0_picks = np.random.choice(classes[0], size=10)
utils_dtw.plot_traces(10, scaled_data_cy5, class0_picks)

In [None]:
# Same plot for classes[1]: 10 random entries, drawn with replacement
# (np.random.choice default) and without a fixed seed.
class1_picks = np.random.choice(classes[1], size=10)
utils_dtw.plot_traces(10, scaled_data_cy5, class1_picks)

In [None]:
# Same plot for classes[2]: 10 random entries, drawn with replacement
# (np.random.choice default) and without a fixed seed.
class2_picks = np.random.choice(classes[2], size=10)
utils_dtw.plot_traces(10, scaled_data_cy5, class2_picks)