In [6]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from time import time
import os.path as path
from functions import utils

## User input

In [7]:
# Path of the sample file chosen by the user, decoded from UTF-8 bytes to str.
# NOTE(review): presumably utils.gui_fname() opens a file-selection dialog and
# returns bytes (hence the .decode call) — confirm in functions/utils.
sample_file = utils.gui_fname().decode("utf-8")

In [3]:
# Path of the background file chosen by the user, decoded from UTF-8 bytes to str.
# NOTE(review): presumably this is the comma-delimited background table that is
# loaded in the "Load data" section — confirm.
background_filename = utils.gui_fname().decode("utf-8")

In [6]:
# Derive the path prefix shared by all files that belong to the selected sample,
# e.g. "/data/run1/sample.trc.h5" -> "/data/run1/sample". Later cells append
# suffixes such as ".trc.h5" or ".metadata.xml" to this prefix.
tokens = sample_file.split('/')  # not used by the visible cells; kept for compatibility
base = path.basename(sample_file)
abs_dir = path.dirname(sample_file)
# Keep only the text before the first dot so multi-part extensions are dropped.
prefix = base.split('.')[0]
# path.join (instead of abs_dir + '/' + prefix) avoids producing "/<prefix>",
# i.e. a path in the filesystem root, when sample_file has no directory part.
sample_prefix = path.join(abs_dir, prefix)

TypeError: a bytes-like object is required, not 'str'

In [None]:
# NOTE(review): presumably the indices of the two channels of interest;
# they are not read by any of the visible cells.
color1, color2 = 0, 2

# 4x4 leakage matrix. Only row 0 (columns 1-3) and entry (2, 3) are non-zero.
leakage = np.zeros((4, 4))
leakage[0, 1:] = [0.48921841, 0.09134705, 0.02373513]
leakage[2, 3] = 0.3349036856391455

# Per-channel gamma factors: 1 for every channel except index 2.
gamma = np.ones(4)
gamma[2] = 1.3709926961993117

In [None]:
# def plot_one_trace(data, traceID, time_axis):
# #     plt.figure(figsize=(15, 2))
#     trc = data[traceID]
#     shape = data.shape
#     if len(shape) == 2:
#         plt.plot(time_axis, trc)
#     else:
#         n_channels = data.shape[1]
#         for j in range(n_channels):
#             plt.plot(time_axis, trc[j], color=colors[j], linewidth=0.5)
#         plt.xlabel('Time (s)')
#         plt.ylabel('Intensity')
#         plt.title('Trace ' + str(traceID))

In [None]:
# def plot_traces(n, data, indices, time_axis):
#     plt.figure(figsize=(15, n*2))
# #     if n % 2 == 1:
# #         n = n-1
#     for i in range(n):
#         plt.subplot(n, 1, i+1)
#         plot_one_trace(data, indices[i], time_axis)
# #         
# #         if len(data.shape) == 2:
# #             plt.plot(time_axis, trc)
# #         else:
# #             for j in range(data.shape[1]):
# #                 plt.plot(time_axis, trc[j], color=colors[j], linewidth=0.5)

# #         plt.xlabel('Time (s)')
# #         plt.ylabel('Intensity')
# #         plt.title('Trace ' + str(indices[i]))
#     plt.subplots_adjust(hspace=0.9)

In [None]:
# def plot_traces_caller(n, toi, sorting_index, time_axis, foi, start, end, dyes, save=True):
#     plot_traces(n, toi, sorting_index[start:end], time_axis[foi])
#     filename = "%s_%d-%d_most_%s_anticorrelated.png" %(sample_prefix, start, end, dyes)
#     if save:
#         plt.savefig(filename, dpi=200)

In [None]:
# def decode_and_plot(traces, traceID, decode_array, colors, foi, background, gamma, leakage, channels):
# #     channels = range(4)
#     trace = decode_array[traces[traceID]]
#     plt.figure(figsize=(13,4))   
    
#     for i in channels:
#         trace[i] = trace[i] - background[i]
#         print(i, np.mean(trace[i, -10:]))
#         trace[i] = trace[i]*gamma[i]
#         for j in channels:
#             trace[i] = trace[i] - trace[j] * leakage[j, i]
    
#     for j in channels:
#         plt.plot(time_axis[foi], trace[j], color=colors[j], linewidth=0.4, alpha=0.9)

#     plt.xlabel('Time (s)')
#     plt.ylabel('Intensity')
#     plt.legend(lasers, bbox_to_anchor=(0.75, 0.5, 0.5, 0.5))
    
#     plt.figure(figsize=(13,4))
#     don = trace[0]
#     acc = trace[2]
#     fret = acc/(don + acc)
#     plt.plot(time_axis[foi], fret, linewidth=0.4)
#     plt.ylim([-0.1, 1.1])
#     print(fret)
    
#     plt.xlabel('Time (s)')
#     plt.ylabel('FRET efficiency')
    
#     return fret


## Load data

In [None]:
start = time()

# All files of one sample share `sample_prefix` and differ only in their suffix.
trc_filename = sample_prefix + '.trc.h5'
mcd_filename = sample_prefix + '.mcd.h5'
upd_filename = sample_prefix + '.upd.h5'
meta_filename = sample_prefix + '.metadata.xml'

# The HDF5 files are intentionally left open: the datasets taken from them below
# (raw_traces, hole_xy_plot, hole_status) are indexed again by later cells.
trc_file = h5py.File(trc_filename, 'r')
mcd_file = h5py.File(mcd_filename, 'r')
upd_file = h5py.File(upd_filename, 'r')

# Load data in trace file
dset = trc_file['TraceData']
raw_traces = dset['Traces']
decode = dset['Codec']['Decode']
n_traces = raw_traces.shape[0]
n_frames = raw_traces.shape[2]
decode_array = np.array(decode)

# Load data in upd file
upd_TD = upd_file['TraceData']
hole_xy_plot = upd_TD['HoleXYPlot'] # (x,y) coordinates for each well
hole_status = upd_TD['HoleStatus'] # fiducials

# Extract frame rate from metadata
meta_tree = ET.parse(meta_filename)
root = meta_tree.getroot()
frame_rate = None
for keyval in root.iter('{http://pacificbiosciences.com/PAP/Metadata.xsd}KeyValue'):
    # .get() tolerates KeyValue elements that carry no 'key' attribute
    if keyval.get('key') == 'MovieFPS':
        fps = int(keyval.text)
        frame_rate = 1.0/fps # seconds
        break
if frame_rate is None:
    # Fail here with a clear message instead of a NameError further down
    raise ValueError("No 'MovieFPS' KeyValue entry found in " + meta_filename)

# For plotting
time_axis = np.arange(n_frames)*frame_rate
colors = ['green', (0, 1, 0), 'red', 'orange']
lasers = ['Cy3', 'Cy3.5', 'Cy5', 'Cy5.5']

# The path chosen in the user-input section is `background_filename`
# (the former `background_file` was never defined). The context manager
# closes the handle once the table has been parsed.
with open(background_filename, "rb") as bg_file:
    bg = np.loadtxt(bg_file, delimiter=",")

print("Time passed: " + str(time() - start))

In [None]:
# Alias only: `traces` is bound to the same object as `raw_traces`; no data is
# copied or decoded here.
traces = raw_traces # skipping decode step to save memory

## Preprocessing

In [None]:
# NOTE: `start_frame` and `end_frame` are not assigned in the cells shown above;
# they must already exist in the kernel namespace when this cell runs.
foi = range(start_frame, end_frame) # frames of interest
n_frames = len(foi)

# Mean raw intensity per (trace, channel) over the first 10 frames of interest.
# A plain slice replaces the former range() fancy index, and the np.zeros
# pre-allocation that was overwritten immediately has been removed.
first_10_frames = raw_traces[:, :, start_frame:start_frame + 10]
avg_intens = np.mean(first_10_frames, axis=2)

# The background table needs one row per trace; fail loudly rather than broadcast.
if bg.shape[0] < n_traces:
    raise ValueError("background table has %d rows but there are %d traces"
                     % (bg.shape[0], n_traces))

# Background-subtract all traces at once. The int16 cast truncates toward zero,
# matching the earlier element-wise assignment into an int16 array.
avg_intens_bgs = (avg_intens - bg[:n_traces]).astype(np.int16)

## Plot intensity distributions

In [None]:
def _plot_channel_hists(per_trace_values, n_bins, title, out_path):
    """Draw one step histogram per channel on a new figure and save it.

    Reads the notebook globals `colors` (line colour per channel) and
    `lasers` (legend labels).

    per_trace_values -- 2-D array indexed [trace, channel]; channels 0-3 are plotted
    n_bins           -- number of histogram bins
    title            -- figure title
    out_path         -- file the figure is saved to (dpi=200)
    """
    plt.figure()
    for ch in range(4):
        channel_values = per_trace_values.T[ch]
        # np.min/np.max instead of the Python builtins, which loop element by element
        lowest = np.min(channel_values)
        highest = np.max(channel_values)
        plt.hist(channel_values, bins=n_bins, range=[lowest, highest],
                 histtype='step', color=colors[ch])

    plt.xlabel('Intensity')
    plt.ylabel('Count')
    plt.title(title)
    plt.legend(lasers)
    plt.savefig(out_path, dpi=200)


# Raw mean intensity of the first 10 frames
_plot_channel_hists(avg_intens, 100, "Before filtering",
                    sample_prefix + '_intensity_hist.png')

# Background values
_plot_channel_hists(bg, 100, "Background",
                    sample_prefix + "_background_hist.png")

# Mean intensity after background subtraction
_plot_channel_hists(avg_intens_bgs, 50, "First 10 frames avg intens after bgs",
                    sample_prefix + '_intensity_hist_after_bgs.png')

In [None]:
def _plot_well_grids(per_trace_values, out_path, figure_title=None):
    """Plot a 2x2 grid of well maps, one panel per channel, then save and show it.

    Reads the notebook globals `hole_xy_plot`, `hole_status` and `lasers`.

    per_trace_values -- 2-D array indexed [trace, channel]; used as point colours
    out_path         -- file the figure is saved to
    figure_title     -- optional title for the whole figure
    """
    # Read the well coordinates once instead of on every scatter call
    well_x = hole_xy_plot[:, 0]
    well_y = hole_xy_plot[:, 1]

    plt.figure(figsize=(7.5, 6))
    for ch in range(4):
        plt.subplot(2, 2, ch+1)
        # The third positional argument of scatter is the marker size, so the
        # first layer is sized by hole_status.
        plt.scatter(well_x, well_y, hole_status)
        # Second layer: size-1 points coloured by the channel's intensity
        plt.scatter(well_x, well_y, 1, per_trace_values.T[ch])
        plt.title(lasers[ch] + ' intensity')
        plt.colorbar()
    plt.subplots_adjust(hspace=0.3)
    if figure_title is not None:
        # suptitle labels the whole figure; plt.title here would overwrite the
        # title of the last subplot only.
        plt.suptitle(figure_title)
    plt.savefig(out_path)
    plt.show()


_plot_well_grids(avg_intens, sample_prefix + '_grid.png', "Before filtering")

# plt.title("Before background subtraction")
_plot_well_grids(avg_intens_bgs, sample_prefix + '_grid_bgs.png')

## Filtering for wells that have the desired intensity

In [None]:
# Channel to filter on and the exclusive intensity bounds of the selection window.
channel, low, high = 2, 30, 180

# Indices of traces whose background-subtracted intensity in `channel`
# lies strictly between `low` and `high`.
sele_traces = [
    idx
    for idx in range(n_traces)
    if low < avg_intens_bgs[idx, channel] < high
]

print("%d traces have desired intensity range." %len(sele_traces))

In [None]:
# display selected traces
count = 0
end_high = 20

for i in range(1800, 2000):
    traceID = sele_traces[i]
    last_10_frames = raw_traces[traceID, channel, [end_frame-10, end_frame]] - bg[traceID, channel]
    end_avg = np.mean(last_10_frames)
    if end_avg < end_high:
        plt.figure(figsize=(13,2))
        plot_one_trace_bgs(raw_traces, traceID, time_axis, bg, foi)
        count += 1

print("Showing %d traces." %count)