In [1]:
from tqdm import tqdm
import json
import os
import pandas as pd
import numpy as np
import pylab as plt
import seaborn as sns
from pathlib import Path

In [2]:
from numba import jit
import scipy.stats as sstats

@jit(nopython=True)
def lognorm_pdf(x, mu, sigma):
    '''PDF of the log-normal distribution.

    Evaluates
    ``1 / (x * sigma * sqrt(2*pi)) * exp(-(ln(x) - mu)**2 / (2 * sigma**2))``.

    Args:
        x: Point(s) at which the density is evaluated; must be > 0 because
            both ``ln(x)`` and ``1/x`` are taken.
        mu: Mean of ``ln(x)``.
        sigma: Standard deviation of ``ln(x)``; must be non-zero.

    Returns:
        The density at ``x`` (same shape as ``x``).
    '''
    # The Gaussian exponent must be divided by 2*sigma**2. Dividing by
    # sigma**2 only (as this function used to do) yields a curve that is
    # too narrow and does not integrate to one.
    prefactor = 1.0/(x*sigma*np.sqrt(2*np.pi))
    out = prefactor*np.exp(-(np.log(x)-mu)**2/(2.0*sigma**2))

    return out

In [3]:
@jit(nopython=True)
def norm_pdf(x, mu, sigma):
    '''PDF of the normal distribution.

    Evaluates
    ``1 / (sigma * sqrt(2*pi)) * exp(-(x - mu)**2 / (2 * sigma**2))``.

    Args:
        x: Point(s) at which the density is evaluated.
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution; must be non-zero.

    Returns:
        The density at ``x`` (same shape as ``x``).
    '''
    # The exponent is divided by 2*sigma**2; without the factor 2 the curve
    # is not the density that the 1/(sigma*sqrt(2*pi)) prefactor normalises.
    prefactor = 1.0/(sigma*np.sqrt(2*np.pi))
    out = prefactor*np.exp(-((x-mu)**2)/(2.0*sigma**2))

    return out

In [4]:
@jit(nopython=True)
def p_o_G_s(o, s, mu_l, sigma_l, sigma_alpha_2):
    """Likelihood of one observation ``o`` given the hidden state ``s``.

    The result is the product of two terms:

    * ``lognorm_pdf(o[0], mu_l[s], sigma_l[s])`` for the first component, and
    * for the second component a constant ``1/(2*pi)`` when ``s == 0``,
      otherwise ``norm_pdf(o[1], 0.0, sigma_alpha_2)``.

    The product is floored at ``10**(-20)``.

    Args:
        o: Observation; ``o[0]`` and ``o[1]`` are read (the callers in this
            notebook pass step length and turning angle).
        s: Index of the hidden state.
        mu_l: Per-state ``mu`` passed to ``lognorm_pdf``.
        sigma_l: Per-state ``sigma`` passed to ``lognorm_pdf``.
        sigma_alpha_2: Passed as ``sigma`` to ``norm_pdf``.
            NOTE(review): the name suggests a variance, but it is used as a
            standard deviation here - confirm against the fitting code.

    Returns:
        The floored likelihood.
    """
    # Smallest value this function ever returns.
    floor = 10**(-20)

    if s == 0:
        angle_term = 1/(2*np.pi)
    else:
        angle_term = norm_pdf(o[1], 0.0, sigma_alpha_2)

    likelihood = lognorm_pdf(o[0], mu_l[s], sigma_l[s])*angle_term

    if likelihood < floor:
        return floor
    return likelihood

In [5]:
@jit(nopython=True)
def p_olog_G_s(o, s, mu_l, sigma_l, sigma_alpha_2):
    """Likelihood of one observation ``o`` given state ``s``, normal in ``o[0]``.

    Same structure as ``p_o_G_s`` except that the first component is scored
    with ``norm_pdf(o[0], mu_l[s], sigma_l[s])`` instead of the log-normal
    density (presumably ``o[0]`` is already a logarithm - confirm with the
    caller). The second component contributes ``1/(2*pi)`` when ``s == 0``
    and ``norm_pdf(o[1], 0.0, sigma_alpha_2)`` otherwise. The product is
    floored at ``10**(-20)``.

    Args:
        o: Observation; ``o[0]`` and ``o[1]`` are read.
        s: Index of the hidden state.
        mu_l: Per-state mean passed to ``norm_pdf``.
        sigma_l: Per-state ``sigma`` passed to ``norm_pdf``.
        sigma_alpha_2: Passed as ``sigma`` to ``norm_pdf`` for ``o[1]``.

    Returns:
        The floored likelihood.
    """
    # Smallest value this function ever returns.
    floor = 10**(-20)

    if s == 0:
        angle_term = 1/(2*np.pi)
    else:
        angle_term = norm_pdf(o[1], 0.0, sigma_alpha_2)

    likelihood = norm_pdf(o[0], mu_l[s], sigma_l[s])*angle_term

    if likelihood < floor:
        return floor
    return likelihood

In [6]:
@jit(nopython=True)
def viterbi(T, pi, mu_l, sigma_l, sigma_alpha_2, O):
    """Viterbi algorithm for solving the uncovering problem

    Adapted from the notebook C5/C5S3_Viterbi.ipynb. Instead of looking up a
    discrete emission matrix, the emission likelihood of each observation is
    evaluated with ``p_o_G_s``.

    Probabilities are multiplied directly (no log domain, no rescaling), so
    the entries of ``D`` can underflow for long observation sequences.

    Args:
        T (np.ndarray): State transition probability matrix of dimension K x K;
            ``T[j, i]`` weights the transition from state ``j`` to state ``i``
        pi (np.ndarray): Initial state distribution of dimension K
        mu_l (np.ndarray): Per-state ``mu_l`` forwarded to ``p_o_G_s``
        sigma_l (np.ndarray): Per-state ``sigma_l`` forwarded to ``p_o_G_s``
        sigma_alpha_2 (float): ``sigma_alpha_2`` forwarded to ``p_o_G_s``
        O (np.ndarray): Observation sequence of length N; each ``O[n]`` is
            passed to ``p_o_G_s`` as one observation

    Returns:
        S_opt (np.ndarray): Optimal state sequence of length N
        D (np.ndarray): Accumulated probability matrix of dimension K x N
        E (np.ndarray): Backtracking matrix of dimension K x (N-1)

    Raises:
        ValueError: If ``O`` is empty.
    """
    K = T.shape[0]    # Number of states
    N = len(O)  # Length of observation sequence

    if N == 0:
        raise ValueError("O must contain at least one observation")

    # Initialize D and E matrices
    D = np.zeros((K, N), dtype=np.float64)
    E = np.zeros((K, N-1), dtype=np.int32)

    # Initialisation: prior times the emission likelihood of the first
    # observation. Using the prior alone would ignore O[0] entirely.
    for i in range(K):
        D[i, 0] = pi[i] * p_o_G_s(O[0], i, mu_l, sigma_l, sigma_alpha_2)

    # Compute D and E in a nested loop
    for n in range(1, N):
        for i in range(K):
            temp_product = T[:, i] * D[:, n-1]
            best_prev = np.argmax(temp_product)
            D[i, n] = temp_product[best_prev] * p_o_G_s(O[n], i, mu_l, sigma_l, sigma_alpha_2)
            E[i, n-1] = best_prev

    # Backtracking
    S_opt = np.zeros(N, dtype=np.int32)
    S_opt[-1] = np.argmax(D[:, -1])
    for n in range(N-2, -1, -1):
        S_opt[n] = E[int(S_opt[n+1]), n]

    return S_opt, D, E

In [7]:
def convert_to_cartesian(o):
    """Integrate (step length, turning angle) rows into Cartesian positions.

    The heading after each step is the cumulative sum of ``o[:, 1]``; the
    position is the cumulative sum of the step vectors
    ``o[:, 0] * (cos(heading), sin(heading))``.

    Args:
        o (np.ndarray): 2-D array; column 0 holds step lengths, column 1 the
            change of heading (radians) applied before each step.

    Returns:
        np.ndarray: Float array with the shape of ``o``; column 0 is x and
        column 1 is y after each step (the starting point is not included).
    """
    # Absolute heading of every step, as float64 like the output array.
    heading = np.cumsum(o[:, 1]).astype(float)
    step_len = o[:, 0]

    X = np.zeros(o.shape, dtype=float)
    X[:, 0] = np.cumsum(step_len*np.cos(heading))
    X[:, 1] = np.cumsum(step_len*np.sin(heading))

    return X

In [8]:
def convert_to_planar(X, dt=1.0):
    """Convert a 2-D trajectory into step lengths and turning angles.

    Args:
        X (np.ndarray): Positions, one row per time point; column 0 is x and
            column 1 is y.
        dt (float): Time between consecutive positions; step lengths are
            divided by it.

    Returns:
        tuple: ``(l, alpha)`` where

        * ``l`` holds the step lengths divided by ``dt`` with the first step
          dropped (``X.shape[0] - 2`` entries). One random offset drawn from
          ``uniform(0, 1e-6)`` is added to all entries, so repeated calls
          differ slightly.
        * ``alpha`` holds the change of heading between consecutive steps,
          wrapped into ``[-pi, pi]``, with the last two entries dropped
          (``X.shape[0] - 4`` entries).

        NOTE(review): ``l`` and ``alpha`` have different lengths and callers
        trim ``l`` again; confirm that the resulting pairing of lengths and
        angles is the intended one.
    """
    steps = np.diff(X, axis=0)
    dx = steps[:, 0]
    dy = steps[:, 1]

    # A single scalar offset shared by every step (presumably to keep the
    # lengths strictly positive - confirm).
    jitter = np.random.uniform(0.0, 0.000001)
    l = np.sqrt(dx**2+dy**2)/dt + jitter

    # Heading of each step and its change from one step to the next.
    gamma = np.arctan2(dy, dx)
    alpha = np.diff(gamma)

    # Wrap the heading changes back into [-pi, pi].
    too_large = alpha > np.pi
    alpha[too_large] = alpha[too_large] - 2*np.pi
    too_small = alpha < -np.pi
    alpha[too_small] = alpha[too_small] + 2*np.pi

    return l[1:], alpha[:-2]

In [None]:
# Select and plot tracks of the "norm" data set.
# For every directory listed in norm_list.txt the fitted HMM parameters are
# loaded, the most likely state sequence of every track is decoded with
# viterbi(), and tracks with at least two consecutive time steps in a
# non-zero state are recorded and saved as a colour-coded figure.
with open('/data/norm_list.txt') as f_norm:
    norm_tables = f_norm.read().splitlines()


# Maps each data directory to the parameter keys of its selected tracks; the
# extra 'total tracks' entry collects the number of fitted tracks per directory.
straight_norm_tracks = {}
straight_norm_tracks['total tracks'] = []
for table in norm_tables[:]:
    # Directory part of the listed path, including the trailing '/'.
    d = table[:table.rfind('/')+1]
    key_counter = 0  # NOTE(review): never read in this cell
    tracks = pd.read_csv(d + 'Track_Data.xls', sep='\t')
    with open(d + "fitted_params.json", "r") as read_file:
        params = json.load(read_file)
    straight_norm_tracks[d] = []
    straight_norm_tracks['total tracks'].append(len(params.keys()))
    for key in tqdm(params.keys()):

        # Rows of the track table that belong to this fitted track.
        track = int(params[key]['track_id'])
        track0 = tracks.loc[tracks['TRACK_ID']==track]

        pi_tot = np.array(params[key]['pi'])
        mu_tot = np.array(params[key]['mu_l'])
        sigma_tot = np.array(params[key]['sigma_l'])
        T_tot = np.array(params[key]['T'])

        X = np.array([track0['POSITION_X'], track0['POSITION_Y']]).T
        l, alpha = convert_to_planar(X)
        # convert_to_planar returns two more step lengths than turning angles;
        # l[1:-1] drops one at each end so both columns have the same length.
        o = np.array([l[1:-1], alpha]).T
        # Decode only tracks with more than two observations and finite parameters.
        if o.shape[0] > 2 and np.isfinite(T_tot).all() and np.isfinite(pi_tot).all() and np.isfinite(mu_tot).all() and np.isfinite(sigma_tot).all():
            # NOTE(review): sigma_alpha_2 is always read from entry '0' rather
            # than from params[key] - presumably it is shared by all tracks; confirm.
            s_est, D, E = viterbi(T_tot, pi_tot, mu_tot, sigma_tot, float(params['0']['sigma_alpha_2']), o)
            # Time indices at which the decoded state is non-zero.
            s1_idx = s_est.nonzero()[0]
            if s1_idx.shape[0]>0:
                # A difference of 1 means two consecutive steps in a non-zero state.
                if 1 in np.diff(s1_idx):
                    Path(d+"Tracks").mkdir(parents=True, exist_ok=True)
                    straight_norm_tracks[d].append(key)
                    # NOTE(review): the two masks below are not used afterwards.
                    s_1_idx = (s_est==0)
                    s_2_idx = (s_est==1)
                    # Draw the track segment by segment: blue where the decoded
                    # state is 0, red otherwise. s_est has X.shape[0]-4 entries,
                    # so s_est[ii+1] stays in range.
                    for ii in range(X.shape[0]-5):
                        if s_est[ii+1] == 0:
                            plt.plot(X[ii+1:ii+3,0], X[ii+1:ii+3,1], c='blue')
                        else:
                            plt.plot(X[ii+1:ii+3,0], X[ii+1:ii+3,1], c='red')
                    plt.savefig(d+"Tracks/"+str(params[key]['track_id']))
                    # Close the figure so plots do not accumulate across tracks.
                    plt.close('all')
                            
                    

100%|██████████| 3037/3037 [03:16<00:00, 15.46it/s]
100%|██████████| 3136/3136 [03:46<00:00, 13.85it/s]  
100%|██████████| 3486/3486 [03:06<00:00, 18.69it/s] 
100%|██████████| 2359/2359 [02:00<00:00, 19.58it/s]
100%|██████████| 1561/1561 [01:13<00:00, 21.19it/s]
100%|██████████| 3478/3478 [03:18<00:00, 17.48it/s] 
100%|██████████| 3672/3672 [03:20<00:00, 18.29it/s] 
100%|██████████| 4213/4213 [06:36<00:00, 10.63it/s]   
100%|██████████| 2587/2587 [01:48<00:00, 23.91it/s]
  2%|▏         | 89/3602 [00:10<08:30,  6.88it/s]

In [None]:
# Collect the fitted parameters of all selected "norm" tracks and the fraction
# of selected tracks per directory.
# Number of fitted tracks per directory, in processing order.
l = np.array(straight_norm_tracks['total tracks'])
mu_l = []
sigma_l = []
pi = []
T = []
PS_norm = []
# 'total tracks' was inserted first, so [1:] leaves only the directory keys
# and ii indexes the matching entry of l.
for ii, key in enumerate(list(straight_norm_tracks.keys())[1:]):
    with open(key + "fitted_params.json", "r") as read_file:
        params = json.load(read_file)
    # Fraction of this directory's fitted tracks that were selected.
    PS_norm.append(len(straight_norm_tracks[key])/l[ii])
    for track in straight_norm_tracks[key]:
        mu_l.append(params[str(track)]['mu_l'])
        sigma_l.append(params[str(track)]['sigma_l'])
        pi.append(params[str(track)]['pi'])
        T.append(params[str(track)]['T'])

# One row per selected track.
mu_l_norm = np.array(mu_l)   
sigma_l_norm = np.array(sigma_l)
pi_norm = np.array(pi)
T_norm = np.array(T)

In [None]:
# Sum of the per-directory track counts currently held in l.
np.sum(l)

In [None]:
# Select and plot tracks of the "noc" data set.
# For every directory listed in noc_list.txt the fitted HMM parameters are
# loaded, the most likely state sequence of every track is decoded with
# viterbi(), and tracks with at least two consecutive time steps in a
# non-zero state are recorded and saved as a colour-coded figure.
with open('/data/noc_list.txt') as f_norm:
    noc_tables = f_norm.read().splitlines()


# Maps each data directory to the parameter keys of its selected tracks; the
# extra 'total tracks' entry collects the number of fitted tracks per directory.
straight_noc_tracks = {}
straight_noc_tracks['total tracks'] = []
for table in noc_tables[:]:
    # Directory part of the listed path, including the trailing '/'.
    d = table[:table.rfind('/')+1]
    key_counter = 0  # NOTE(review): never read in this cell
    tracks = pd.read_csv(d + 'Track_Data.xls', sep='\t')
    with open(d + "fitted_params.json", "r") as read_file:
        params = json.load(read_file)
    straight_noc_tracks[d] = []
    straight_noc_tracks['total tracks'].append(len(params.keys()))
    for key in tqdm(params.keys()):

        # Rows of the track table that belong to this fitted track.
        track = int(params[key]['track_id'])
        track0 = tracks.loc[tracks['TRACK_ID']==track]

        pi_tot = np.array(params[key]['pi'])
        mu_tot = np.array(params[key]['mu_l'])
        sigma_tot = np.array(params[key]['sigma_l'])
        T_tot = np.array(params[key]['T'])

        X = np.array([track0['POSITION_X'], track0['POSITION_Y']]).T
        l, alpha = convert_to_planar(X)
        # convert_to_planar returns two more step lengths than turning angles;
        # l[1:-1] drops one at each end so both columns have the same length.
        o = np.array([l[1:-1], alpha]).T
        # Decode only tracks with more than two observations and finite parameters.
        if o.shape[0] > 2 and np.isfinite(T_tot).all() and np.isfinite(pi_tot).all() and np.isfinite(mu_tot).all() and np.isfinite(sigma_tot).all():
            Path(d+"Tracks").mkdir(parents=True, exist_ok=True)
            # NOTE(review): sigma_alpha_2 is always read from entry '0' rather
            # than from params[key] - presumably it is shared by all tracks; confirm.
            s_est, D, E = viterbi(T_tot, pi_tot, mu_tot, sigma_tot, float(params['0']['sigma_alpha_2']), o)
            # Time indices at which the decoded state is non-zero.
            s1_idx = s_est.nonzero()[0]
            if s1_idx.shape[0]>0:
                # A difference of 1 means two consecutive steps in a non-zero state.
                if 1 in np.diff(s1_idx):
                    straight_noc_tracks[d].append(key)
                    # Draw the track segment by segment: blue where the decoded
                    # state is 0, red otherwise. s_est has X.shape[0]-4 entries,
                    # so s_est[ii+1] stays in range.
                    for ii in range(X.shape[0]-5):
                        if s_est[ii+1] == 0:
                            plt.plot(X[ii+1:ii+3,0], X[ii+1:ii+3,1], c='blue')
                        else:
                            plt.plot(X[ii+1:ii+3,0], X[ii+1:ii+3,1], c='red')
                    plt.savefig(d+"Tracks/"+str(params[key]['track_id']))
                    # Close the figure so plots do not accumulate across tracks.
                    plt.close('all')

100%|██████████| 2667/2667 [01:54<00:00, 23.26it/s]
100%|██████████| 1866/1866 [01:06<00:00, 28.26it/s]
100%|██████████| 2141/2141 [02:12<00:00, 16.19it/s]
100%|██████████| 1076/1076 [00:50<00:00, 21.22it/s]
100%|██████████| 4100/4100 [02:54<00:00, 23.52it/s] 
100%|██████████| 926/926 [00:42<00:00, 21.57it/s]
100%|██████████| 1195/1195 [01:04<00:00, 18.44it/s]
100%|██████████| 856/856 [00:44<00:00, 19.21it/s]
100%|██████████| 1687/1687 [01:06<00:00, 25.29it/s]
100%|██████████| 1201/1201 [01:28<00:00, 13.60it/s]
100%|██████████| 529/529 [00:16<00:00, 32.22it/s]
100%|██████████| 2492/2492 [01:21<00:00, 30.74it/s]
100%|██████████| 582/582 [01:00<00:00,  9.64it/s]
100%|██████████| 3057/3057 [01:47<00:00, 28.39it/s]
100%|██████████| 4670/4670 [03:18<00:00, 23.58it/s]  
100%|██████████| 1700/1700 [00:58<00:00, 29.30it/s]
100%|██████████| 1338/1338 [01:12<00:00, 18.48it/s]
100%|██████████| 1333/1333 [11:16<00:00,  1.97it/s] 
 21%|██▏       | 455/2137 [00:25<00:48, 34.37it/s]

In [None]:
# Collect the fitted parameters of all selected "noc" tracks and the fraction
# of selected tracks per directory.
# Number of fitted tracks per directory, in processing order.
l = np.array(straight_noc_tracks['total tracks'])
mu_l = []
sigma_l = []
pi = []
T = []
PS_noc = []
# Number of keys, i.e. number of directories plus the 'total tracks' entry.
print(len(list(straight_noc_tracks.keys())))
# 'total tracks' was inserted first, so [1:] leaves only the directory keys
# and ii indexes the matching entry of l.
for ii, key in enumerate(list(straight_noc_tracks.keys())[1:]):
    with open(key + "fitted_params.json", "r") as read_file:
        params = json.load(read_file)
    # Fraction of this directory's fitted tracks that were selected.
    PS_noc.append(len(straight_noc_tracks[key])/l[ii])
    for track in straight_noc_tracks[key]:
        mu_l.append(params[str(track)]['mu_l'])
        sigma_l.append(params[str(track)]['sigma_l'])
        pi.append(params[str(track)]['pi'])
        T.append(params[str(track)]['T'])

# One row per selected track.
mu_l_noc = np.array(mu_l)   
sigma_l_noc = np.array(sigma_l)
pi_noc = np.array(pi)
T_noc = np.array(T)

In [None]:
# Select and plot tracks of the "kop" data set.
# For every directory listed in Listekopex.txt the fitted HMM parameters are
# loaded, the most likely state sequence of every track is decoded with
# viterbi(), and tracks with at least two consecutive time steps in a
# non-zero state are recorded and saved as a colour-coded figure.
# Directories whose input files cannot be read are reported and skipped.
with open('/data/Listekopex.txt') as f_norm:
    kop_tables = f_norm.read().splitlines()


# Maps each data directory to the parameter keys of its selected tracks; the
# extra 'total tracks' entry collects the number of fitted tracks per directory.
straight_kop_tracks = {}
straight_kop_tracks['total tracks'] = []
for table in kop_tables[:]:
    # Directory part of the listed path, including the trailing '/'.
    d = table[:table.rfind('/')+1]
    key_counter = 0  # NOTE(review): never read in this cell
    try:
        tracks = pd.read_csv(d + 'Track_Data.xls', sep='\t')
        with open(d + "fitted_params.json", "r") as read_file:
            params = json.load(read_file)
    except (OSError, ValueError):
        # OSError covers missing or unreadable files. ValueError covers
        # malformed content: json.JSONDecodeError, UnicodeDecodeError and the
        # pandas ParserError/EmptyDataError all derive from it. Anything else
        # (e.g. KeyboardInterrupt or a programming error) is no longer
        # swallowed.
        print('Could not read params and/or track file for %s'%d)
        continue
    straight_kop_tracks[d] = []
    straight_kop_tracks['total tracks'].append(len(params.keys()))
    for key in tqdm(params.keys()):

        # Rows of the track table that belong to this fitted track.
        track = int(params[key]['track_id'])
        track0 = tracks.loc[tracks['TRACK_ID']==track]

        pi_tot = np.array(params[key]['pi'])
        mu_tot = np.array(params[key]['mu_l'])
        sigma_tot = np.array(params[key]['sigma_l'])
        T_tot = np.array(params[key]['T'])

        X = np.array([track0['POSITION_X'], track0['POSITION_Y']]).T
        l, alpha = convert_to_planar(X)
        # convert_to_planar returns two more step lengths than turning angles;
        # l[1:-1] drops one at each end so both columns have the same length.
        o = np.array([l[1:-1], alpha]).T
        # Decode only tracks with more than two observations and finite parameters.
        if o.shape[0] > 2 and np.isfinite(T_tot).all() and np.isfinite(pi_tot).all() and np.isfinite(mu_tot).all() and np.isfinite(sigma_tot).all():
            Path(d+"Tracks").mkdir(parents=True, exist_ok=True)
            # NOTE(review): sigma_alpha_2 is always read from entry '0' rather
            # than from params[key] - presumably it is shared by all tracks; confirm.
            s_est, D, E = viterbi(T_tot, pi_tot, mu_tot, sigma_tot, float(params['0']['sigma_alpha_2']), o)
            # Time indices at which the decoded state is non-zero.
            s1_idx = s_est.nonzero()[0]
            if s1_idx.shape[0]>0:
                # A difference of 1 means two consecutive steps in a non-zero state.
                if 1 in np.diff(s1_idx):
                    straight_kop_tracks[d].append(key)
                    # Draw the track segment by segment: blue where the decoded
                    # state is 0, red otherwise. s_est has X.shape[0]-4 entries,
                    # so s_est[ii+1] stays in range.
                    for ii in range(X.shape[0]-5):
                        if s_est[ii+1] == 0:
                            plt.plot(X[ii+1:ii+3,0], X[ii+1:ii+3,1], c='blue')
                        else:
                            plt.plot(X[ii+1:ii+3,0], X[ii+1:ii+3,1], c='red')
                    plt.savefig(d+"Tracks/"+str(params[key]['track_id']))
                    # Close the figure so plots do not accumulate across tracks.
                    plt.close('all')

100%|██████████| 515/515 [00:20<00:00, 25.37it/s]
100%|██████████| 298/298 [00:12<00:00, 24.04it/s]


Could not read params and/or track file for /data/parralelcomputed_trackdata/ko pex/20180427_KOPEX14_pJK26_GFP90ms_Triggered_1_2.cell2.xls_Ordner/


100%|██████████| 358/358 [00:06<00:00, 57.56it/s]
100%|██████████| 212/212 [00:12<00:00, 16.89it/s]
100%|██████████| 511/511 [00:21<00:00, 23.69it/s]
100%|██████████| 97/97 [00:09<00:00,  9.78it/s]
100%|██████████| 478/478 [00:22<00:00, 20.90it/s]
100%|██████████| 372/372 [00:17<00:00, 21.23it/s]
100%|██████████| 225/225 [00:07<00:00, 31.41it/s]
100%|██████████| 1137/1137 [00:46<00:00, 24.56it/s]
100%|██████████| 130/130 [00:03<00:00, 34.78it/s]
100%|██████████| 391/391 [00:30<00:00, 12.72it/s]
100%|██████████| 122/122 [00:03<00:00, 33.89it/s]
100%|██████████| 144/144 [00:06<00:00, 23.18it/s]
100%|██████████| 503/503 [00:27<00:00, 18.50it/s]
100%|██████████| 704/704 [00:31<00:00, 22.03it/s]
100%|██████████| 498/498 [00:29<00:00, 17.00it/s]
100%|██████████| 655/655 [00:20<00:00, 32.35it/s]
100%|██████████| 291/291 [00:10<00:00, 28.28it/s]
100%|██████████| 734/734 [00:51<00:00, 14.12it/s]
100%|██████████| 632/632 [00:24<00:00, 25.86it/s]
100%|██████████| 530/530 [00:18<00:00, 28.95it/s]


Could not read params and/or track file for /data/parralelcomputed_trackdata/ko pex/20180427_KOPX14_pJK26_GFP90ms_Triggered_2_4cell3.xls_Ordner/


100%|██████████| 105/105 [00:02<00:00, 38.68it/s]
100%|██████████| 92/92 [00:06<00:00, 13.40it/s]
100%|██████████| 262/262 [00:20<00:00, 12.49it/s]
100%|██████████| 148/148 [00:24<00:00,  6.11it/s]
100%|██████████| 131/131 [00:07<00:00, 17.34it/s]
100%|██████████| 360/360 [00:17<00:00, 20.91it/s]
100%|██████████| 247/247 [00:04<00:00, 49.67it/s]
100%|██████████| 625/625 [00:34<00:00, 18.00it/s]
100%|██████████| 128/128 [00:08<00:00, 14.62it/s]
100%|██████████| 57/57 [00:03<00:00, 16.30it/s]
100%|██████████| 123/123 [00:05<00:00, 23.96it/s]
100%|██████████| 143/143 [00:06<00:00, 22.01it/s]
100%|██████████| 127/127 [00:09<00:00, 13.99it/s]
100%|██████████| 292/292 [00:46<00:00,  6.30it/s]
100%|██████████| 286/286 [00:12<00:00, 23.26it/s]
100%|██████████| 678/678 [00:34<00:00, 19.43it/s]
100%|██████████| 581/581 [00:16<00:00, 36.02it/s]
100%|██████████| 348/348 [00:09<00:00, 37.25it/s]
100%|██████████| 293/293 [00:11<00:00, 24.74it/s]
100%|██████████| 365/365 [00:15<00:00, 23.56it/s]
100%

In [None]:
# Collect the fitted parameters of all selected "kop" tracks and the fraction
# of selected tracks per directory.
# Number of fitted tracks per successfully read directory, in processing order.
l = np.array(straight_kop_tracks['total tracks'])
mu_l = []
sigma_l = []
pi = []
T = []
PS_kop = []
# Number of keys, i.e. number of directories plus the 'total tracks' entry.
print(len(list(straight_kop_tracks.keys())))
# 'total tracks' was inserted first, so [1:] leaves only the directory keys
# and ii indexes the matching entry of l.
for ii, key in enumerate(list(straight_kop_tracks.keys())[1:]):
    with open(key + "fitted_params.json", "r") as read_file:
        params = json.load(read_file)
    # Fraction of this directory's fitted tracks that were selected.
    PS_kop.append(len(straight_kop_tracks[key])/l[ii])
    for track in straight_kop_tracks[key]:
        mu_l.append(params[str(track)]['mu_l'])
        sigma_l.append(params[str(track)]['sigma_l'])
        pi.append(params[str(track)]['pi'])
        T.append(params[str(track)]['T'])

# One row per selected track.
mu_l_kop = np.array(mu_l)   
sigma_l_kop = np.array(sigma_l)
pi_kop = np.array(pi)
T_kop = np.array(T)

In [None]:
# Box plots of the mean step length per state for the three data sets
# (blue: norm, red: noc, green: kop).
import seaborn as sns
pos = np.array([1, 2])
width = 0.2
# Mean of a log-normal variable is exp(mu + sigma**2/2). sigma_l is the
# standard deviation of the log step length (it is passed as sigma to
# lognorm_pdf), so it has to be squared here.
box1 = plt.boxplot(np.exp(mu_l_norm+sigma_l_norm**2/2), positions=pos-width, widths=width, patch_artist=True)
box2 = plt.boxplot(np.exp(mu_l_noc+sigma_l_noc**2/2), positions=pos, widths=width, patch_artist=True)
box3 = plt.boxplot(np.exp(mu_l_kop+sigma_l_kop**2/2), positions=pos+width, widths=width, patch_artist=True)
plt.axis(ymax=0.3)
plt.ylabel(r'$\mu_l$')
plt.xticks([1,2], [r'$s_1$', r'$s_2$'])

# Colour the boxes by data set.
for p1, p2, p3 in zip(box1['boxes'], box2['boxes'], box3['boxes']):
    p1.set_facecolor('blue')
    p2.set_facecolor('red')
    p3.set_facecolor('green')

In [None]:
# Compare the mean step length of the two states within the norm and the noc
# data sets (paired Wilcoxon signed-rank test) and print the average means.
from scipy.stats import wilcoxon, mannwhitneyu

# Mean of a log-normal variable is exp(mu + sigma**2/2). sigma_l is the
# standard deviation of the log step length (it is passed as sigma to
# lognorm_pdf), so it has to be squared here.
mean_l_norm = np.exp(mu_l_norm+sigma_l_norm**2/2)
mean_l_noc = np.exp(mu_l_noc+sigma_l_noc**2/2)
mean_l_kop = np.exp(mu_l_kop+sigma_l_kop**2/2)
print(wilcoxon(mean_l_norm[:,0], mean_l_norm[:,1]))
print(wilcoxon(mean_l_noc[:,0], mean_l_noc[:,1]))
print(np.mean(mean_l_noc, axis=0))
print(np.mean(mean_l_norm, axis=0))

In [None]:
# Box plots of the fitted initial state distribution pi per state for the
# three data sets (blue: norm, red: noc, green: kop).
pos = np.array([1, 2])
width = 0.2
# The three data sets are drawn side by side around each tick position.
box1 = plt.boxplot(pi_norm, positions=pos-width, widths=width, patch_artist=True)
box2 = plt.boxplot(pi_noc, positions=pos, widths=width, patch_artist=True)
box3 = plt.boxplot(pi_kop, positions=pos+width, widths=width, patch_artist=True)
plt.ylabel(r'$\pi$')
plt.xticks([1,2], [r'$s_1$', r'$s_2$'])

# Colour the boxes by data set.
for p1, p2, p3 in zip(box1['boxes'], box2['boxes'], box3['boxes']):
    p1.set_facecolor('blue')
    p2.set_facecolor('red')
    p3.set_facecolor('green')

In [None]:
# Box plots of the fitted transition probabilities for the three data sets
# (blue: norm, red: noc, green: kop).
pos = np.array([1, 2, 3, 4])
width = 0.2
# Each track's transition matrix is flattened to four values in row-major
# order, which is the order of the tick labels below.
box1 = plt.boxplot(T_norm.reshape((T_norm.shape[0],4)), positions=pos-width, widths=width, patch_artist=True)
box2 = plt.boxplot(T_noc.reshape((T_noc.shape[0],4)), positions=pos, widths=width, patch_artist=True)
box3 = plt.boxplot(T_kop.reshape((T_kop.shape[0],4)), positions=pos+width, widths=width, patch_artist=True)
plt.ylabel(r'T')
plt.xticks([1,2, 3, 4], [r'$s_1->s_1$', r'$s_1->s_2$', r'$s_2->s_1$', r'$s_2->s_2$'])

# Colour the boxes by data set.
for p1, p2, p3 in zip(box1['boxes'], box2['boxes'], box3['boxes']):
    p1.set_facecolor('blue')
    p2.set_facecolor('red')
    p3.set_facecolor('green')

In [None]:
# Box plot of the per-directory fraction of selected tracks for the three
# data sets, in the order norm, noc, kop.
box1 = plt.boxplot([PS_norm, PS_noc, PS_kop], patch_artist=True)

# Colour the boxes by data set.
colors = ['blue', 'red', 'green']
for p1, c in zip(box1['boxes'], colors):
    p1.set_facecolor(c)

In [None]:
# Mann-Whitney U test comparing the T[0, 0] entries of the selected norm and
# kop tracks.
mannwhitneyu(T_norm[:,0,0], T_kop[:,0,0])

In [None]:
# Show the NumPy version used by this kernel.
np.__version__

In [None]:
import matplotlib

In [None]:
# Show the Matplotlib version used by this kernel.
matplotlib.__version__