In [1]:
import numpy as np
import scipy as sp
from scipy import stats
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib import cm
import re
from os import listdir
from scipy.interpolate import griddata
from scipy.ndimage.filters import gaussian_filter

%matplotlib inline
mpl.rcParams['pdf.fonttype'] = 42

In [2]:
pixsize = 8190/60/1.5/1.2/512 #um
dataname_list = [['5ITu_01Pos0','5ITu_03Pos6','DMSO_01Pos7','DMSO_01Pos10',
                 'Taxol_01Pos7','Taxol_01Pos8','Taxol+5ITu_01Pos6'],
                 ['5ITu_01Pos13','5ITu_02Pos7','5ITu_03Pos1',
                'DMSO_01Pos0','DMSO_01Pos4','DMSO_01Pos9',
               'Taxol_01Pos5','Taxol_02Pos12','Taxol_02Pos14',
               'Taxol+5ITu_03Pos5'],
                 ['5ITu_03Pos0','DMSO_01Pos5','DMSO_01Pos6','DMSO_01Pos10','DMSO_02Pos6',
                 'Taxol_01Pos2'],['ZM_01Pos6']]
linepath_list = ['20171006_LineProfiles/','20171009_LineProfiles/','20171012_LineProfiles/','20171021_LineProfiles_cTY31/']
imagepath_list = ['20171006_ImageSequences/','20171009_ImageSequences/','20171012_ImageSequences/','20171021_ImageSequences_cTY31/']

trange0 = np.arange(1,4)   #No treatment
trange1 = np.arange(4,7)   #First treatment (5ITu or Taxol or DMSO)
trange2 = np.arange(7,10)  #Second treatment (5ITu+Taxol)

kkdistrange = (0.6,1.75)  #in um

In [9]:
def get_df_pair(dataname,linepath):
    #load features and profiles, and add kkdists (in pixels)
    df_feat = pd.read_csv(linepath+dataname+'_feat.csv')
    df_feat = df_feat.drop(df_feat.columns[0],axis=1)

    df_prof = pd.read_csv(linepath+dataname+'_prof.csv')
    df_prof = df_prof.drop(df_prof.columns[0],axis=1)
    
    kkdists = []
    poslists = []
    zlists = []
    for i in df_prof.index:
        ids = df_prof.iloc[i][['kinid1','kinid2']].values
        fr = df_prof.iloc[i]['fr']
        feat_sel = df_feat.loc[(df_feat['frame']==fr) & ((df_feat['kinid']==ids[0]) | (df_feat['kinid']==ids[1]))]
        kkdists.append(np.linalg.norm(np.diff(feat_sel[['x','y']].values,axis=0)))
        poslists.append(np.concatenate((feat_sel.iloc[0][['x','y']].values,feat_sel.iloc[1][['x','y']])))
        zlists.append(feat_sel.iloc[0]['z'])

    poslists = np.array(poslists)
    
    df_pair = (df_prof[['fr','kinid1','kinid2','tpoint']]).copy()
    df_pair.loc[:,'z'] = zlists
    df_pair.loc[:,'kkdist'] = kkdists
    df_pair.loc[:,'x1']=poslists[:,0]
    df_pair.loc[:,'y1']=poslists[:,1]
    df_pair.loc[:,'x2']=poslists[:,2]
    df_pair.loc[:,'y2']=poslists[:,3]
    
    # get rid of double-paired kinetochores
    df_pair_grp = df_pair.groupby(['tpoint','z'])
    
    new_df_pair = pd.DataFrame()
    for ind,df in df_pair_grp:
        t,z = ind
        ids = df[['kinid1','kinid2']].values.flatten()
        ids,counts = np.unique(ids,return_counts=True)
        dupids = ids[counts>1]
        mask = np.ones(df.shape[0])
        for dupid in dupids:
            mask = mask*np.prod(~(df[['kinid1','kinid2']].values==dupid),axis=1)
        df = df.loc[mask.astype(bool)].copy()
        new_df_pair = new_df_pair.append(df)
    
    return new_df_pair

In [10]:
linepath = linepath_list[0]
imagepath = imagepath_list[0]
dataname = 'DMSO_01Pos7'

df_pair = get_df_pair(dataname,linepath)

In [12]:
df_pair.head()

Unnamed: 0,fr,kinid1,kinid2,tpoint,z,kkdist,x1,y1,x2,y2
52,0,4,6,1,1.0,9.620535,190.387892,273.688341,181.632832,277.676274
53,0,5,8,1,1.0,9.850376,200.287289,276.120867,194.206439,283.870265
113,1,0,2,1,2.0,10.458541,173.163569,250.69052,166.016911,258.326381
114,1,5,6,1,2.0,4.958473,194.168209,267.002523,189.239366,267.543786
115,1,8,9,1,2.0,4.148427,192.412117,272.140613,188.872038,274.303318


In [13]:
imgnamelist = np.array(listdir(imagepath+dataname))
t,z = (1,1)

imgpath = imagepath+dataname+'/'
imgname_488 = imgnamelist[np.where([len(re.findall('\_t%03d_z%03d_c001' %(t,z),imgname))>0 
                               for imgname in imgnamelist])][0]
imgname_560 = imgnamelist[np.where([len(re.findall('\_t%03d_z%03d_c002' %(t,z),imgname))>0 
                               for imgname in imgnamelist])][0]

img488 = np.double(plt.imread(imgpath+imgname_488))
img560 = np.double(plt.imread(imgpath+imgname_560))

In [14]:
def getInt(df_pair,imgpath):
    imgnamelist = np.array(listdir(imgpath))
    
    df_pair.loc[:,'int488_1'] = 0
    df_pair.loc[:,'int488_2'] = 0
    df_pair.loc[:,'int560_1'] = 0
    df_pair.loc[:,'int560_2'] = 0
    df_pair.loc[:,'int560_mid'] = 0
    
    for t,z in df_pair[['tpoint','z']].drop_duplicates().values:
        select_df = (df_pair.loc[(df_pair['tpoint']==t) & (df_pair['z']==z)]).copy()
                
        # load images
        imgname_488 = imgnamelist[np.where([len(re.findall('\_t%03d_z%03d_c001' %(t,z),imgname))>0 
                                       for imgname in imgnamelist])][0]
        imgname_560 = imgnamelist[np.where([len(re.findall('\_t%03d_z%03d_c002' %(t,z),imgname))>0 
                                       for imgname in imgnamelist])][0]    
        
        img488 = np.double(plt.imread(imgpath+imgname_488))
        img560 = np.double(plt.imread(imgpath+imgname_560))
        
        # A meshgrid of pixel coordinates
        nx, ny = img488.shape[1], img488.shape[0]
        X, Y = np.meshgrid(np.arange(0, nx, 1), np.arange(0, ny, 1))
        
        mask = np.any(np.array([(X.flatten()-x)**2+(Y.flatten()-y)**2<=9 
                                for x,y in list(zip(select_df[['x1','x2']].values.flatten(),
                                                    select_df[['y1','y2']].values.flatten()))]),axis=0)
        
        points = list(zip(X.flatten()[mask],Y.flatten()[mask]))
        zz488 = img488.flatten()[mask]
        zz560 = img560.flatten()[mask]
        
        select_df.loc[:,'int488_1'] = griddata(points,zz488,select_df[['x1','y1']].values,method='cubic')
        select_df.loc[:,'int488_2'] = griddata(points,zz488,select_df[['x2','y2']].values,method='cubic')
        select_df.loc[:,'int560_1'] = griddata(points,zz560,select_df[['x1','y1']].values,method='cubic')
        select_df.loc[:,'int560_2'] = griddata(points,zz560,select_df[['x2','y2']].values,method='cubic')
        select_df.loc[:,'int560_mid'] = griddata(points,zz560,(select_df[['x1','y1']].values+select_df[['x2','y2']].values)/2,
                                                 method='cubic')
        
        df_pair.loc[(df_pair['tpoint']==t) & (df_pair['z']==z)] = select_df.copy()
    
    return df_pair

In [15]:
getInt(df_pair,imgpath)

Unnamed: 0,fr,kinid1,kinid2,tpoint,z,kkdist,x1,y1,x2,y2,int488_1,int488_2,int560_1,int560_2,int560_mid
52,0,4,6,1,1.0,9.620535,190.387892,273.688341,181.632832,277.676274,6949.833581,22932.112720,26278.969900,5525.475248,17941.986317
53,0,5,8,1,1.0,9.850376,200.287289,276.120867,194.206439,283.870265,25043.376261,7926.817734,8592.600239,12002.277157,41022.558969
113,1,0,2,1,2.0,10.458541,173.163569,250.690520,166.016911,258.326381,20816.280104,18673.720560,11706.671649,6003.342359,19538.420796
114,1,5,6,1,2.0,4.958473,194.168209,267.002523,189.239366,267.543786,16737.956192,17319.798623,5954.070170,15341.184238,9282.620856
115,1,8,9,1,2.0,4.148427,192.412117,272.140613,188.872038,274.303318,16002.811309,15896.506632,27356.519680,28610.841215,30767.561783
116,1,11,12,1,2.0,10.624734,200.988076,285.002900,193.714876,292.747934,20964.751363,16559.884190,17178.211975,5933.718610,19302.830625
399,2,0,1,1,3.0,12.193751,172.757904,238.923668,162.774557,245.925121,21109.114107,14517.853843,13069.783851,14616.175954,15754.237206
400,2,2,3,1,3.0,10.115897,172.122554,246.742311,164.264151,253.112309,19575.274464,12529.572934,11382.376652,6770.201387,25797.253626
401,2,4,5,1,3.0,9.713908,173.501129,256.375847,167.577926,264.074916,9222.480962,15268.129534,16282.323514,4435.702771,13555.238558
402,2,9,11,1,3.0,8.617480,197.747069,276.869347,192.922201,284.009488,19229.923766,13717.918903,11050.747329,12355.302014,24317.469587


In [16]:
# determine cytoplasmic background level
def getCytoBg(df_pair,imgpath,windowsize=20/pixsize):
    # Determine center of the cell
    x = np.concatenate((df_pair.loc[df_pair['tpoint']==1,'x1'],df_pair.loc[df_pair['tpoint']==1,'x2']))
    y = np.concatenate((df_pair.loc[df_pair['tpoint']==1,'y1'],df_pair.loc[df_pair['tpoint']==1,'y2']))

    cenx = np.mean(x)
    ceny = np.mean(y)

    # Load first timepoint images to compute cytoplasmic background level
    imgnamelist = np.array(listdir(imgpath))

    mask488 = [len(re.findall('\_t001_z\d*_c001',imgname))>0 for imgname in imgnamelist]
    mask560 = [len(re.findall('\_t001_z\d*_c002',imgname))>0 for imgname in imgnamelist]

    imgnamelist0_488 = imgnamelist[np.array(mask488)]
    imgnamelist0_560 = imgnamelist[np.array(mask560)]

    imgs488 = []
    filtimgs488 = []
    imgs560 = []
    filtimgs560 = []

    for fname in imgnamelist0_488:
        temp = plt.imread(imgpath+fname)
        imgs488.append(temp[int(ceny-windowsize/2):int(ceny+windowsize/2),int(cenx-windowsize/2):int(cenx+windowsize/2)])
        
    for img488 in imgs488:
        filtimgs488.append(gaussian_filter(np.double(img488),1))

    temp = np.array(filtimgs488)
    temp = temp.flatten()
    hh,edges = np.histogram(temp,bins=100)
    cytobg488 = np.mean(temp[(temp>edges[np.argmax(hh)]) & (temp<edges[np.argmax(hh)+1])])
    
    for fname in imgnamelist0_560:
        temp = plt.imread(imgpath+fname)
        imgs560.append(temp[int(ceny-windowsize/2):int(ceny+windowsize/2),int(cenx-windowsize/2):int(cenx+windowsize/2)])

    for img560 in imgs560:
        filtimgs560.append(gaussian_filter(np.double(img560),1))

    temp = np.array(filtimgs560)
    temp = temp.flatten()
    hh,edges = np.histogram(temp,bins=100)
    cytobg560 = np.mean(temp[(temp>edges[np.argmax(hh)]) & (temp<edges[np.argmax(hh)+1])])
    
    return cytobg488,cytobg560

In [17]:
# normalize intensities based on cytoplasmic background and the average intensity of mCherry at the mid point between 
# two sister kinetochores and the average GFP intensity at kinetochores before drug treatments
def normalize_intensity(df_pair,imgpath,windowsize=20/pixsize,trange0=trange0):
    cytobg488,cytobg560 = getCytoBg(df_pair,imgpath,windowsize=windowsize)
    
    #determine max 488, 560 intentsity
    mask = [t in trange0 for t in df_pair['tpoint']]

    avg_int560_mid = df_pair.loc[mask,'int560_mid'].mean()
    avg_int488 = (df_pair.loc[mask,['int488_1','int488_2']].values).flatten().mean()
    
    df_pair['normint488_1'] = (df_pair['int488_1']-cytobg488)/(avg_int488-cytobg488)
    df_pair['normint488_2'] = (df_pair['int488_2']-cytobg488)/(avg_int488-cytobg488)
    df_pair['normint560_1'] = (df_pair['int560_1']-cytobg560)/(avg_int560_mid-cytobg560)
    df_pair['normint560_2'] = (df_pair['int560_2']-cytobg560)/(avg_int560_mid-cytobg560)
    df_pair['normint560_mid'] = (df_pair['int560_mid']-cytobg560)/(avg_int560_mid-cytobg560)
    
    return df_pair
    

In [18]:
normalize_intensity(df_pair,imgpath)

Unnamed: 0,fr,kinid1,kinid2,tpoint,z,kkdist,x1,y1,x2,y2,int488_1,int488_2,int560_1,int560_2,int560_mid,normint488_1,normint488_2,normint560_1,normint560_2,normint560_mid
52,0,4,6,1,1.0,9.620535,190.387892,273.688341,181.632832,277.676274,6949.833581,22932.112720,26278.969900,5525.475248,17941.986317,0.396594,1.633091,1.376146,0.177989,0.894829
53,0,5,8,1,1.0,9.850376,200.287289,276.120867,194.206439,283.870265,25043.376261,7926.817734,8592.600239,12002.277157,41022.558969,1.796432,0.472180,0.355063,0.551913,2.227335
113,1,0,2,1,2.0,10.458541,173.163569,250.690520,166.016911,258.326381,20816.280104,18673.720560,11706.671649,6003.342359,19538.420796,1.469396,1.303633,0.534847,0.205577,0.986996
114,1,5,6,1,2.0,4.958473,194.168209,267.002523,189.239366,267.543786,16737.956192,17319.798623,5954.070170,15341.184238,9282.620856,1.153869,1.198884,0.202733,0.744677,0.394899
115,1,8,9,1,2.0,4.148427,192.412117,272.140613,188.872038,274.303318,16002.811309,15896.506632,27356.519680,28610.841215,30767.561783,1.096993,1.088769,1.438356,1.510772,1.635285
116,1,11,12,1,2.0,10.624734,200.988076,285.002900,193.714876,292.747934,20964.751363,16559.884190,17178.211975,5933.718610,19302.830625,1.480883,1.140092,0.850734,0.201558,0.973394
399,2,0,1,1,3.0,12.193751,172.757904,238.923668,162.774557,245.925121,21109.114107,14517.853843,13069.783851,14616.175954,15754.237206,1.492051,0.982107,0.613543,0.702821,0.768524
400,2,2,3,1,3.0,10.115897,172.122554,246.742311,164.264151,253.112309,19575.274464,12529.572934,11382.376652,6770.201387,25797.253626,1.373383,0.828280,0.516124,0.249850,1.348336
401,2,4,5,1,3.0,9.713908,173.501129,256.375847,167.577926,264.074916,9222.480962,15268.129534,16282.323514,4435.702771,13555.238558,0.572421,1.040154,0.799012,0.115073,0.641570
402,2,9,11,1,3.0,8.617480,197.747069,276.869347,192.922201,284.009488,19229.923766,13717.918903,11050.747329,12355.302014,24317.469587,1.346665,0.920219,0.496978,0.572294,1.262903


In [19]:
def assign_groups(df,dataname,tranges=(trange0,trange1,trange2)):
    trange0,trange1,trange2 = tranges
    
    #remove timepoints out of range, and 
    grp = re.split('\_',dataname)[0]
    if (grp == 'DMSO') | (grp=='Taxol+5ITu'):
        mask = [(t in trange0) | (t in trange1) | (t in trange2) for t in df['tpoint']]
    elif (grp == '5ITu') | (grp=='Taxol') | (grp=='ZM'):
        mask = [(t in trange0) | (t in trange1) for t in df['tpoint']]
    else:
        print('Group could not be defined for %s' %df['Data Name'])

    df = df.loc[mask].copy()

    # add 'Group' column
    df.loc[:,'Group'] = 'NA'

    df.loc[[(t in trange0) for t in df['tpoint']],'Group'] = 'None'

    if (grp=='Taxol+5ITu'):
        df.loc[[(t in trange1) for t in df['tpoint']],'Group'] = 'Taxol'
        df.loc[[(t in trange2) for t in df['tpoint']],'Group'] = 'Taxol+5ITu'
    elif (grp == '5ITu') | (grp=='Taxol') | (grp=='ZM'):
        df.loc[[(t not in trange0) for t in df['tpoint']],'Group'] = grp
    elif (grp == 'DMSO'):
        df.loc[[(t in trange1) for t in df['tpoint']],'Group'] = 'DMSO'
        df.loc[[(t in trange2) for t in df['tpoint']],'Group'] = 'DMSO_cont'
        
    return df

In [20]:
assign_groups(df_pair,dataname)

Unnamed: 0,fr,kinid1,kinid2,tpoint,z,kkdist,x1,y1,x2,y2,...,int488_2,int560_1,int560_2,int560_mid,normint488_1,normint488_2,normint560_1,normint560_2,normint560_mid,Group
52,0,4,6,1,1.0,9.620535,190.387892,273.688341,181.632832,277.676274,...,22932.112720,26278.969900,5525.475248,17941.986317,0.396594,1.633091,1.376146,0.177989,0.894829,
53,0,5,8,1,1.0,9.850376,200.287289,276.120867,194.206439,283.870265,...,7926.817734,8592.600239,12002.277157,41022.558969,1.796432,0.472180,0.355063,0.551913,2.227335,
113,1,0,2,1,2.0,10.458541,173.163569,250.690520,166.016911,258.326381,...,18673.720560,11706.671649,6003.342359,19538.420796,1.469396,1.303633,0.534847,0.205577,0.986996,
114,1,5,6,1,2.0,4.958473,194.168209,267.002523,189.239366,267.543786,...,17319.798623,5954.070170,15341.184238,9282.620856,1.153869,1.198884,0.202733,0.744677,0.394899,
115,1,8,9,1,2.0,4.148427,192.412117,272.140613,188.872038,274.303318,...,15896.506632,27356.519680,28610.841215,30767.561783,1.096993,1.088769,1.438356,1.510772,1.635285,
116,1,11,12,1,2.0,10.624734,200.988076,285.002900,193.714876,292.747934,...,16559.884190,17178.211975,5933.718610,19302.830625,1.480883,1.140092,0.850734,0.201558,0.973394,
399,2,0,1,1,3.0,12.193751,172.757904,238.923668,162.774557,245.925121,...,14517.853843,13069.783851,14616.175954,15754.237206,1.492051,0.982107,0.613543,0.702821,0.768524,
400,2,2,3,1,3.0,10.115897,172.122554,246.742311,164.264151,253.112309,...,12529.572934,11382.376652,6770.201387,25797.253626,1.373383,0.828280,0.516124,0.249850,1.348336,
401,2,4,5,1,3.0,9.713908,173.501129,256.375847,167.577926,264.074916,...,15268.129534,16282.323514,4435.702771,13555.238558,0.572421,1.040154,0.799012,0.115073,0.641570,
402,2,9,11,1,3.0,8.617480,197.747069,276.869347,192.922201,284.009488,...,13717.918903,11050.747329,12355.302014,24317.469587,1.346665,0.920219,0.496978,0.572294,1.262903,


### Batch analysis

In [21]:
Nsets = len(linepath_list)

df_pair_list = []

for seti in range(Nsets):
    linepath = linepath_list[seti]
    imagepath = imagepath_list[seti]
    
    for dataname in dataname_list[seti]:
        imgpath = imagepath+dataname+'/'
        
        #load features and profiles (with kkdists in pixel)
        df_pair = get_df_pair(dataname,linepath)
        df_pair['Data Name'] = 'Set%d_%s' %(seti,dataname)
            
        #filter out pairs with kk distance out of kkdistrange
        df_pair = df_pair.loc[(df_pair['kkdist']*pixsize>=kkdistrange[0]) & (df_pair['kkdist']*pixsize<=kkdistrange[1])]
        
        #assign group
        df_pair = assign_groups(df_pair,dataname)
        
        #get intensities
        df_pair = getInt(df_pair,imgpath)

        
        #normalize based on cytoplasmic background and peak height
        df_pair = normalize_intensity(df_pair,imgpath)
        
        df_pair_list.append(df_pair)
        
df_pair_all = pd.concat(df_pair_list)

In [22]:
# Save df_prof
df_pair_all.to_csv('kinetochore_pair_intensities.csv',index=False)

In [23]:
Nsets = len(linepath_list)

cytobg_list = []  #488 and 560

for seti in range(Nsets):
    linepath = linepath_list[seti]
    imagepath = imagepath_list[seti]
    
    for dataname in dataname_list[seti]:
        imgpath = imagepath+dataname+'/'
        
        df_pair = df_pair_all[df_pair_all['Data Name'] == 'Set%d_%s' %(seti,dataname)]
        #get intensities
        cytobg_list.append(getCytoBg(df_pair,imgpath))


In [24]:
cytobg_list

[(1615.0977196094698, 1805.3784127347983),
 (1589.0155130582093, 1764.461251828986),
 (1822.4992470135076, 2427.4300798658892),
 (1691.0767963813512, 1680.5292325475702),
 (1696.7787144339488, 1737.7031431686548),
 (1709.2485900464876, 1992.5281479284736),
 (1697.604662143083, 1996.6984850778188),
 (1669.9592096485649, 1754.0281123855145),
 (1615.3872462597265, 1896.8155250625675),
 (1762.7849495640451, 1763.4989033189906),
 (1737.1076518103762, 2801.443873131233),
 (1573.0439394329051, 1807.3087832920717),
 (1771.3286485138742, 1672.2373231798613),
 (1945.6399674407896, 1755.9529937335385),
 (1866.3475174054577, 1804.3298413045861),
 (1762.2899809701371, 1751.7890890329895),
 (1731.7885028090509, 1782.1791136952011),
 (1720.0406720994192, 1795.2000615929237),
 (1731.3339160756711, 1998.1069469703184),
 (1686.1195896329141, 1756.3466180382582),
 (1706.2388652142643, 1797.7797919932582),
 (1747.0908225461935, 1850.3653200920785),
 (1736.7755352004649, 2593.9760188258883),
 (1680.9005998

In [26]:
grped_df.size()

Group
5ITu           742
DMSO          1021
DMSO_cont     1038
None          2646
Taxol          926
Taxol+5ITu     178
ZM             100
dtype: int64

In [27]:
def getBinStats(df_pair,nbins=5,kkdistrange=kkdistrange,pixsize=pixsize,edges=[]):
    # Average, sem of profiles within groups of pairs with similar K-K dists.
    # Either nbins or edges need to be provided

    #if bin edge is not provided, determine edges based on quantiles
    if len(edges) == 0: 
        df_pair = df_pair.loc[(df_pair['kkdist']*pixsize>=kkdistrange[0]) & (df_pair['kkdist']*pixsize<=kkdistrange[1])]
        labels,retbins = pd.qcut(df_pair['kkdist'],np.linspace(0,1,num=nbins+1),labels=np.arange(nbins),retbins=True)
    else:  # if edge is provided, nbins = len(edges)-1
        df_pair = df_pair.loc[(df_pair['kkdist']*pixsize>=edges[0]) & (df_pair['kkdist']*pixsize<=edges[-1])]
        retbins = edges
        nbins = len(edges)-1
        labels = np.zeros(df_pair.shape[0])
        for label in np.arange(nbins):
            labels[(df_pair['kkdist']*pixsize>=edges[label]) & (df_pair['kkdist']*pixsize<=edges[label+1])] = label
    
    res_dicts = []
    for i in np.arange(nbins):
        df = df_pair.loc[labels==i]
        
        avgnormint488 = df[['normint488_1','normint488_2']].values.flatten().mean()
        semnormint488 = np.std(df[['normint488_1','normint488_2']].values.flatten(),axis=0)/np.sqrt(df.shape[0]*2)
        avgnormint560 = df[['normint560_1','normint560_2']].values.flatten().mean()
        semnormint560 = np.std(df[['normint560_1','normint560_2']].values.flatten(),axis=0)/np.sqrt(df.shape[0]*2)
        avgnormint560_mid = df['normint560_mid'].values.mean()
        semnormint560_mid = np.std(df['normint560_mid'].values,axis=0)/np.sqrt(df.shape[0])
        
        avgkkdist = df['kkdist'].mean()
        sdkkdist = df['kkdist'].std()
        kkdist_q1,kkdist_q2,kkdist_q3 = df['kkdist'].quantile([0.25,0.5,0.75])
        
        res_dicts.append({'kkdist_range':(retbins[i],retbins[i+1]),'avgnormint488':avgnormint488,'avgnormint560':avgnormint560,
                          'semnormint488':semnormint488,'semnormint560':semnormint560,'avgnormint560_mid':avgnormint560_mid,
                          'semnormint560_mid':semnormint560_mid,'avgkkdist':avgkkdist,'sdkkdist':sdkkdist,'Npairs':df.shape[0],
                          'kkdist_q1':kkdist_q1,'kkdist_q2':kkdist_q2,'kkdist_q3':kkdist_q3})
        
    return pd.DataFrame(res_dicts)

In [28]:
# import FLIM data mat files
from scipy import io
mat = io.loadmat('dist_all_result_bin8_WT.mat')
FLIMWT = {'edges':mat['Edge'][0][0].flatten(),'mean_FRET_fraction':mat['mean_Y_dist'][0][0].flatten(),
          'std_FRET_fraction':mat['std_Y_dist'][0][0].flatten(),'X':mat['X'][0][0].flatten()}

mat = io.loadmat('dist_all_result_bin8_Treated.mat')
FLIM5ITu = {'edges':mat['Edge'][0][0].flatten(),'mean_FRET_fraction':mat['mean_Y_dist'][0][0].flatten(),
          'std_FRET_fraction':mat['std_Y_dist'][0][0].flatten(),'X':mat['X'][0][0].flatten()}

mat = io.loadmat('dist_all_result_bin4_Taxol.mat')
FLIMTaxol = {'edges':mat['Edge'][0][0].flatten(),'mean_FRET_fraction':mat['mean_Y_dist'][0][0].flatten(),
          'std_FRET_fraction':mat['std_Y_dist'][0][0].flatten(),'X':mat['X'][0][0].flatten()}


In [29]:
df_res_list = [getBinStats(df_pair_all[(df_pair_all['Group']=='DMSO')],edges=FLIMWT['edges']),
               getBinStats(df_pair_all[(df_pair_all['Group']=='Taxol')],edges=FLIMTaxol['edges']),
               getBinStats(df_pair_all[(df_pair_all['Group']=='5ITu')],edges=FLIM5ITu['edges'])]


In [30]:
# Save df_res_list
grps = ['DMSO','Taxol','5ITu']
for grp,df_res in zip(grps,df_res_list):
    df_res.to_json('Dist_vs_INCENP_analysis_%s.json' %grp)
    