## Statistics extraction from Imaris

#### Load required libraries

In [54]:
import seaborn as sns
import pandas as pd
import numpy as np
import os
import tools.tools as tls
import tools.globalsettings as gbs
logr = gbs.lgr

In [13]:
# Root directory of the Imaris export that is scanned below (passed to tls.treedir).
inpath = '/home/bcardoen/mountcedar/project/data/nabi-robert-ivan/RL.ZikaEr.001/Imaris_Export_20181126/Imaris_ER_Analysis'
# NOTE(review): outpath is not used in the visible cells; presumably the
# directory where results are written — confirm against later cells.
outpath = '/home/bcardoen/SFUVault/Repositories/nbr/output'

In [94]:
# Logger used by parse_filename below; same binding as in the imports cell.
logr = gbs.lgr
def parse_filename(fname):
    '''
    Decode the metadata encoded in an underscore-separated export name.

    The name is split on '_' and each token is inspected; for every field the
    first token that matches wins:

    * date     -- token containing '201'; if it does not contain '2018'
                  an '8' is appended to it.
    * infected -- False if a token contains 'Mock', True if it contains 'ZIKV'.
    * cellid   -- integer following 'Series' inside a token.
    * repnr    -- integer left after removing 'Rep' from a token.
    * ermarker -- 'KDEL' or 'Sec61'.

    Parameters
    ----------
    fname : str
        File or directory name to decode.

    Returns
    -------
    dict
        Keys 'date', 'infected', 'cellid', 'repnr', 'ermarker'.

    Raises
    ------
    ValueError
        If the text after 'Series' is not an integer, or if any of the five
        fields could not be decoded.
    '''
    infecteds = ['Mock', 'ZIKV']
    markers = ['KDEL', 'Sec61']
    date = None
    infected = None
    cellid = None
    repnr = None
    ermarker = None
    for c in fname.split('_'):
        if '201' in c and date is None:
            date = c if '2018' in c else c + '8'
        if infected is None:
            for i in infecteds:
                if i in c:
                    # Position in infecteds doubles as the flag:
                    # 'Mock' -> False, 'ZIKV' -> True.
                    infected = bool(infecteds.index(i))
                    break
        # Compare against None (not truthiness) so that a decoded 0 is kept.
        if 'Series' in c and cellid is None:
            start = c.index('Series')
            digit = c[start + len('Series'):]
            try:
                cellid = int(digit)
            except ValueError:
                logr.error('\t  Failed to decode cellnr {} --> Not an integer for filename \n{}'.format(digit, fname))
                raise
        if 'Rep' in c and repnr is None:
            digit = c.replace('Rep', '')
            try:
                repnr = int(digit)
            except ValueError:
                # Not fatal here: a later token may still provide the replicate
                # number; if none does, the completeness check below raises.
                logr.error('\t  Failed to decode Repnr {} --> Not an integer for filename \n{}'.format(digit, fname))
        if ermarker is None:
            for marker in markers:
                if marker in c:
                    ermarker = marker
                    break
    if date and '14' in date:  # Edge case
        if repnr == 2:
            repnr = 1
    res = {'date':date, 'infected':infected, 'cellid':cellid, 'repnr':repnr, 'ermarker':ermarker}
    # A field is missing only when it is still None. Truthiness would wrongly
    # reject infected == False ('Mock') and a cellid/repnr of 0.
    if all(value is not None for value in res.values()):
        return res
    logr.error('Failed decodion {} \n {}'.format(fname, res))
    raise ValueError('{} failed decoding'.format(fname))

In [160]:
def _check_feature(featurenames, fname):
    '''
    Return the first entry of featurenames that occurs as a substring of
    fname, or None when no entry matches. Entries are tried in list order.
    '''
    logr.info('\t Checking for features {}'.format(featurenames))
    for candidate in featurenames:
        if candidate in fname:
            logr.info('\t Found {} in {}'.format(candidate, fname))
            return candidate
    return None
    

# Features extracted when the caller does not pass an explicit list.
selected = ['Volume', 'Area', 'BoundingBoxAA_Length', 'Sphericity']
def parse_directory(dirname, featurenames = None):
    '''
    Collect summary statistics for every selected feature CSV in dirname.

    Each entry of tls.treedir(dirname) whose name ends in '.csv' and contains
    one of featurenames (default: the module-level `selected`) is handed to
    parse_featurefile. The resulting statistics are stored under the key
    '<feature>_<statistic>'.
    NOTE(review): assumes treedir maps entry names to something
    parse_featurefile can open (presumably a path) — confirm in tools.tools.

    Returns a flat dict of statistics.
    Raises ValueError if two files produce the same '<feature>_<statistic>' key.
    '''
    wanted = selected if featurenames is None else featurenames
    logr.info('\tSelected Features: {}'.format(wanted))
    listing = tls.treedir(dirname)
    logr.info('\t Found {} files'.format(len(listing)))
    rx = {}
    for fname, ct in listing.items():
        if not fname.endswith('.csv'):
            continue
        feature = _check_feature(wanted, fname)
        if feature is None:
            continue
        logr.info('\t Feature FOUND{}'.format(feature))
        logr.info('\t Reading csv file')
        for statname, value in parse_featurefile(ct).items():
            key = '{}_{}'.format(feature, statname)
            if key in rx:
                logr.error('ERROR : {} already in rx'.format(key))
                raise ValueError
            rx[key] = value
    return rx
            
    # For each CSV in the directory
    # If feature is in the filename list
    # Load the CSV, skip the first 4 lines (skiprows=4 in parse_featurefile)
    # Load the first column, save it as feature name
    # Compute mean, median, std, N, min, max, sum
    # save in a pandas frame
    
def parse_featurefile(csvfile):
    '''
    Compute summary statistics over the first column of a feature CSV.

    The first 4 lines are skipped, lines starting with '#' are treated as
    comments, and only column 0 is read (comma-delimited).

    Parameters
    ----------
    csvfile : anything accepted by np.loadtxt
        Path, open file, or list of lines.

    Returns
    -------
    dict
        Keys 'mean', 'std', 'sum', 'min', 'max', 'median' and 'N'
        (the number of values read).

    Raises
    ------
    Exception
        Whatever np.loadtxt or the reductions raise (e.g. OSError for an
        unreadable file, ValueError for non-numeric or empty data). The
        failure is logged and then re-raised.
    '''
    try:
        # ndmin=1 keeps a file with a single data row as a length-1 array;
        # without it loadtxt returns a 0-d array and len() fails.
        values = np.loadtxt(csvfile, comments='#', delimiter=',',
                            skiprows=4, usecols=0, ndmin=1)
        return {'mean': np.mean(values), 'std': np.std(values),
                'sum': np.sum(values), 'min': np.min(values),
                'max': np.max(values), 'median': np.median(values),
                'N': len(values)}
    except Exception:
        # Log which file failed, then propagate: returning None here would
        # only defer the crash to the caller's res.items().
        logr.error('Failed decoding csv {}'.format(csvfile))
        raise

In [14]:
# Scan the export root. Judging from its use below, treedir returns a dict
# keyed by entry name with nested dicts for sub-directories — TODO confirm
# against tools.tools.
tree = tls.treedir(inpath)

In [23]:
print('\t Have conditions:')
# A condition is any top-level entry of the tree whose value is itself a dict.
conditions = [name for name, entry in tree.items() if isinstance(entry, dict)]

	 Have conditions:
['ER_withdsRNA_NS4B', 'Central_ER_without_dsRNA', 'ER_CentralER_by boundingboxZ', 'Peripheral_ER_with_dsRNA_NS4B', 'Central_ER_with_NS4B', 'Central_ER_with_dsRNA', 'ER_with_dsRNA', 'Central_ER_with_dsRNA_NS4B', 'ER_with_NS4B', 'ER_PeripheralER_by_boundingboxZ', 'Peripheral_ER_with_NS4B']


In [102]:
# Index every '*Statistics' sub-directory of each condition by its decoded
# metadata (plus the condition name); stop if two names decode to the same key.
data = {}
for C in conditions:
    print('\t {}'.format(C))
    for fname, ct in tree[C].items():
        # Only dict-valued entries whose name ends in 'Statistics' are indexed.
        if not (isinstance(ct, dict) and fname.endswith('Statistics')):
            continue
        result = parse_filename(fname)
        result['condition'] = C
        # Hashable, order-independent key: the (field, value) pairs sorted by field.
        key = tuple(sorted(result.items()))
        if key in data:
            logr.error('ERROR : {} is duplicate'.format(key))
            logr.error('ERROR : Old entry by fname {}'.format(data[key]))
            logr.error('ERROR : new entry by fname {}'.format(fname))
            logr.error('ERROR : Treatment = {}'.format(C))
            raise ValueError
        data[key] = fname

	 ER_withdsRNA_NS4B
	 Central_ER_without_dsRNA
	 ER_CentralER_by boundingboxZ
	 Peripheral_ER_with_dsRNA_NS4B
	 Central_ER_with_NS4B
	 Central_ER_with_dsRNA
	 ER_with_dsRNA
	 Central_ER_with_dsRNA_NS4B
	 ER_with_NS4B
	 ER_PeripheralER_by_boundingboxZ
	 Peripheral_ER_with_NS4B


In [109]:
# Manual check: statistics of a single Area CSV.
d = parse_featurefile('/home/bcardoen/mountcedar/project/data/nabi-robert-ivan/RL.ZikaEr.001/Imaris_Export_20181126/Imaris_ER_Analysis/ER_PeripheralER_by_boundingboxZ/NS4B2_ZIKV_KDEL_500_3D_Rep2_April142018_2_C0_Z000_Series003_Statistics/NS4B2_ZIKV_KDEL_500_3D_Rep2_April142018_2_C0_Z000Series003_Area.csv')

In [110]:
d

{'mean': 0.5658937168924639,
 'std': 0.46757971955809075,
 'sum': 1336.6409592999998,
 'min': 0.0413095,
 'max': 3.45479,
 'median': 0.456437,
 'N': 2362}

In [161]:
# Manual check: statistics for all default features of one Statistics directory.
q = parse_directory('/home/bcardoen/mountcedar/project/data/nabi-robert-ivan/RL.ZikaEr.001/Imaris_Export_20181126/Imaris_ER_Analysis/ER_PeripheralER_by_boundingboxZ/NS4B2_ZIKV_KDEL_500_3D_Rep2_April142018_2_C0_Z000_Series003_Statistics')

[<ipython-input-160-3a021b121bab>:15 -      parse_directory() ] 	Selected Features: ['Volume', 'Area', 'BoundingBoxAA_Length', 'Sphericity']
[<ipython-input-160-3a021b121bab>:17 -      parse_directory() ] 	 Found 56 files
[<ipython-input-160-3a021b121bab>:2 -       _check_feature() ] 	 Checking for features ['Volume', 'Area', 'BoundingBoxAA_Length', 'Sphericity']
[<ipython-input-160-3a021b121bab>:2 -       _check_feature() ] 	 Checking for features ['Volume', 'Area', 'BoundingBoxAA_Length', 'Sphericity']
[<ipython-input-160-3a021b121bab>:2 -       _check_feature() ] 	 Checking for features ['Volume', 'Area', 'BoundingBoxAA_Length', 'Sphericity']
[<ipython-input-160-3a021b121bab>:2 -       _check_feature() ] 	 Checking for features ['Volume', 'Area', 'BoundingBoxAA_Length', 'Sphericity']
[<ipython-input-160-3a021b121bab>:2 -       _check_feature() ] 	 Checking for features ['Volume', 'Area', 'BoundingBoxAA_Length', 'Sphericity']
[<ipython-input-160-3a021b121bab>:2 -       _check_feature

[<ipython-input-160-3a021b121bab>:2 -       _check_feature() ] 	 Checking for features ['Volume', 'Area', 'BoundingBoxAA_Length', 'Sphericity']
[<ipython-input-160-3a021b121bab>:2 -       _check_feature() ] 	 Checking for features ['Volume', 'Area', 'BoundingBoxAA_Length', 'Sphericity']
[<ipython-input-160-3a021b121bab>:2 -       _check_feature() ] 	 Checking for features ['Volume', 'Area', 'BoundingBoxAA_Length', 'Sphericity']
[<ipython-input-160-3a021b121bab>:2 -       _check_feature() ] 	 Checking for features ['Volume', 'Area', 'BoundingBoxAA_Length', 'Sphericity']
[<ipython-input-160-3a021b121bab>:6 -       _check_feature() ] 	 Found Sphericity in NS4B2_ZIKV_KDEL_500_3D_Rep2_April142018_2_C0_Z000Series003_Sphericity.csv
[<ipython-input-160-3a021b121bab>:27 -      parse_directory() ] 	 Feature FOUNDSphericity
[<ipython-input-160-3a021b121bab>:28 -      parse_directory() ] 	 Reading csv file
[<ipython-input-160-3a021b121bab>:2 -       _check_feature() ] 	 Checking for features ['Vol

In [162]:
q

{'BoundingBoxAA_Length_mean': 0.7328836782387805,
 'BoundingBoxAA_Length_std': 0.2199456814238745,
 'BoundingBoxAA_Length_sum': 1731.0712479999997,
 'BoundingBoxAA_Length_min': 0.42057,
 'BoundingBoxAA_Length_max': 1.05143,
 'BoundingBoxAA_Length_median': 0.841141,
 'BoundingBoxAA_Length_N': 2362,
 'Area_mean': 0.5658937168924639,
 'Area_std': 0.46757971955809075,
 'Area_sum': 1336.6409592999998,
 'Area_min': 0.0413095,
 'Area_max': 3.45479,
 'Area_median': 0.456437,
 'Area_N': 2362,
 'Sphericity_mean': 0.744764451312447,
 'Sphericity_std': 0.10988439116159224,
 'Sphericity_sum': 1759.1336339999998,
 'Sphericity_min': 0.445985,
 'Sphericity_max': 0.960892,
 'Sphericity_median': 0.744698,
 'Sphericity_N': 2362,
 'Volume_mean': 0.026126296563928873,
 'Volume_std': 0.02628750684023818,
 'Volume_sum': 61.710312484,
 'Volume_min': 0.000520917,
 'Volume_max': 0.184908,
 'Volume_median': 0.0186598,
 'Volume_N': 2362}