In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
# Use the fully qualified option key: the short pattern 'precision' is
# ambiguous in newer pandas (it also matches 'styler.format.precision')
# and raises OptionError there.
pd.set_option('display.precision', 1)

# Each entry is (output column name, cycle number, channel name).
# process() selects the rows whose parsed Cycle/Channel match the last two
# items and stores their IntDen values under the column name, so the cycle
# number must agree with the cycle written in the name.
IntDen_new = [('Nucleus (Cycle 1 - DAPI)', 1, 'DAPI'),
              ('BT474-H2BeGFP (Cycle 1 - FITC)', 1, 'FITC'),
              ('MCL1 (Cycle 2 - Cy3)', 2, 'Cy3'),
              ('p-p65NFkB (Cycle 2 - Cy5)', 2, 'Cy5'),
              ('p-Akt (Cycle 3 - FITC)', 3, 'FITC'),
              # Was cycle 2, which silently duplicated the MCL1 (2, 'Cy3') data.
              ('aSMA (Cycle 3 - Cy3)', 3, 'Cy3'),
              ('p-gH2AX (Cycle 3 - Cy5)', 3, 'Cy5'),
              ('GRP78 (Cycle 4 - FITC)', 4, 'FITC'),
              ('Vimentin (Cycle 4 - Cy3)', 4, 'Cy3')]

# Each entry is (sheet name, plate row letter, list of plate column numbers).
# The sheet name is '<prefix>,<condition>(r<row>,c<first col>-<last col>)'.
tabs = [
    ('{},{}(r{},c{}-{})'.format(prefix, condition, row.lower(),
                                columns[0], columns[-1]),
     row,
     list(columns))
    for prefix, row in (('BT474', 'B'),
                        ('BT474+C3H-scr', 'C'),
                        ('BT474+C3H-1', 'D'),
                        ('BT474+C3H-3', 'E'))
    for condition, columns in (('Cntrl', (2, 3, 4)),
                               ('30nM', (5, 6, 7)),
                               ('300nM', (8, 9, 10)))
]


''' read_df builds the result file paths from the common prefix passed as `path`
    (e.g. the Results-Cyto- prefix on the local drive).
    Pass the plate row(s) and the list of column indices to extract data from
    (e.g. row E, column indices = [8, 9, 10]).
    It constructs a list of well IDs (e.g. B02-B10) named WELLS
    and a list of field IDs (fld1-fld9) named FIELD.
'''

def read_df(path, rows, cols):
    """Read the per-field result tables for every requested well.

    One file is read per (well, field) pair from
    ``path + '<well><field>.txt'`` (e.g. ``...Results-Cyto-B02fld1.txt``).
    Files are parsed as tab-separated text and only the ``Label`` and
    ``IntDen`` columns are kept.

    Parameters
    ----------
    path : str
        Common prefix of the result files (directory plus file-name stem).
    rows : iterable of str
        Plate row letters, e.g. ``['B']``.
    cols : iterable of int
        Plate column numbers, e.g. ``[2, 3, 4]``.

    Returns
    -------
    tuple
        ``(df_list, WELLS, FIELD)``: the list of DataFrames in
        well-major / field-minor order, the well IDs, and the field IDs
        (always ``fld1`` .. ``fld9``).

    Raises
    ------
    Whatever ``pd.read_csv`` raises when a file is missing or lacks the
    requested columns.
    """
    # Zero-pad the column number to two digits. The previous code emitted the
    # literal '10' for every column >= 10, which produced the wrong well ID
    # (and file name) for columns 11, 12, ...
    WELLS = ['{}{:02d}'.format(r, k) for r in rows for k in cols]

    FIELD = ['fld{}'.format(k) for k in range(1, 10)]

    files = [path + '{}{}.txt'.format(w, f)
             for w in WELLS for f in FIELD]

    # Read every data file of the dataset into a list of DataFrames.
    df_list = [pd.read_csv(file, sep='\t', engine='python',
                           usecols=['Label', 'IntDen']) for file in files]

    return df_list, WELLS, FIELD

def extract(df):
    """Parse the ``Label`` column into separate identifier columns.

    The input frame is not modified; a new frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the string column ``Label`` and the column ``IntDen``.
        NOTE(review): the patterns below imply labels shaped roughly like
        ``<WELL>_fld<n>:<a>-<b>:<CHANNEL>-...<cycle digit>`` - confirm
        against the real result files.

    Returns
    -------
    pandas.DataFrame
        Columns ``Cell ID``, ``Cycle``, ``Channel``, ``IntDen``, ``Well``,
        ``Field``, ``Group`` with the same index as ``df``.

    Raises
    ------
    AttributeError
        If a label does not match one of the patterns (``re.search``
        returns ``None``).
    """
    well = r'[A-Z]+.*?(?=_)'
    field = r'(?<=d)[\d]{1,1}?(?=:)'
    cell_id = r'(\d+)-(\d+)'
    channel = r'(?<=:)[A-Z]+.*?(?=-)'

    # Work on a copy so the caller's frame does not silently gain columns.
    out = df.copy()
    labels = out['Label']

    # The two numbers of the '<a>-<b>' token are concatenated into one integer.
    out['Cell ID'] = labels.apply(
        lambda x: int(re.search(cell_id, x).group().replace('-', '')))

    # The cycle is the last character of the label.
    out['Cycle'] = labels.apply(lambda x: int(x[-1]))

    out['Channel'] = labels.apply(lambda x: re.search(channel, x).group())

    out['Well'] = labels.apply(lambda x: re.search(well, x).group())

    # Field is kept as the matched digit string, not converted to int.
    out['Field'] = labels.apply(lambda x: re.search(field, x).group())

    # Plate column = trailing number of the well ID ('B02' -> 2, 'B10' -> 10,
    # 'B11' -> 11). Looking only at the last character mis-read 11 and 12.
    plate_col = out['Well'].apply(lambda w: int(re.search(r'\d+$', w).group()))

    # Columns 2-4 -> group 0, 5-7 -> group 1, 8-10 -> group 2, ...
    out['Group'] = plate_col.apply(lambda c: (c - 2) // 3)

    return out[['Cell ID', 'Cycle', 'Channel', 'IntDen', 'Well', 'Field', 'Group']]

def process(df):
    """Pivot one field's long-format table into one row per cell.

    The label is parsed with ``extract``; the rows matching the first
    ``IntDen_new`` entry (cycle and channel) provide the identifier columns
    ``Cell ID``, ``Well``, ``Field`` and ``Group``. For every entry of
    ``IntDen_new`` the ``IntDen`` values of the matching cycle/channel rows
    are then added as a column named after that entry.

    Values are assigned by position, not by ``Cell ID``.
    NOTE(review): this assumes every cycle/channel subset lists the cells in
    the same order - confirm against the result files.

    Parameters
    ----------
    df : pandas.DataFrame
        A raw result table with ``Label`` and ``IntDen`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per cell with the identifier columns followed by one
        intensity column per ``IntDen_new`` entry.

    Raises
    ------
    ValueError
        If a cycle/channel subset has a different number of rows than the
        reference subset (raised by pandas on column assignment).
    """
    dt = extract(df)

    is_reference = ((dt['Cycle'] == IntDen_new[0][1]) &
                    (dt['Channel'] == IntDen_new[0][2]))
    # Single .loc selection plus an explicit copy: the columns added below
    # must land on an independent frame, not on a slice of ``dt`` (chained
    # indexing here triggers SettingWithCopyWarning).
    df_field = dt.loc[is_reference, ['Cell ID', 'Well', 'Field', 'Group']].copy()

    for name, cycle, channel in IntDen_new:
        dy = dt[(dt['Cycle'] == cycle) & (dt['Channel'] == channel)]
        # 1-D array, assigned positionally (index of ``dy`` is ignored).
        df_field[name] = dy['IntDen'].values

    return df_field

In [2]:
# Export the Cyto measurements: one Excel sheet per entry of ``tabs``.
path_cyto = './032018_48hrs/Results-Cyto-'

with pd.ExcelWriter('./032018_48hrs/Cyto.xlsx') as writer:
    for t in tabs:
        rows, cols = [t[1]], t[2]
        df_list, WELLS, FIELD = read_df(path_cyto, rows, cols)

        # Double check the wells and fields that have been processed in the
        # Cyto dataset, and the number of files that have been read.
        print('\n', '* ' * 16, 'Cyto Reading ', '* ' * 16)
        n = len(df_list)
        print('\nWells:', WELLS)
        print('\nFields:', FIELD)
        print('\nNumber of files that have been read: ', n)

        # Concatenate once instead of DataFrame.append in a loop: append was
        # removed in pandas 2.0 and re-copies the accumulated frame each time.
        # A distinct loop name also avoids shadowing the list being iterated.
        df_cells = pd.concat([process(field_df) for field_df in df_list],
                             ignore_index=True)

        df_cells.to_excel(writer, index=False, sheet_name=t[0])


 * * * * * * * * * * * * * * * *  Cyto Reading  * * * * * * * * * * * * * * * * 

Wells: ['B02', 'B03', 'B04']

Fields: ['fld1', 'fld2', 'fld3', 'fld4', 'fld5', 'fld6', 'fld7', 'fld8', 'fld9']

Number of files that have been read:  27

 * * * * * * * * * * * * * * * *  Cyto Reading  * * * * * * * * * * * * * * * * 

Wells: ['B05', 'B06', 'B07']

Fields: ['fld1', 'fld2', 'fld3', 'fld4', 'fld5', 'fld6', 'fld7', 'fld8', 'fld9']

Number of files that have been read:  27

 * * * * * * * * * * * * * * * *  Cyto Reading  * * * * * * * * * * * * * * * * 

Wells: ['B08', 'B09', 'B10']

Fields: ['fld1', 'fld2', 'fld3', 'fld4', 'fld5', 'fld6', 'fld7', 'fld8', 'fld9']

Number of files that have been read:  27

 * * * * * * * * * * * * * * * *  Cyto Reading  * * * * * * * * * * * * * * * * 

Wells: ['C02', 'C03', 'C04']

Fields: ['fld1', 'fld2', 'fld3', 'fld4', 'fld5', 'fld6', 'fld7', 'fld8', 'fld9']

Number of files that have been read:  27

 * * * * * * * * * * * * * * * *  Cyto Reading  * * * *

In [3]:
# Export the Nuc measurements: one Excel sheet per entry of ``tabs``.
path_nucl = './032018_48hrs/Results-Nuc-'

with pd.ExcelWriter('./032018_48hrs/Nuc.xlsx') as writer:
    for t in tabs:
        rows, cols = [t[1]], t[2]
        df_list, WELLS, FIELD = read_df(path_nucl, rows, cols)

        # Double check the wells and fields that have been processed in the
        # Nuc dataset, and the number of files that have been read.
        print('\n', '* ' * 16, 'Nuc Reading ', '* ' * 16)
        n = len(df_list)
        print('\nWells:', WELLS)
        print('\nFields:', FIELD)
        print('\nNumber of files that have been read: ', n)

        # Concatenate once instead of DataFrame.append in a loop: append was
        # removed in pandas 2.0 and re-copies the accumulated frame each time.
        # A distinct loop name also avoids shadowing the list being iterated.
        df_cells = pd.concat([process(field_df) for field_df in df_list],
                             ignore_index=True)

        df_cells.to_excel(writer, index=False, sheet_name=t[0])


 * * * * * * * * * * * * * * * *  Nuc Reading  * * * * * * * * * * * * * * * * 

Wells: ['B02', 'B03', 'B04']

Fields: ['fld1', 'fld2', 'fld3', 'fld4', 'fld5', 'fld6', 'fld7', 'fld8', 'fld9']

Number of files that have been read:  27

 * * * * * * * * * * * * * * * *  Nuc Reading  * * * * * * * * * * * * * * * * 

Wells: ['B05', 'B06', 'B07']

Fields: ['fld1', 'fld2', 'fld3', 'fld4', 'fld5', 'fld6', 'fld7', 'fld8', 'fld9']

Number of files that have been read:  27

 * * * * * * * * * * * * * * * *  Nuc Reading  * * * * * * * * * * * * * * * * 

Wells: ['B08', 'B09', 'B10']

Fields: ['fld1', 'fld2', 'fld3', 'fld4', 'fld5', 'fld6', 'fld7', 'fld8', 'fld9']

Number of files that have been read:  27

 * * * * * * * * * * * * * * * *  Nuc Reading  * * * * * * * * * * * * * * * * 

Wells: ['C02', 'C03', 'C04']

Fields: ['fld1', 'fld2', 'fld3', 'fld4', 'fld5', 'fld6', 'fld7', 'fld8', 'fld9']

Number of files that have been read:  27

 * * * * * * * * * * * * * * * *  Nuc Reading  * * * * * * 