In [14]:
execfile('../src/utils.py')
execfile('../src/cellslib.py')
from toolz import assoc

In [1]:
# One entry per raw export handled by this notebook:
#   path     - tab-delimited input file read during analysis
#   savepath - CSV written once the data has been annotated
#   label    - short dataset tag embedded in the 'Condition' column
configs = [
    dict(path='/notebooks/moldev-data/original/07-20-2015/APB HS JS 60X 07.22.2015 ML216 MMC HU TS.txt',
         savepath='/notebooks/moldev-data/analyzed/07-20-2015/APB.csv',
         label='APB'),
    dict(path='/notebooks/moldev-data/original/07-20-2015/ssC HS JS 60X 07.20.2015 ML216 MMC HU TS.txt',
         savepath='/notebooks/moldev-data/analyzed/07-20-2015/ssC.csv',
         label='ssC'),
]

- Rows A-D: U2OS
- Rows E-H: 143B

- Column 1: media control (0.08% DMSO)
- Column 2: DMSO control (0.5% DMSO)
- Columns 3-6: ML216 (100 - 50 - 25 - 12.5 uM)
- Columns 7-10: MMC (400 - 200 - 100 - 50 ng/mL)
- Column 11: HU (1 mM)
- Column 12: TS (5 uM)

In [None]:
# Plate layout: (cell line, row letters) pairs, and treatment -> column numbers.
# NOTE: `cell_types` is rebound further down in this notebook to the result
# of create_well_df(cell_dict), so this list form is short-lived.
cell_types = [('U2OS', 'ABCD'), ('143b', 'EFGH')]
contents = {
    'Media Control': [1],
    'DMSO Control': [2],
    'ML216': list(range(3, 7)),
    'MMC': list(range(7, 11)),
    'HU': [11],
    'TS': [12],
}

In [16]:
# Date written into the 'Date' column of the condition lookup table.
# NOTE(review): the input files configured in this notebook sit under a
# 07-20-2015 directory - confirm that 07-15-2015 is the intended date.
date = '07-15-2015'

# Concentration labels; create_dilution_df pairs them positionally with
# well numbers 01-04.
dilutions = ['50 nM', '25 nM', '12.5 nM', '6.25 nM']

# Cell line -> wells containing it (input to create_well_df).
cell_dict = {
    "U2OS": ['A01', 'A02', 'A03', 'A04'],
    "143B": ['B01', 'B02', 'B03', 'B04'],
}

In [17]:
# [String] -> DataFrame[Concentration, Well Name, Plate ID]
def create_dilution_df(c):
    """ Build a per-well concentration table for rows A and B.

    c is paired positionally with well numbers 01-04, once for each row
    letter, giving wells A01-A04 and B01-B04. Every row receives the same
    'Plate ID', taken from a single generate_sid() call.

    NOTE(review): `df` and `generate_sid` are not defined in this notebook;
    presumably they come from the execfile'd helper modules - confirm.
    """
    per_row_frames = []
    for row_letter in ['A', 'B']:
        per_row_frames.append(df({'Concentration': c,
                                  '_Well Number': ['01', '02', '03', '04'],
                                  '_Well Letter': row_letter}))
    table = pd.concat(per_row_frames)

    # Combine letter + number into the well name, then discard the helper columns.
    table['Well Name'] = table['_Well Letter'] + table['_Well Number']
    table = table.drop(['_Well Letter', '_Well Number'], axis=1)
    table['Plate ID'] = generate_sid()
    return table

In [18]:
# Rebinds `cell_types` to the table built from cell_dict. Later cells merge
# it on 'Well Name' and read a 'Cell Type' column from the merged result.
# NOTE(review): create_well_df is not defined in this notebook; presumably
# it comes from one of the execfile'd helper modules - confirm.
cell_types = create_well_df(cell_dict)

In [19]:
# Per-well concentration table (wells A01-A04 and B01-B04) built from `dilutions`.
concentrations = create_dilution_df(dilutions)

In [20]:
# Join the concentration and cell-type tables on well name, then tag every
# row with the experiment date.
condition_lookup = concentrations.merge(cell_types, on=['Well Name'])
condition_lookup['Date'] = date

In [21]:
# {path:String, savepath: String, label: String} -> DataFrame + SideEffects(SaveFile)
def analyze_and_save(c):
    """ Load one export, attach per-well conditions, and write it to CSV.

    Reads c['path'] through get_cell_data using a copy of cell_config with
    'path' set, renames the loaded 'Plate ID' column to 'Plate Name', and
    merges with condition_lookup on 'Well Name'. A 'Condition' column is
    then built from c['label'], 'Concentration' and 'Cell Type'. The result
    is written to c['savepath'] without the index and also returned.
    """
    # assoc returns a new dict, so the shared cell_config is left unmodified.
    file_config = assoc(cell_config, 'path', c['path'])
    loaded = get_cell_data(file_config).rename(columns={"Plate ID": "Plate Name"})
    data = pd.merge(loaded, condition_lookup, on='Well Name')

    # NOTE(review): the leading 'ssC' is hard-coded, so a dataset labelled
    # 'APB' gets conditions of the form 'ssC APB ...' - confirm this is intended.
    condition_prefix = 'ssC' + ' ' + c['label'] + ' '
    data['Condition'] = condition_prefix + data['Concentration'] + ' ' + data['Cell Type']

    # Write to file
    data.to_csv(c['savepath'], index=False)
    return data

In [22]:
# f = open(configs[0]['path']).read().replace('\r','').split('\n')
# g = [map(rename_column,fi.split('\t')) for fi in f]

In [23]:
# g[4]

In [24]:
# pd.read_csv(StringIO(open(configs[0]['path']).read().replace('\r','')),skiprows=4,delimiter='\t')

In [25]:
# String -> String
def rename_column(col):
    """ Normalize a raw column header.

    Keeps only the text before the first '(' (dropping any parenthetic
    suffix), removes a literal leading 'Cell:' prefix if present, then
    strips trailing forward slashes and surrounding spaces.
    """
    prefix = 'Cell:'
    name = col.split('(')[0]
    if name.startswith(prefix):
        # Slice the prefix off. str.lstrip(prefix) treats its argument as a
        # set of characters and would also eat a leading 'C', 'e', 'l' or ':'
        # belonging to the real name (e.g. 'Cell:Cell ID' -> ' ID').
        name = name[len(prefix):]
    return name.rstrip('/').strip(' ')

# Base options passed to get_cell_data; analyze_and_save adds a per-file
# 'path' to a copy of this dict.
# NOTE(review): the meaning of each key is defined by get_cell_data, which is
# not visible in this notebook; the per-key remarks below are assumptions.
cell_config = {
    'plate_delimiter': "ATF",
    'delimiter': '\t',
    'skiprows': 4,
    # Several entries look like regular expressions - presumably matched
    # against column names to decide which columns to drop; TODO confirm.
    'dropcols': [
        'Cell ID',
        'Site ID',
        'MEASUREMENT SET ID',
        '.*ObjectID.*',
        'Laser focus score',
        '\.[0-9]*\Z',
    ],
    # Each entry is [new column name, [columns], [columns]] - presumably the
    # first group is normalized by the second; TODO confirm.
    'normcols': [
        ['Normalized_ColocSpot_area_sum (coloc)',
         ['ColocSpots_area_sum'],
         ['FITC-TxRed_coloc_area_sum']],
        ['Normalized_ColocSpot_area_sum (all)',
         ['ColocSpots_area_sum'],
         ['FITC-TxRed_all_area_sum']],

        ['Normalized coloc spots (by FITC & TxRed)',
         ['# of Coloc Spots'],
         ['# of FITC spots', '# of TxRed spots']],
        ['Normalized coloc spots (by FITC)',
         ['# of Coloc Spots'],
         ['# of FITC spots']],
        ['Normalized coloc spots (by TxRed)',
         ['# of Coloc Spots'],
         ['# of TxRed spots']],

        ['Normalized coloc spots (by FITC in coloc)',
         ['# of Coloc Spots'],
         ['# of FITC in ColocSpots']],
        ['Normalized coloc spots (by TxRed in coloc)',
         ['# of Coloc Spots'],
         ['# of TxRed in ColocSpots']],
        ['Normalized coloc spots (by FITC-TxRed in coloc)',
         ['# of Coloc Spots'],
         ['# of FITC-TxRed in ColocSpots']],
    ],
    'colrename': rename_column,
    'check': identity,
}

In [26]:
# analyze_and_save(configs[0])
# Process every configured file in order; each call also writes its CSV.
# Wrapped in list() so the result can be indexed by position.
dataframes = list(map(analyze_and_save, configs))

In [27]:
# Show the column names of the second processed dataset (configs[1]) in sorted order.
sorted(dataframes[1].columns.tolist())

['# of Coloc Spots',
 '# of FITC in ColocSpots',
 '# of FITC spots',
 '# of FITC-TxRed all spots',
 '# of FITC-TxRed in ColocSpots',
 '# of TxRed in ColocSpots',
 '# of TxRed spots',
 'Cell Type',
 'ColocSpots_area_avg',
 'ColocSpots_area_sum',
 'Concentration',
 'Condition',
 'Date',
 'FITC-TxRed_all_area_avg',
 'FITC-TxRed_all_area_sum',
 'FITC-TxRed_coloc_area_avg',
 'FITC-TxRed_coloc_area_sum',
 'FITC_all_IntIntensity_avg',
 'FITC_all_IntIntensity_sum',
 'FITC_all_Intensity_avg',
 'FITC_all_Intensity_sum',
 'FITC_all_area_avg',
 'FITC_all_area_sum',
 'FITC_coloc_IntIntensity_avg',
 'FITC_coloc_IntIntensity_sum',
 'FITC_coloc_Intensity_avg',
 'FITC_coloc_Intensity_sum',
 'FITC_coloc_area_avg',
 'FITC_coloc_area_sum',
 'Normalized coloc spots (by FITC & TxRed)',
 'Normalized coloc spots (by FITC in coloc)',
 'Normalized coloc spots (by FITC)',
 'Normalized coloc spots (by FITC-TxRed in coloc)',
 'Normalized coloc spots (by TxRed in coloc)',
 'Normalized coloc spots (by TxRed)',
 'Nor