In [34]:
execfile('../src/utils.py')
execfile('../src/cellslib.py')
from toolz import assoc

In [35]:
# One entry per raw export to process. analyze_and_save reads each entry's
# 'path', writes the merged table to 'savepath', and embeds 'label' in the
# generated 'Condition' strings.
configs = [
    {
        'path': '/notebooks/moldev-data/original/06-24-2015/ssC HS JS 06.24.2015 LNA.txt',
        'savepath': '/notebooks/moldev-data/analyzed/06-24-2015/ssC LNA.csv',
        'label': 'LNA',
    },
    {
        'path': '/notebooks/moldev-data/original/06-24-2015/ssC HS JS 06.24.2015 PNA.txt',
        'savepath': '/notebooks/moldev-data/analyzed/06-24-2015/ssC PNA.csv',
        'label': 'PNA',
    },
]

In [36]:
# Cell line -> wells holding that cell line (row A: U2OS, row B: 143B).
cell_dict = {
    "U2OS": ['A01', 'A02', 'A03', 'A04'],
    "143B": ['B01', 'B02', 'B03', 'B04'],
}

# Each entry pairs a set of plates with the concentration found in well
# columns 01-04 of those plates (consumed by create_dilution_df).
dilutions = [
    {
        'Plate Name': [32, 34],
        'Concentration': ['2000 nM', '1000 nM', '500 nM', '250 nM'],
    },
    {
        'Plate Name': [33, 35],
        'Concentration': ['125 nM', '62.5 nM', '31.25 nM', '0 nM'],
    },
]

# Experiment date, stamped onto every row of condition_lookup.
date = '06-24-2015'

In [37]:
# {Plate Name: [String],Concentration:[String]} -> DataFrame[Plate Name, Plate ID, Well Name, Concentration]
def create_dilution_df(c):
    """ Expand one dilution spec into a per-plate, per-well concentration table.

        c['Concentration'] supplies one value per well column ('01'-'04');
        the same values are repeated for rows 'A' and 'B'. The resulting
        well layout is then copied once for every entry of c['Plate Name'],
        each copy tagged with that plate name and a 'Plate ID' obtained
        from generate_sid(). All copies are concatenated and returned. """
    well_numbers = ['01','02','03','04']

    # One frame per plate row; the scalar row letter is broadcast over the
    # four well columns. (`df` comes from the shared utils; presumably an
    # alias for pd.DataFrame -- confirm.)
    row_frames = []
    for well_letter in ['A','B']:
        row_frames.append(df({'Concentration':c['Concentration'],
                              '_Well Number': well_numbers,
                              '_Well Letter': well_letter}))
    layout = pd.concat(row_frames)

    # Build 'A01'-style names, then discard the temporary helper columns.
    layout['Well Name'] = layout['_Well Letter'] + layout['_Well Number']
    layout = layout.drop(['_Well Letter', '_Well Number'],axis=1)

    def add_plate_info(plate_name):
        # Copy so each plate gets its own frame; generate_sid() is called
        # once per plate, so all wells of a plate share one 'Plate ID'.
        plate = layout.copy()
        plate['Plate Name'] = plate_name
        plate['Plate ID'] = generate_sid()
        return plate

    return pd.concat(map(add_plate_info, c['Plate Name']))

In [38]:
# Well -> cell line lookup built from cell_dict. create_well_df comes from the
# shared library; it presumably yields 'Well Name' and 'Cell Type' columns
# (both are used downstream) -- confirm against cellslib.
cell_types = create_well_df(cell_dict)

In [39]:
# Build one concentration table per dilution spec and stack them into a
# single frame covering every plate.
concentrations = pd.concat(map(create_dilution_df, dilutions))

In [40]:
# Attach the per-well cell-line information to every (plate, well)
# concentration row, then stamp the experiment date on each row.
condition_lookup = concentrations.merge(cell_types, on=['Well Name'])
condition_lookup['Date'] = date

In [41]:
# {path:String, savepath: String, label: String} -> SideEffects(SaveFile)
def analyze_and_save(c):
    """ Clean one raw export, annotate it with conditions, and save it as CSV.

        Loads c['path'] through get_cell_data using the shared cell_config,
        joins the result with condition_lookup on plate and well, adds a
        'Condition' column of the form 'ssC <label> <Concentration> <Cell Type>',
        and writes the table to c['savepath'] without the index.

        Returns the number of rows written. """
    # Copy of the shared config with only 'path' replaced (assoc does not
    # mutate cell_config).
    my_cell_config = assoc(cell_config,'path',c['path'])

    # The loader's "Plate ID" column is renamed so it lines up with the
    # 'Plate Name' key used by condition_lookup.
    measurements = get_cell_data(my_cell_config).rename(columns={"Plate ID":"Plate Name"})
    data = pd.merge(measurements,
                    condition_lookup,
                    on = ['Plate Name','Well Name'])

    condition_prefix = 'ssC' + ' ' + c['label'] + ' '
    data['Condition'] = condition_prefix + data['Concentration'] + ' ' + data['Cell Type']

    # Write to file
    data.to_csv(c['savepath'],index=False)
    return len(data)

In [42]:
# String -> String
def rename_column(col):
    """ Normalize a raw column header.

        Keeps only the text before the first '(' (dropping the large
        parenthetic suffix), removes a leading 'Cell:' prefix if present,
        then strips trailing forward slashes and surrounding spaces.

        col: raw column name (String).
        Returns the cleaned column name (String). """
    prefix = 'Cell:'
    name = col.split('(')[0]
    if name.startswith(prefix):
        # Slice the prefix off. str.lstrip('Cell:') would instead strip any
        # leading run of the characters C, e, l and ':', mangling names
        # such as 'Cell:Cell Count' into 'Count'.
        name = name[len(prefix):]
    return name.rstrip('/').strip(' ')

# Base configuration handed to get_cell_data; analyze_and_save adds the
# per-file 'path' entry. How each key is interpreted is defined by
# get_cell_data (not shown here) -- the notes below are assumptions to confirm.
cell_config = dict(
    plate_delimiter = "ATF",
    delimiter = '\t',
    skiprows = 4,
    # Column-name patterns to drop; entries look like regular expressions.
    # The last one is a raw string so the regex escapes are not treated as
    # (invalid) string escapes; its value is unchanged.
    dropcols = ['Cell ID',
                'Site ID',
                'MEASUREMENT SET ID',
                '.*ObjectID.*',
                'Laser focus score',
                r'\.[0-9]*\Z'],
    # Each entry is [new column name, [columns], [columns]]; presumably
    # [output name, numerator columns, denominator columns] -- TODO confirm.
    normcols = [['Normalized APB spots (by FITC & TxRed)',
                  ['# of APBs'],
                  ['# of FITC spots', '# of TxRed spots']],
                ['Normalized APB spots (by TxRed)',
                  ['# of APBs'],
                  ['# of TxRed spots']],
                ['Normalized APB spots (by FITC)',
                  ['# of APBs'],
                  ['# of FITC spots']],

                ['Normalized Coloc avg area (by FITC & TxRed)',
                  ['Area_Coloc_Avg'],
                  ['Area_FITC','Area_TxRed']],
                ['Normalized Coloc avg area (by FITC)',
                  ['Area_Coloc_Avg'],
                  ['Area_FITC']],
                ['Normalized Coloc avg area (by TxRed)',
                  ['Area_Coloc_Avg'],
                  ['Area_TxRed']],

                ['Normalized Coloc spots (by FITC & TxRed)',
                  ['# Coloc Spots'],
                  ['# of FITC spots', '# of TxRed spots']],
                ['Normalized Coloc spots (by FITC)',
                  ['# Coloc Spots'],
                  ['# of FITC spots']],
                ['Normalized Coloc spots (by TxRed)',
                  ['# Coloc Spots'],
                  ['# of TxRed spots']],

                ['Normalized Coloc total area (by FITC & TxRed)',
                  ['Total_Area_Coloc'],
                  ['Total_Area_FITC','Total_Area_TxRed']],
                ['Normalized Coloc total area (by FITC)',
                  ['Total_Area_Coloc'],
                  ['Total_Area_FITC']],
                ['Normalized Coloc total area (by TxRed)',
                  ['Total_Area_Coloc'],
                  ['Total_Area_TxRed']],

                ['Normalized Coloc total area (by FITC-TxRed colocs)',
                  ['Total_Area_Coloc'],
                  ['FITC-TxRed_Area_Total']]],
    # Function used to clean each raw column header.
    colrename = rename_column,
    # `identity` is supplied as the check hook.
    check = identity
    )

In [43]:
# Process every configured file and display the number of rows written for
# each. A list comprehension is used instead of map() so the saves run
# eagerly regardless of Python version (map() is lazy on Python 3).
[analyze_and_save(c) for c in configs]

[3158, 4290]