In [1]:
import pandas as pd
import sys
import os
import synapseclient
import itertools
import numpy as np
import re

#local libs
# NOTE(review): hard-coded, user-specific absolute path. Presumably `synapseutils`
# and `utils` are loaded from this directory -- confirm before running elsewhere.
sys.path.append("/home/apratap/dev/appys/lib/")
import synapseutils
import utils

# Synapse client handle; the query, fetch and upload cells below all go through it.
syn = synapseclient.login()

Welcome, Abhishek Pratap!


### Source the functions for processing OHSU plates

In [2]:
#common plate reader functions
# Executes the shared helper notebook inside this kernel. The names used further
# down but never defined in this notebook (process_OHSU_plates_run_at_OHSU,
# calc_norm_factors, OHSU_v6_plateMap, OHSU_FDA_plateMap) are presumably
# defined there -- TODO confirm.
# NOTE(review): the path is anchored to the current user's home directory.
%run '~/dev/AML/DrugScreenHarmonization/common_plate_reader_functions.ipynb'

Welcome, Abhishek Pratap!


### Get data for all OHSU Samples done at Phoenix 

In [3]:
# Ask Synapse for the ids of all entities whose parent is syn4940877 and
# convert the query result to a DataFrame.
dataFiles_df = synapseutils.query2df(
    syn.query("select id from entity where parentId == 'syn4940877'")
)
# Fetch every listed entity and keep the `.path` of the object syn.get returns.
dataFiles = [syn.get(entity_id).path for entity_id in dataFiles_df.id]

### Process plate reader data

In [4]:
# The base name of each file, minus its '_RAW.xlsx' suffix, is split on '_'
# and the pieces are labelled with the ten metadata field names listed below.
mdata = pd.DataFrame.from_records(
    [os.path.basename(path).replace('_RAW.xlsx', '').split('_') for path in dataFiles],
    columns=['plate_origin', 'plate_version', 'batch', 'run_by', 'run_at',
             'run_date', 'sampleID', 'assay', 'plate_reader', 'read_time']
)
# Keep the file path next to its parsed metadata so the file can be read later.
mdata['file'] = dataFiles

In [5]:
# Run the plate-reader parser on every file and tag the parsed rows with the
# metadata recovered from that file's name.
plate_frames = []
for _, row in mdata.iterrows():
    temp_data = process_OHSU_plates_run_at_OHSU(row['file'])
    #add the metadata to processed raw data
    # Each metadata field becomes a constant column on this file's rows.
    # Iterating over row.index avoids Series.iterkv(), a long-deprecated alias
    # that no longer exists in current pandas.
    for key in row.index:
        temp_data[key] = row[key]
    plate_frames.append(temp_data)
#final pandas df
# Stack the per-file frames row-wise; the original row labels are kept as-is.
data = pd.concat(plate_frames, axis=0)

In [6]:
#Fix plate version to match
# Rename the plate_version labels parsed from the file names. Presumably the new
# labels are the ones used by the plate maps joined in a later cell -- TODO confirm.
# `.loc` replaces the deprecated/removed `.ix` indexer; for a boolean row mask
# plus a column label the two select exactly the same cells.
data.loc[data.plate_version == 'MarcTest123', 'plate_version'] = 'MarcTest123_v6'
data.loc[data.plate_version == 'FDA v3', 'plate_version'] = 'FDA_v3'
# Show the number of rows per plate version after the rename.
data.plate_version.value_counts()

MarcTest123_v6    1152
FDA_v3             384
dtype: int64

### Join with the plate map to get the metadata

In [7]:
# Stack the two plate maps row-wise into a single lookup table.
OHSU_plate_maps = pd.concat([OHSU_v6_plateMap, OHSU_FDA_plateMap], axis=0)
# Right join: every row of `data` is kept, and plate-map columns are attached
# where plate version, plate number, well row and well column all match.
data = OHSU_plate_maps.merge(
    data,
    how='right',
    left_on=['plate_version', 'Plate_Num', 'Well_Row', 'Well_Column'],
    right_on=['plate_version', 'plateNum', 'row', 'col']
)

### Normalize the plate data

In [9]:
# Display the column labels of `data` as it stands after the plate-map join.
data.columns

Index([               u'Concentration',                         u'Drug',
                    u'Inhibitor_Panel',                    u'Plate_Num',
                               u'Unit',                  u'Well_Column',
                           u'Well_Row', u'drug_replicate_across_plates',
                      u'plate_version',                          u'row',
                                u'col',                        u'value',
                           u'plateNum',                 u'plate_origin',
                              u'batch',                       u'run_by',
                             u'run_at',                     u'run_date',
                           u'sampleID',                        u'assay',
                       u'plate_reader',                    u'read_time',
                               u'file'],
      dtype='object')

In [19]:
# calc_norm_factors is applied once per group of rows sharing all eight keys below.
# NOTE(review): sampleID and batch are not among the keys -- confirm that pooling
# across them is intended.
grp = data.groupby([
    'plate_version', 'plate_origin', 'run_at', 'assay',
    'Plate_Num', 'plate_reader', 'read_time', 'run_date'
])
norm_factors = grp.apply(calc_norm_factors).reset_index()
#merge the normFactors
# No `on=` is given, so the join uses every column name the two frames share.
data = data.merge(norm_factors)
#normValue
# Linear rescaling: a reading equal to pos_control maps to 0 and a reading equal
# to median_DMSO maps to 1; the result is rounded to 4 decimals.
# pos_control / median_DMSO presumably come from norm_factors -- TODO confirm.
data['normValue'] = (
    (data.value - data.pos_control) / (data.median_DMSO - data.pos_control)
).map(lambda ratio: np.around(ratio, 4))

In [21]:
# Columns kept in the compiled table, in output order.
required_columns = [
    # plate / run identifiers
    'plate_version', 'plateNum', 'plate_origin', 'run_at', 'run_date',
    'sampleID', 'assay', 'plate_reader', 'read_time',
    # well position
    'row', 'col',
    # drug annotation and raw readout
    'Drug', 'drug_replicate_across_plates', 'Concentration', 'value', 'Unit',
    # normalised value and the factors used to compute it
    'normValue', 'pos_control', 'median_DMSO'
]
# Drop everything else and reorder to the list above.
data = data[required_columns]

### Upload to Synapse

In [18]:
# Write the compiled table to a tab-separated file, upload it to Synapse with
# provenance (input entity ids + the notebook URL), then delete the local copy.
# NOTE(review): this file name says "run_at_OHSU" while the recorded upload log
# and the `executed` URL below say "run_at_Phoenix" -- confirm which is intended.
outfile = 'patient_sample_on_OHSU_plates_run_at_OHSU_compiledData.tsv'
# The compiled frame is `data`; `final_data` is never defined in this notebook
# and raised NameError on a fresh kernel.
data.to_csv(outfile, sep='\t', index=False)
syn.store(synapseclient.File(outfile, parentId='syn4932396'),
          used=dataFiles_df.id.tolist(),
          executed='https://github.com/apratap/AML/blob/master/DrugScreenHarmonization/patient_samples/OHSU_plates/run_at_Phoenix.ipynb')
# Pure-Python cleanup instead of shelling out to `rm`.
os.remove(outfile)


##################################################
 Uploading file to Synapse storage 
##################################################
Uploaded Chunks [####################]100.00%     2.1MB/2.1MB patient_sample_on_OHSU_plates_run_at_Phoenix_compiledData.tsv Done...
Upload completed in 10 seconds.
