In [1]:
import pandas as pd
import utils
import matplotlib.pyplot as plt
import random
from pycytominer.cyto_utils import write_gct

In [2]:
# Name of a metadata column kept as a notebook-level setting, and the empty
# frame that will accumulate consensus profiles across cell types.
group_by_feature = 'Metadata_broad_sample'
all_consensus_df = pd.DataFrame()

# Tab-separated experiment metadata; the columns used later are
# Batch, Assay_Plate_Barcode and Cell_Type.
experiment_df = pd.read_table('output/experiment-metadata.tsv')

# Preview the first five rows.
experiment_df.head(5)

Unnamed: 0,Batch,Plate_Map_Name,Assay_Plate_Barcode,Cell_Type
0,2021_04_07_Batch1,CR-12982_Gray_cmpds_AB00027680,BR00122970,PATU-8988T
1,2021_04_07_Batch1,CR-12982_Gray_cmpds_AB00027680,BR00122971,PATU-8988T
2,2021_04_07_Batch1,CR-12982_Gray_cmpds_AB00027680,BR00122972,MDA-MB-231
3,2021_04_07_Batch1,CR-12982_Gray_cmpds_AB00027680,BR00122973,MDA-MB-231
4,2021_04_07_Batch1,CR-12982_Gray_cmpds_AB00027680,BR00122974,U2OS


In [3]:
# Build consensus profiles per cell type and stack them into all_consensus_df.
# The accumulator is reset here so that re-running this cell does not append
# a second copy of every cell type's rows.
all_consensus_df = pd.DataFrame()

for cell_type in experiment_df.Cell_Type.unique():
    cell_type_df = experiment_df.query('Cell_Type==@cell_type')

    # Pair every plate with the batch recorded on its own metadata row.
    # (Indexing experiment_df by the loop counter of the unique cell types
    # picks an unrelated row and only works when all plates share one batch.)
    plate_keys = cell_type_df[['Batch', 'Assay_Plate_Barcode']].drop_duplicates()

    all_plates_df = pd.DataFrame()
    for batch, plate in plate_keys.itertuples(index=False, name=None):
        df = (
            pd.read_csv(f'../profiles/{batch}/{plate}/{plate}_normalized_feature_select_negcon_batch.csv.gz', dtype={'Metadata_mmoles_per_liter':str})
            # Sample id = "<supplier catalog>_<concentration>"; NaN if either part is missing.
            .assign(Metadata_sample=lambda x: x.Metadata_Supplier_Catalog+'_'+x.Metadata_mmoles_per_liter)
            .assign(Metadata_Cell_Type=cell_type)
        )
        all_plates_df = utils.concat_profiles(all_plates_df, df)

    # Wells without a catalog id / concentration end up with a NaN sample id
    # and are labelled 'DMSO' (presumably the negative controls - confirm).
    # Assign the column explicitly: a chained `.fillna(..., inplace=True)` is
    # deprecated in pandas 2.x and has no effect under Copy-on-Write.
    all_plates_df['Metadata_sample'] = all_plates_df['Metadata_sample'].fillna('DMSO')

    replicating_corr_df = utils.corr_between_replicates_df(all_plates_df, 'Metadata_sample')

    consensus_df = utils.consensus(all_plates_df, 'Metadata_sample')

    replicating_corr_df['Cell_Type'] = f'{cell_type}'
    # Split on the LAST underscore only, so a catalog id that itself contains
    # '_' still yields exactly two columns (catalog id, concentration).
    replicating_corr_df[['Metadata_Supplier_Catalog','Concentration']] = (
        replicating_corr_df.Metadata_sample.str.rsplit('_', n=1, expand=True)
    )

    # Attach each sample's replicate correlation to its consensus profile.
    consensus_df = consensus_df.merge(replicating_corr_df[['replicate_correlation', 'Metadata_sample']], on='Metadata_sample', how='left')

    all_consensus_df = utils.concat_profiles(all_consensus_df, consensus_df)

In [4]:
# Give the replicate-correlation column a "Metadata_" prefix before export
# (presumably so it is handled as metadata rather than a feature - confirm).
consensus_column_names = {'replicate_correlation': 'Metadata_replicate_correlation'}
all_consensus_df = all_consensus_df.rename(columns=consensus_column_names)

# Write the stacked consensus profiles to a GCT file.
write_gct(all_consensus_df, 'output/consensus_profiles.gct')