In [1]:
import pandas as pd
import utils
import numpy as np
from pycytominer.cyto_utils import write_gct

In [2]:
# Column name handed to the grouping helpers in the cells below.
group_by_feature = 'Metadata_broad_sample'

# Tab-separated experiment metadata; the later cells read its `Batch` and
# `Assay_Plate_Barcode` columns to locate the per-plate profile files.
experiment_metadata_path = 'output/experiment-metadata.tsv'
experiment_df = pd.read_csv(experiment_metadata_path, sep='\t')

# Starts out empty. NOTE(review): nothing in the visible cells fills or reads
# this frame -- confirm whether a later cell still needs it.
corr_replicating_df = pd.DataFrame()

In [3]:
# Load the per-plate profile files listed in the experiment metadata and stack
# them into a single frame.
all_plates_df = pd.DataFrame()

# Pair every plate with the batch recorded on that plate's own metadata row.
# Indexing `experiment_df` with a counter over the *unique* barcodes is only
# correct while each barcode occupies exactly one row: as soon as a barcode
# repeats, the counter drifts away from the plate's row and the profile path is
# built with another plate's batch.
plate_batch_df = (
    experiment_df[['Batch', 'Assay_Plate_Barcode']]
    # Keep the first row per barcode, preserving order of first appearance
    # (the same order `.unique()` yields).
    .drop_duplicates(subset='Assay_Plate_Barcode')
)

for batch, plate in plate_batch_df.itertuples(index=False, name=None):
    df = (
        pd.read_csv(f'../profiles/{batch}/{plate}/{plate}_normalized_feature_select_negcon_batch.csv.gz')
    )
    all_plates_df = utils.concat_profiles(all_plates_df, df)

# NOTE(review): judging by the helper's name this drops control and empty
# wells -- confirm against utils.remove_all_control_empty_wells.
all_plates_df = utils.remove_all_control_empty_wells(all_plates_df)

# Split the profiles by the value of `Metadata_mmoles_per_liter` (1 and 4).
# The filter helper is applied again on each subset, as in the original flow.
all_plates_conc_1_df = utils.remove_all_control_empty_wells(all_plates_df.query('Metadata_mmoles_per_liter==1'))
all_plates_conc_4_df = utils.remove_all_control_empty_wells(all_plates_df.query('Metadata_mmoles_per_liter==4'))

# One consensus profile per `Metadata_broad_sample`, computed separately for
# each of the two subsets.
all_plates_conc_1_consensus_df = utils.consensus(all_plates_conc_1_df, 'Metadata_broad_sample')
all_plates_conc_4_consensus_df = utils.consensus(all_plates_conc_4_df, 'Metadata_broad_sample')

In [4]:
# Column under which the replicate correlation is attached to the consensus
# profiles.
replicate_corr_column = 'Metadata_replicate_correlation'

# Replicate correlation per `group_by_feature` group, computed separately for
# the two `Metadata_mmoles_per_liter` subsets; the helper's
# `replicate_correlation` output column is renamed to the name above.
replicating_corr_conc_1_df = (
    utils.corr_between_replicates_df(all_plates_conc_1_df, group_by_feature)
    .rename(columns={'replicate_correlation': replicate_corr_column})
)
replicating_corr_conc_4_df = (
    utils.corr_between_replicates_df(all_plates_conc_4_df, group_by_feature)
    .rename(columns={'replicate_correlation': replicate_corr_column})
)

# Attach the correlation to each consensus frame. The consensus frames are
# overwritten with their merged version, so a second run of this cell would
# merge the correlation in again and pandas would emit `_x` / `_y` suffixed
# duplicates. Dropping a correlation column left over from a previous run
# first keeps the cell re-runnable; on a fresh kernel the drop is a no-op.
# NOTE(review): only the correlation column is cleared -- if the helper
# returns further non-key columns they would still duplicate on a re-run.
all_plates_conc_1_consensus_df = (
    all_plates_conc_1_consensus_df
    .drop(columns=replicate_corr_column, errors='ignore')
    .merge(replicating_corr_conc_1_df, on='Metadata_broad_sample', how='left')
)
all_plates_conc_4_consensus_df = (
    all_plates_conc_4_consensus_df
    .drop(columns=replicate_corr_column, errors='ignore')
    .merge(replicating_corr_conc_4_df, on='Metadata_broad_sample', how='left')
)

# Stack both subsets; `join='inner'` keeps only the columns the two frames
# have in common, and the row index is renumbered.
corr_df = pd.concat([all_plates_conc_1_consensus_df, all_plates_conc_4_consensus_df], ignore_index=True, join='inner')

In [5]:
# Export the combined consensus profiles (`corr_df`) as a GCT file.
consensus_gct_path = 'output/consensus_profiles.gct'
write_gct(corr_df, consensus_gct_path)