In [1]:
import pandas as pd
import numpy as np

In [2]:
# Column order used when writing each output file.
barcode_platemap_col_order = [
    'Plate_Map_Name',
    'Assay_Plate_Barcode',
]
plate_map_col_order = [
    'well_position',
    'broad_sample',
    'mmoles_per_liter',
    'solvent',
]
external_metadata_col_order = [
    'broad_sample',
    'cpd_name',
    'cpd_name_type',
    'cpd_sample_id',
    'pert_type',
    'control_type',
    'dos_library',
    'source_name',
    'chemist_name',
    'vendor_catalog_id',
    'user_comment',
    'smiles',
]

Read barcode platemap file

In [3]:
# Load the plate -> plate map table as stored on disk.
barcode_platemap_raw = pd.read_csv('../2015_Bray_GigaScience/barcode_platemap_25412.csv')

# Swap every cell equal to the integer 25412 for 24512 (applies to all columns).
barcode_platemap = barcode_platemap_raw.replace(to_replace=25412, value=24512)
barcode_platemap.shape

(413, 2)

Remove the plates listed in `plates_to_remove_from_giga.csv`, which were excluded previously

In [4]:
# Plates to exclude; rename the id column so it matches barcode_platemap.
plates_to_remove = pd.read_csv(
    '../2015_Bray_GigaScience/plates_to_remove_from_giga.csv'
).rename(columns={'plateid': 'Assay_Plate_Barcode'})

# Keep only the plates whose barcode is not in the removal list.
excluded_barcodes = list(plates_to_remove.Assay_Plate_Barcode)
is_excluded = barcode_platemap.Assay_Plate_Barcode.isin(excluded_barcodes)
barcode_platemap = barcode_platemap.loc[~is_excluded].reset_index(drop=True)
barcode_platemap.shape

(406, 2)

Drop plate `25568` whose features were not extracted (https://github.com/broadinstitute/cellpainting-gallery/issues/13#issuecomment-1255525365)

In [5]:
# Filter out plate 25568 and renumber the remaining rows from zero.
barcode_platemap = (
    barcode_platemap
    .query('Assay_Plate_Barcode!="25568"')
    .reset_index(drop=True)
)
barcode_platemap.shape

(405, 2)

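Write the platemap files and collect the rows for the external_metadata file (the barcode_platemap and external_metadata files themselves are written at the end of the notebook)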

In [6]:
# Write one cleaned platemap file per plate map and collect the
# (broad_sample, pert_type, control_type) rows of every plate.
#
# A plate map may be referenced by more than one plate, so each plate map is
# read, cleaned and written only once. Its metadata rows are still appended
# once per plate, which keeps the concatenated frame identical (same rows,
# same order) to a pass that re-reads the file for every plate.
metadata_by_plate_map = {}
metadata_frames = []

# One row per plate barcode (first occurrence), i.e. the plate map that a
# lookup by barcode would return for each unique plate.
plate_rows = barcode_platemap.drop_duplicates(subset='Assay_Plate_Barcode')

for plate_map_name in plate_rows.Plate_Map_Name:
    if plate_map_name not in metadata_by_plate_map:
        platemap_df = (
            pd.read_csv(f'../2015_Bray_GigaScience/platemap/{plate_map_name}.txt', sep='\t')
            .rename(columns={'ASSAY_WELL_ROLE': 'pert_type'})
            .replace({'mock': 'control', 'treated': 'trt'})
            .sort_values(by='well_position')
            .reset_index(drop=True)
        )
        platemap_df['well_position'] = platemap_df['well_position'].str.upper()
        # Control wells are labelled as negative controls; all other wells get an empty string.
        platemap_df['control_type'] = np.where(platemap_df['pert_type'] == "control", "negcon", "")

        platemap_df[plate_map_col_order].to_csv(f'../metadata/platemaps/CDRP/platemap/{plate_map_name}.txt', sep='\t', index=False)

        metadata_by_plate_map[plate_map_name] = platemap_df[['broad_sample', 'pert_type', 'control_type']]

    metadata_frames.append(metadata_by_plate_map[plate_map_name])

# Concatenate once at the end instead of growing the frame inside the loop,
# which copies all previously collected rows on every iteration.
external_metadata = (
    pd.concat(metadata_frames, ignore_index=True)
    if metadata_frames
    else pd.DataFrame()
)

barcode_platemap.shape

(405, 2)

In [7]:
# Display the (rows, columns) of the metadata collected from the platemaps.
external_metadata.shape

(154696, 3)

Remove duplicates

In [8]:
# Keep only the first occurrence of each fully identical row.
external_metadata = external_metadata.drop_duplicates()
external_metadata.shape

(30617, 3)

Add additional metadata

In [9]:
# Load the additional compound annotations and rename their upper-case
# columns to the lower-case names listed in external_metadata_col_order.
additional_metadata = (
    pd.read_csv('input/additional_metadata.csv')
    .rename(columns={
        'BROAD_ID': 'broad_sample',
        'CPD_NAME': 'cpd_name',
        'CPD_NAME_TYPE': 'cpd_name_type',
        'CPD_SAMPLE_ID': 'cpd_sample_id',
        'DOS_LIBRARY': 'dos_library',
        'SOURCE_NAME': 'source_name',
        'CHEMIST_NAME': 'chemist_name',
        'VENDOR_CATALOG_ID': 'vendor_catalog_id',
        'CPD_SMILES': 'smiles',
        'USERCOMMENT': 'user_comment',
    })
)

# Left merge: every row of external_metadata is kept, and rows whose
# broad_sample has no match in additional_metadata get missing values in the
# annotation columns.
external_metadata = (
    external_metadata.merge(additional_metadata, on='broad_sample', how='left')
    .sort_values(by='broad_sample')
    .reset_index(drop=True)
)

Update DMSO info

In [10]:
# Rows whose perturbation type is "control" get the DMSO structure and name.
is_control = external_metadata.pert_type == "control"
dmso_fields = {'smiles': "CS(=O)C", 'cpd_name': "DMSO"}
for column, value in dmso_fields.items():
    external_metadata.loc[is_control, column] = value

In [11]:
# Write the plate -> plate map table as CSV, with columns in the configured order.
barcode_platemap_out = barcode_platemap[barcode_platemap_col_order]
barcode_platemap_out.to_csv('../metadata/platemaps/CDRP/barcode_platemap.csv', index=False)

# Write the compound metadata as a tab-separated file, with columns in the configured order.
external_metadata_out = external_metadata[external_metadata_col_order]
external_metadata_out.to_csv('../metadata/external_metadata/cdrp_compounds.tsv', sep='\t', index=False)