## Compile dataset of clones resistant to other drugs

**Gregory Way, 2021**

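The clones compiled here are resistant to either ixazomib or CB-5083 rather than bortezomib; wild-type (WT) clones from the same plates are included and labeled as sensitive.
I compile the DMSO-treated profiles of these clones into a single dataset so that the bortezomib resistance signature can be applied to them later.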

In [1]:
import sys
import pathlib
import numpy as np
import pandas as pd

from pycytominer.cyto_utils import infer_cp_features

sys.path.insert(0, "../2.describe-data/scripts")
from processing_utils import load_data

In [2]:
# Seed numpy's global random number generator so reruns are reproducible
random_seed = 1234
np.random.seed(random_seed)

In [3]:
# Input directories: both live under the same sibling folder
profiles_root = pathlib.Path("../0.generate-profiles")
data_dir = profiles_root / "profiles"
cell_count_dir = profiles_root / "cell_counts"

# Directory that receives the compiled dataset
output_dir = pathlib.Path("data")

# Passed to load_data as `suffix`
profile_suffix = "normalized.csv.gz"

In [4]:
# Plate identifiers to load, grouped by batch, for each dataset
ixazomib_plates = {
    "2020_08_24_Batch9": ["218698"],
    "2020_09_08_Batch10": ["218854", "218858"],
}
cb5083_plates = {
    "2020_08_24_Batch9": ["218696", "218774"],
    "2020_09_08_Batch10": ["218852", "218856"],
}

# dataset name -> {batch name -> [plate ids]}; insertion order sets the row order downstream
datasets = {
    "ixazomib": ixazomib_plates,
    "cb5083": cb5083_plates,
}

In [5]:
# Build one annotated profile table per dataset, then stack all datasets into `full_df`
annotated_datasets = []
for dataset_name, batch_to_plates in datasets.items():
    batch_frames = []
    for batch_name, plate_ids in batch_to_plates.items():
        # Load the profiles for every listed plate of this batch
        batch_df = load_data(
            batch=batch_name,
            plates=plate_ids,
            profile_dir=data_dir,
            suffix=profile_suffix,
            combine_dfs=True,
            harmonize_cols=True,
            add_cell_count=True,
            cell_count_dir=cell_count_dir,
        )

        # Every profile starts out labeled "resistant"; "WT" clones are relabeled below
        batch_df = batch_df.assign(
            Metadata_dataset=dataset_name,
            Metadata_batch=batch_name,
            Metadata_clone_type="resistant",
            Metadata_clone_type_indicator=1,
            Metadata_model_split="otherclone",
        )

        # Clone names containing "WT" are marked sensitive (indicator 0)
        is_wt = batch_df.Metadata_clone_number.str.contains("WT")
        batch_df.loc[is_wt, "Metadata_clone_type"] = "sensitive"
        batch_df.loc[is_wt, "Metadata_clone_type_indicator"] = 0
        batch_frames.append(batch_df)

    # Stack the batches of this dataset with a fresh 0..n-1 index
    dataset_profiles = pd.concat(batch_frames, axis=0, sort=False, ignore_index=True)

    # Per-dataset running sample ID (used later in the singscore calculation)
    sample_names = [
        f"profile_{row}_{dataset_name}" for row in range(len(dataset_profiles))
    ]
    dataset_profiles = dataset_profiles.assign(Metadata_unique_sample_name=sample_names)

    annotated_datasets.append(dataset_profiles)

full_df = pd.concat(annotated_datasets, axis=0, sort=False, ignore_index=True)

In [6]:
# Display the combined profiles before the columns are reordered
full_df

Unnamed: 0,Metadata_Plate,Metadata_Well,Metadata_batch,Metadata_cell_count,Metadata_cell_density,Metadata_celltype_shorthand_from_plate_graph,Metadata_clone_number,Metadata_date,Metadata_plate_map_name,Metadata_time_to_adhere,...,Nuclei_Texture_Variance_RNA_20_03,Nuclei_Texture_Variance_RNA_5_00,Nuclei_Texture_Variance_RNA_5_01,Nuclei_Texture_Variance_RNA_5_02,Nuclei_Texture_Variance_RNA_5_03,Metadata_dataset,Metadata_clone_type,Metadata_clone_type_indicator,Metadata_model_split,Metadata_unique_sample_name
0,218698,B02,2020_08_24_Batch9,12640,5x10^3 cells/well,1,WT_parental,20200818,218698,48 hr,...,-0.810931,-0.835064,-0.822127,-0.826348,-0.824009,ixazomib,sensitive,0,otherclone,profile_0_ixazomib
1,218698,B03,2020_08_24_Batch9,8927,5x10^3 cells/well,2,WT clone 04,20200818,218698,48 hr,...,0.972116,0.915041,0.922351,0.916150,0.909702,ixazomib,sensitive,0,otherclone,profile_1_ixazomib
2,218698,B04,2020_08_24_Batch9,6044,5x10^3 cells/well,3,WT clone 05,20200818,218698,48 hr,...,0.680603,0.672812,0.668694,0.678737,0.661965,ixazomib,sensitive,0,otherclone,profile_2_ixazomib
3,218698,B05,2020_08_24_Batch9,7530,5x10^3 cells/well,4,WT clone 06,20200818,218698,48 hr,...,0.121605,0.160929,0.155896,0.167694,0.166032,ixazomib,sensitive,0,otherclone,profile_3_ixazomib
4,218698,B06,2020_08_24_Batch9,4726,5x10^3 cells/well,5,WT clone 07,20200818,218698,48 hr,...,1.517792,1.325911,1.337747,1.330573,1.334186,ixazomib,sensitive,0,otherclone,profile_4_ixazomib
5,218698,B07,2020_08_24_Batch9,9122,5x10^3 cells/well,6,Ixazomib clone 01,20200818,218698,48 hr,...,0.057692,0.171347,0.180554,0.185394,0.178843,ixazomib,resistant,1,otherclone,profile_5_ixazomib
6,218698,B08,2020_08_24_Batch9,7016,5x10^3 cells/well,7,Ixazomib clone 02,20200818,218698,48 hr,...,-0.541310,-0.496192,-0.504635,-0.497428,-0.496317,ixazomib,resistant,1,otherclone,profile_6_ixazomib
7,218698,B09,2020_08_24_Batch9,24069,5x10^3 cells/well,8,Ixazomib clone 03,20200818,218698,48 hr,...,-1.550256,-1.604438,-1.605135,-1.605810,-1.604571,ixazomib,resistant,1,otherclone,profile_7_ixazomib
8,218698,B10,2020_08_24_Batch9,20546,5x10^3 cells/well,9,Ixazomib clone 04,20200818,218698,48 hr,...,-1.274080,-1.311721,-1.309912,-1.309531,-1.308922,ixazomib,resistant,1,otherclone,profile_8_ixazomib
9,218698,B11,2020_08_24_Batch9,13874,5x10^3 cells/well,10,Ixazomib clone 05,20200818,218698,48 hr,...,-0.675401,-0.662610,-0.673556,-0.669747,-0.670629,ixazomib,resistant,1,otherclone,profile_9_ixazomib


In [6]:
# Put the metadata columns first, followed by the morphology feature columns
common_metadata = infer_cp_features(full_df, metadata=True)
morph_features = infer_cp_features(full_df)
ordered_columns = common_metadata + morph_features

full_df = full_df.reindex(columns=ordered_columns)

# Report the final dimensions and preview the first rows
print(full_df.shape)
full_df.head()

(420, 3546)


Unnamed: 0,Metadata_Plate,Metadata_Well,Metadata_batch,Metadata_cell_count,Metadata_cell_density,Metadata_celltype_shorthand_from_plate_graph,Metadata_clone_number,Metadata_date,Metadata_plate_map_name,Metadata_time_to_adhere,...,Nuclei_Texture_Variance_RNA_10_02,Nuclei_Texture_Variance_RNA_10_03,Nuclei_Texture_Variance_RNA_20_00,Nuclei_Texture_Variance_RNA_20_01,Nuclei_Texture_Variance_RNA_20_02,Nuclei_Texture_Variance_RNA_20_03,Nuclei_Texture_Variance_RNA_5_00,Nuclei_Texture_Variance_RNA_5_01,Nuclei_Texture_Variance_RNA_5_02,Nuclei_Texture_Variance_RNA_5_03
0,218698,B02,2020_08_24_Batch9,12640,5x10^3 cells/well,1,WT_parental,20200818,218698,48 hr,...,-0.812909,-0.824584,-0.809436,-0.822641,-0.81119,-0.810931,-0.835064,-0.822127,-0.826348,-0.824009
1,218698,B03,2020_08_24_Batch9,8927,5x10^3 cells/well,2,WT clone 04,20200818,218698,48 hr,...,0.898702,0.936022,0.930049,0.92725,0.925148,0.972116,0.915041,0.922351,0.91615,0.909702
2,218698,B04,2020_08_24_Batch9,6044,5x10^3 cells/well,3,WT clone 05,20200818,218698,48 hr,...,0.65912,0.632172,0.685941,0.606126,0.662649,0.680603,0.672812,0.668694,0.678737,0.661965
3,218698,B05,2020_08_24_Batch9,7530,5x10^3 cells/well,4,WT clone 06,20200818,218698,48 hr,...,0.176237,0.188908,0.190323,0.14004,0.203476,0.121605,0.160929,0.155896,0.167694,0.166032
4,218698,B06,2020_08_24_Batch9,4726,5x10^3 cells/well,5,WT clone 07,20200818,218698,48 hr,...,1.332638,1.352189,1.484521,1.584274,1.511908,1.517792,1.325911,1.337747,1.330573,1.334186


In [7]:
# Count profiles per dataset within each model split
pd.crosstab(
    index=full_df["Metadata_dataset"],
    columns=full_df["Metadata_model_split"],
)

Metadata_model_split,otherclone
Metadata_dataset,Unnamed: 1_level_1
cb5083,240
ixazomib,180


In [8]:
# Count profiles per clone within each model split
pd.crosstab(
    index=full_df["Metadata_clone_number"],
    columns=full_df["Metadata_model_split"],
)

Metadata_model_split,otherclone
Metadata_clone_number,Unnamed: 1_level_1
CB5038 clone 13,24
CB5038 clone 14,24
CB5038 clone 15,24
CB5038 clone 16,24
CB5038 clone 17,24
Ixazomib clone 01,18
Ixazomib clone 02,18
Ixazomib clone 03,18
Ixazomib clone 04,18
Ixazomib clone 05,18


In [9]:
# Write the compiled profiles as a tab-separated file; pandas infers gzip
# compression from the ".gz" extension.
# Create the output directory first so the write also succeeds on a fresh checkout.
output_dir.mkdir(parents=True, exist_ok=True)
output_file = output_dir / "otherclones_normalized_profiles.tsv.gz"
full_df.to_csv(output_file, sep="\t", index=False)