## Apply bortezomib resistance signature to other drug resistant clones

**Gregory Way, 2021**

In this project, we also collected Cell Painting readouts for two other cell line clone categories: Ixazomib and CB-5083 resistant clones.

Here, I apply the bortezomib resistance signature to these clones.

I modified this script from `2.apply-bortezomib-signature.ipynb`.

**Yu Han, 2021**

I did not make any changes to Greg Way's original 9.apply-signature-otherclones script, except for loading the new results from 8.1 and changing the output file names. The signatures are applied directly to the clone dataset from 8.1.

In [1]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(singscore))

source(file.path("utils", "singscore_utils.R"))

In [2]:
# Analysis parameters
seed <- 1234
num_permutations <- 1000

# `sig_dataset` names the signature being applied;
# `dataset` names the profiles it is applied to
sig_dataset <- "bortezomib"
dataset <- "otherclones"

# Directory layout
data_dir <- "data"
input_results_dir <- file.path("results", "signatures")
output_dir <- file.path("results", "singscore")

# Input files: normalized profiles and the feature selection table
data_file <- file.path(
    data_dir,
    paste0(dataset, "_normalized_profiles_LAST_BATCH_VALIDATION.tsv.gz")
)
feat_file <- file.path(data_dir, "dataset_features_selected.tsv")

# Input files: signature summary and Tukey results for `sig_dataset`
signature_file <- file.path(
    input_results_dir,
    paste0("signature_summary_", sig_dataset, "_signature.tsv.gz")
)
tukey_file <- file.path(
    input_results_dir,
    paste0("tukey_results_", sig_dataset, "_signature.tsv.gz")
)

# Output file
# NOTE(review): there is no separator between "VALIDATION" and the dataset
# name in this file name - confirm what downstream readers expect before
# changing it
output_results_file <- file.path(
    output_dir,
    paste0("singscore_results_LAST_BATCH_VALIDATION", dataset, ".tsv.gz")
)

In [3]:
# Seed R's random number generator with the configured `seed`
# NOTE(review): presumably this makes the permutation step of the singscore
# pipeline reproducible - confirm that singscorePipeline draws from R's RNG
set.seed(seed)

In [4]:
# Read the table of selected features; column types are left for readr to guess
all_selected_features_df <- feat_file %>%
    readr::read_tsv(col_types = readr::cols())

# Preview the first three rows
head(all_selected_features_df, 3)

features,dataset
<chr>,<chr>
Cells_AreaShape_Compactness,bortezomib
Cells_AreaShape_Eccentricity,bortezomib
Cells_AreaShape_Extent,bortezomib


In [5]:
# Column types for the profile table: every column that is not listed is
# parsed as double, the cell count as integer, and the remaining listed
# metadata columns as character
bulk_col_types <- readr::cols(
    .default = readr::col_double(),
    Metadata_cell_count = readr::col_integer(),
    Metadata_Plate = readr::col_character(),
    Metadata_Well = readr::col_character(),
    Metadata_batch = readr::col_character(),
    Metadata_clone_number = readr::col_character(),
    Metadata_plate_map_name = readr::col_character(),
    Metadata_treatment = readr::col_character(),
    Metadata_dataset = readr::col_character(),
    Metadata_clone_type = readr::col_character(),
    Metadata_clone_type_indicator = readr::col_character(),
    Metadata_model_split = readr::col_character(),
    Metadata_cell_density = readr::col_character(),
    Metadata_treatment_time = readr::col_character(),
    Metadata_unique_sample_name = readr::col_character(),
    Metadata_time_to_adhere = readr::col_character()
)

# Features selected for the signature dataset
# (feature selection was performed in 0.compile-bulk-datasets)
selected_features <- all_selected_features_df %>%
    dplyr::filter(dataset == !!sig_dataset) %>%
    dplyr::pull(features)

# Load the profiles, keeping the metadata columns and the selected features
data_df <- readr::read_tsv(data_file, col_types = bulk_col_types) %>%
    dplyr::select(starts_with("Metadata"), all_of(selected_features))

# Report the dimensions and preview the first four rows
print(dim(data_df))
head(data_df, 4)

[1] 200 800


Metadata_Plate,Metadata_Well,Metadata_batch,Metadata_cell_count,Metadata_cell_density,Metadata_celltype_shorthand_from_plate_graph,Metadata_clone_number,Metadata_date,Metadata_plate_map_name,Metadata_time_to_adhere,⋯,Nuclei_Texture_InfoMeas2_AGP_5_02,Nuclei_Texture_InfoMeas2_DNA_5_02,Nuclei_Texture_InfoMeas2_ER_20_01,Nuclei_Texture_InfoMeas2_ER_5_00,Nuclei_Texture_InfoMeas2_RNA_5_00,Nuclei_Texture_InverseDifferenceMoment_ER_20_01,Nuclei_Texture_InverseDifferenceMoment_RNA_20_01,Nuclei_Texture_SumAverage_DNA_20_03,Nuclei_Texture_SumEntropy_DNA_20_03,Nuclei_Texture_SumVariance_RNA_20_03
<chr>,<chr>,<chr>,<int>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
221057,B05,2021_08_02_Batch24,1814,2.5x10^3 cells/well,4,WT clone 10,20210728,221057,48 hr,⋯,-0.3144547,-0.3437437,-0.1018554,-0.2638773,-0.04099116,0.6898421,0.6760525,-0.18427512,0.8406786,0.01656622
221057,B06,2021_08_02_Batch24,5481,2.5x10^3 cells/well,5,WT clone 12,20210728,221057,48 hr,⋯,-0.4917421,-0.2220907,0.0345507,-0.1007458,-0.2629476,0.1302506,0.2230243,1.36205297,-0.5160039,-0.43167174
221057,B10,2021_08_02_Batch24,1925,2.5x10^3 cells/well,4,WT clone 10,20210728,221057,48 hr,⋯,0.1631125,0.3820743,0.3073186,0.2186449,0.35042776,0.1596681,-0.1958735,0.08868622,0.8455309,0.18406486
221057,B11,2021_08_02_Batch24,3910,2.5x10^3 cells/well,5,WT clone 12,20210728,221057,48 hr,⋯,-0.6951889,-0.3728656,-0.3208913,-0.8540147,-0.57432367,0.9276177,1.2615711,0.74297204,-0.2969278,-0.91253833


In [6]:
# Count the profiles for each value of the clone type indicator
table(data_df[["Metadata_clone_type_indicator"]])


  0   1 
120  80 

In [7]:
# Column types for the signature summary; only these four columns are typed
# explicitly, the remaining columns are left to readr's type guessing
sig_col_types <- readr::cols(
    features = readr::col_character(),
    dataset = readr::col_character(),
    non_specific_exclude = readr::col_logical(),
    final_signature = readr::col_logical()
)

# Read the signature summary table
signature_df <- signature_file %>%
    readr::read_tsv(col_types = sig_col_types)

# Report the dimensions and preview the first four rows
print(dim(signature_df))
head(signature_df, 4)

[1] 782   8


features,non_status_significant_exclude,batch_exclude,cell_count_exclude,non_specific_exclude,treatment_time_exclude,final_signature,dataset
<chr>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<chr>
Cells_AreaShape_Compactness,False,False,False,True,False,False,bortezomib
Cells_AreaShape_Eccentricity,True,False,True,True,False,False,bortezomib
Cells_AreaShape_Extent,False,False,False,True,False,False,bortezomib
Cells_AreaShape_FormFactor,False,False,True,True,False,False,bortezomib


In [8]:
# Column types for the Tukey results table
# (the sign of `estimate` later decides whether a feature is "up" or "down")
tukey_cols <- readr::cols(
    term = readr::col_character(),
    comparison = readr::col_character(),
    feature = readr::col_character(),
    dataset = readr::col_character(),
    estimate = readr::col_double(),
    conf.low = readr::col_double(),
    conf.high = readr::col_double(),
    adj.p.value = readr::col_double(),
    neg_log_adj_p = readr::col_double()
)

# Read the Tukey results table
tukey_df <- tukey_file %>%
    readr::read_tsv(col_types = tukey_cols)

# Report the dimensions and preview the first four rows
print(dim(tukey_df))
head(tukey_df, 4)

[1] 48484     9


term,comparison,estimate,conf.low,conf.high,adj.p.value,feature,neg_log_adj_p,dataset
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<chr>
Metadata_clone_type_indicator,1-0,0.78076946,0.66152296,0.900016,0.0,Nuclei_RadialDistribution_FracAtD_Mito_3of4,inf,bortezomib
Metadata_treatment_time,4 hr-13 hr,0.05503349,-0.07099084,0.1810578,0.3901031,Nuclei_RadialDistribution_FracAtD_Mito_3of4,0.4088207,bortezomib
Metadata_batch,2021_03_03_Batch13-2021_03_03_Batch12,-0.020822,-0.32241818,0.2807742,0.9999567,Nuclei_RadialDistribution_FracAtD_Mito_3of4,1.881967e-05,bortezomib
Metadata_batch,2021_03_03_Batch15-2021_03_03_Batch12,-0.15303419,-0.45233883,0.1462704,0.6824337,Nuclei_RadialDistribution_FracAtD_Mito_3of4,0.1659395,bortezomib


In [9]:
# Subset data to process dataset-specific signature:
# keep only the features flagged as part of the final signature
signature_subset_df <- signature_df %>%
    dplyr::filter(dataset == !!sig_dataset, final_signature)

# Keep the clone type indicator comparisons for the signature features,
# ordered from the largest to the smallest estimate
tukey_subset_df <- tukey_df %>%
    dplyr::filter(
        dataset == !!sig_dataset,
        term == "Metadata_clone_type_indicator",
        feature %in% signature_subset_df$features
    ) %>%
    dplyr::arrange(desc(estimate))

# Ensure that the comparison is always resistant vs. sensitive
# and never the other way around!
# Having a single unique comparison label is not sufficient on its own: a
# table that is uniformly flipped ("0-1") would also have one label and would
# silently invert the up/down feature sets, so the label itself is checked.
# NOTE(review): assumes indicator 1 = resistant and 0 = sensitive, so that the
# expected label is "1-0" - confirm against the signature-building notebook
expected_comparison <- "1-0"
comparison_labels <- names(table(tukey_subset_df$comparison))
stopifnot(
    length(comparison_labels) == 1,
    comparison_labels == expected_comparison
)

# Determine feature direction from the sign of the estimate
# (features with an estimate of exactly zero fall in neither set)
up_features <- tukey_subset_df %>%
    dplyr::filter(estimate > 0) %>%
    dplyr::pull(feature)
down_features <- tukey_subset_df %>%
    dplyr::filter(estimate < 0) %>%
    dplyr::pull(feature)

# Store signature for downstream analyses
signature_features <- list("up" = up_features, "down" = down_features)
signature_features

In [10]:
# Score every profile against the up/down signature using the pipeline helper
# sourced from utils/singscore_utils.R
singscore_output <- singscorePipeline(
    df = data_df,
    sig_feature_list = signature_features,
    num_permutations = num_permutations
)

full_results_df <- singscore_output[["results"]]
permuted <- singscore_output[["permuted"]]

# Pool all permutation results into one vector (computed once) and take its
# 5th and 95th percentiles as the lower and upper bounds
permuted_values <- as.vector(as.matrix(permuted))
min_val <- quantile(permuted_values, 0.05)
max_val <- quantile(permuted_values, 0.95)

# Annotate some key metadata
# `!!dataset` injects the value of the `dataset` variable so that it can never
# be mistaken for a column of the same name in the results table
sing_score_results_df <- full_results_df %>%
    dplyr::mutate(
        dataset = !!dataset,
        min_permuted_value = min_val,
        max_permuted_value = max_val
    )

“'tidy.numeric' is deprecated.
“`data_frame()` is deprecated as of tibble 1.1.0.
Please use `tibble()` instead.

In [11]:
# Write the annotated singscore results to the configured output file
readr::write_tsv(sing_score_results_df, output_results_file)

# Report the dimensions and preview the first rows
print(dim(sing_score_results_df))
head(sing_score_results_df)

[1] 200  28


Metadata_Plate,Metadata_Well,Metadata_batch,Metadata_cell_count,Metadata_cell_density,Metadata_celltype_shorthand_from_plate_graph,Metadata_clone_number,Metadata_date,Metadata_plate_map_name,Metadata_time_to_adhere,⋯,TotalScore,TotalDispersion,UpScore,UpDispersion,DownScore,DownDispersion,Metadata_permuted_p_value,dataset,min_permuted_value,max_permuted_value
<chr>,<chr>,<chr>,<int>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
221057,B05,2021_08_02_Batch24,1814,2.5x10^3 cells/well,4,WT clone 10,20210728,221057,48 hr,⋯,0.2996256,446.2626,0.2167039,170.499,0.082921696,275.7636,0.001,otherclones,-0.1544401,0.1590129
221057,B06,2021_08_02_Batch24,5481,2.5x10^3 cells/well,5,WT clone 12,20210728,221057,48 hr,⋯,-0.3962962,266.1267,-0.2885045,135.6579,-0.107791762,130.4688,1.0,otherclones,-0.1544401,0.1590129
221057,B10,2021_08_02_Batch24,1925,2.5x10^3 cells/well,4,WT clone 10,20210728,221057,48 hr,⋯,0.1322834,522.6165,0.1317894,255.7485,0.000493965,266.868,0.096,otherclones,-0.1544401,0.1590129
221057,B11,2021_08_02_Batch24,3910,2.5x10^3 cells/well,5,WT clone 12,20210728,221057,48 hr,⋯,-0.2183279,334.3263,-0.1071429,186.0663,-0.111185087,148.26,0.992,otherclones,-0.1544401,0.1590129
221057,C02,2021_08_02_Batch24,2230,2.5x10^3 cells/well,10,BZ007,20210728,221057,48 hr,⋯,0.2802541,309.1221,0.1936384,166.7925,0.086615695,142.3296,0.002,otherclones,-0.1544401,0.1590129
221057,C03,2021_08_02_Batch24,3910,2.5x10^3 cells/well,9,BZ006,20210728,221057,48 hr,⋯,-0.3509619,325.4307,-0.1503906,175.6881,-0.200571281,149.7426,0.999,otherclones,-0.1544401,0.1590129
