In [1]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(singscore))

source(file.path("../3.bulk-signatures/utils", "singscore_utils.R"))

In [2]:
# Analysis parameters
seed <- 1234
num_permutations <- 1000
dataset <- "bortezomib"

# Input: analytical set of profiles for the selected dataset
data_dir <- "data"
data_basename <- paste0(dataset, "_signature_analytical_set.tsv.gz")
data_file <- file.path(data_dir, data_basename)

# Input: signature summary and Tukey results for the selected dataset
input_results_dir <- file.path("results", "signatures")
signature_basename <- paste0("signature_summary_", dataset, "_signature.tsv")
tukey_basename <- paste0("tukey_results_", dataset, "_signature.tsv.gz")
signature_file <- file.path(input_results_dir, signature_basename)
tukey_file <- file.path(input_results_dir, tukey_basename)

# Output: singscore results table
output_dir <- file.path("results", "singscore")
# NOTE(review): unlike the input file names, there is no "_" between
# "singscore_results" and the dataset name. The name is kept as-is because
# other code may read this exact path -- confirm before changing it.
output_basename <- paste0("singscore_results", dataset, ".tsv.gz")
output_results_file <- file.path(output_dir, output_basename)

In [3]:
# Fix the state of R's random number generator so that repeated runs of this
# notebook draw the same random numbers
set.seed(seed)

In [4]:
# Load profiles
# Metadata columns listed here are parsed as character; the cell count is
# parsed as integer and every remaining column falls back to double.
character_metadata_cols <- c(
    "Metadata_Plate",
    "Metadata_Well",
    "Metadata_batch",
    "Metadata_clone_number",
    "Metadata_plate_map_name",
    "Metadata_treatment",
    "Metadata_dataset",
    "Metadata_clone_type",
    "Metadata_clone_type_indicator",
    "Metadata_model_split",
    "Metadata_cell_density",
    "Metadata_treatment_time",
    "Metadata_unique_sample_name",
    "Metadata_time_to_adhere"
)

bulk_col_types <- do.call(
    readr::cols,
    c(
        list(
            .default = readr::col_double(),
            Metadata_cell_count = readr::col_integer()
        ),
        lapply(
            stats::setNames(character_metadata_cols, character_metadata_cols),
            function(column_name) readr::col_character()
        )
    )
)

data_df <- readr::read_tsv(file = data_file, col_types = bulk_col_types)

# Report the table dimensions and preview the first rows
print(dim(data_df))
head(data_df, n = 4)

[1] 165 272


Metadata_Plate,Metadata_Well,Metadata_batch,Metadata_cell_count,Metadata_cell_density,Metadata_celltype_shorthand_from_plate_graph,Metadata_clone_number,Metadata_date,Metadata_plate_map_name,Metadata_time_to_adhere,⋯,Nuclei_RadialDistribution_MeanFrac_Mito_1of4,Nuclei_RadialDistribution_MeanFrac_Mito_4of4,Nuclei_RadialDistribution_MeanFrac_RNA_1of4,Nuclei_RadialDistribution_MeanFrac_RNA_4of4,Nuclei_RadialDistribution_RadialCV_DNA_1of4,Nuclei_Texture_Correlation_DNA_10_03,Nuclei_Texture_Correlation_ER_10_00,Nuclei_Texture_Correlation_ER_10_01,Nuclei_Texture_Correlation_ER_10_03,Nuclei_Texture_Correlation_Mito_10_02
<chr>,<chr>,<chr>,<int>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
219814,B10,2021_02_08_Batch11,12453,2.5x10^3 cells/well,4,WT clone 01,20210205,219814,48 hr,⋯,0.920735,2.29247,2.786923,0.2150331,1.801948,2.8111508,-0.372571,-0.41285981,-0.41128528,1.5414147
219814,B11,2021_02_08_Batch11,2914,2.5x10^3 cells/well,5,WT clone 02,20210205,219814,48 hr,⋯,0.4977887,-1.757208,1.13621,-4.1661807,2.112966,0.1271175,0.2419688,-0.07904743,-0.07081247,1.9853635
219814,C02,2021_02_08_Batch11,6314,2.5x10^3 cells/well,10,BZ002,20210205,219814,48 hr,⋯,3.4329133,2.349513,2.21048,6.0104808,1.421137,3.3594385,0.0205416,-0.18167256,-0.18991518,0.8667502
219814,C03,2021_02_08_Batch11,4275,2.5x10^3 cells/well,9,BZ001,20210205,219814,48 hr,⋯,5.8284016,-4.883283,3.258141,-5.7022461,3.305521,-2.6333408,0.3541864,-0.35552921,-0.37832162,1.8785706


In [5]:
# Load signatures
# Every column of the signature summary is given an explicit type so that
# parsing does not depend on readr's type guessing. The two "*_exclude" flags
# added here were previously guessed; they are logical columns in the file.
sig_col_types <- readr::cols(
    features = readr::col_character(),
    non_status_significant_exclude = readr::col_logical(),
    cell_count_exclude = readr::col_logical(),
    non_specific_exclude = readr::col_logical(),
    final_signature = readr::col_logical(),
    dataset = readr::col_character()
)

signature_df <- readr::read_tsv(signature_file, col_types = sig_col_types)

# Report the table dimensions and preview the first rows
print(dim(signature_df))
head(signature_df, 4)

[1] 260   6


features,non_status_significant_exclude,cell_count_exclude,non_specific_exclude,final_signature,dataset
<chr>,<lgl>,<lgl>,<lgl>,<lgl>,<chr>
Cells_AreaShape_Compactness,False,False,True,False,bortezomib
Cells_AreaShape_Eccentricity,False,False,True,False,bortezomib
Cells_AreaShape_MeanRadius,True,False,False,False,bortezomib
Cells_AreaShape_MedianRadius,True,False,False,False,bortezomib


In [6]:
# Load Tukey results; the sign of `estimate` is used further down to decide
# whether a signature feature is "up" or "down"
tukey_cols <- readr::cols(
    # Identifier columns
    term = readr::col_character(),
    comparison = readr::col_character(),
    feature = readr::col_character(),
    dataset = readr::col_character(),
    # Numeric test statistics
    estimate = readr::col_double(),
    conf.low = readr::col_double(),
    conf.high = readr::col_double(),
    adj.p.value = readr::col_double(),
    neg_log_adj_p = readr::col_double()
)

tukey_df <- readr::read_tsv(file = tukey_file, col_types = tukey_cols)

# Report the table dimensions and preview the first rows
print(dim(tukey_df))
head(tukey_df, n = 4)

[1] 11684     9


term,comparison,estimate,conf.low,conf.high,adj.p.value,feature,neg_log_adj_p,dataset
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<chr>
Metadata_clone_type_indicator,1-0,-0.2513397,-0.3572613,-0.145418,7.715345e-05,Cytoplasm_AreaShape_MedianRadius,4.112645,bortezomib
Metadata_clone_number,BZ002-BZ001,1.8594766,1.4574061,2.261547,1.950795e-11,Cytoplasm_AreaShape_MedianRadius,10.709788,bortezomib
Metadata_clone_number,BZ003-BZ001,2.0390049,1.6369345,2.441075,3.2081e-12,Cytoplasm_AreaShape_MedianRadius,11.493752,bortezomib
Metadata_clone_number,BZ004-BZ001,1.8594766,1.4574061,2.261547,1.950795e-11,Cytoplasm_AreaShape_MedianRadius,10.709788,bortezomib


In [7]:
# Keep only the features flagged as the final signature for this dataset
signature_subset_df <- signature_df %>%
    dplyr::filter(final_signature) %>%
    dplyr::filter(dataset == !!dataset)

# Keep the clone-type-indicator Tukey rows for those signature features
signature_feature_names <- signature_subset_df$features
tukey_subset_df <- tukey_df %>%
    dplyr::filter(term == "Metadata_clone_type_indicator") %>%
    dplyr::filter(dataset == !!dataset, feature %in% signature_feature_names)

# The sign of the estimate is only comparable across features when every row
# uses the same comparison (resistant vs. sensitive, never the other way
# around), so require exactly one distinct comparison label.
stopifnot(length(table(tukey_subset_df$comparison)) == 1)

# Split features by the sign of their estimate; which() drops missing
# estimates, and features with an estimate of exactly zero land in neither set
up_features <- tukey_subset_df$feature[which(tukey_subset_df$estimate > 0)]
down_features <- tukey_subset_df$feature[which(tukey_subset_df$estimate < 0)]

# Bundle both directions for downstream analyses
signature_features <- list(up = up_features, down = down_features)

In [8]:
# Display the "up" and "down" feature lists that make up the signature
signature_features

In [9]:
# Score every profile against the signature. singscorePipeline() is not defined
# in this notebook (presumably it comes from the sourced singscore_utils.R);
# its output is indexed below by "results" and "permuted".
singscore_output <- singscorePipeline(
    df = data_df,
    sig_feature_list = signature_features,
    num_permutations = num_permutations
)

permuted <- singscore_output[["permuted"]]
full_results_df <- singscore_output[["results"]]

# 5th and 95th percentiles of all permuted values pooled into one vector
permuted_values <- as.vector(as.matrix(permuted))
permuted_bounds <- quantile(permuted_values, probs = c(0.05, 0.95))
min_val <- permuted_bounds[1]
max_val <- permuted_bounds[2]

# Attach the dataset name and the permutation bounds to every result row
sing_score_results_df <- dplyr::mutate(
    full_results_df,
    dataset = dataset,
    min_permuted_value = min_val,
    max_permuted_value = max_val
)

“'tidy.numeric' is deprecated.
“`data_frame()` is deprecated as of tibble 1.1.0.
Please use `tibble()` instead.

In [10]:
# Create the destination directory if it is missing so the write below does
# not fail on a fresh checkout; this is a no-op when it already exists
dir.create(
    dirname(output_results_file),
    recursive = TRUE,
    showWarnings = FALSE
)

# Write the annotated singscore results as a tab-separated file
sing_score_results_df %>% readr::write_tsv(output_results_file)

# Report the table dimensions and preview the first rows
print(dim(sing_score_results_df))
head(sing_score_results_df)

[1] 165  28


Metadata_Plate,Metadata_Well,Metadata_batch,Metadata_cell_count,Metadata_cell_density,Metadata_celltype_shorthand_from_plate_graph,Metadata_clone_number,Metadata_date,Metadata_plate_map_name,Metadata_time_to_adhere,⋯,TotalScore,TotalDispersion,UpScore,UpDispersion,DownScore,DownDispersion,Metadata_permuted_p_value,dataset,min_permuted_value,max_permuted_value
<chr>,<chr>,<chr>,<int>,<chr>,<dbl>,<chr>,<dbl>,<chr>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
219814,B10,2021_02_08_Batch11,12453,2.5x10^3 cells/well,4,WT clone 01,20210205,219814,48 hr,⋯,-0.31530097,126.021,-0.21370735,60.7866,-0.101593625,65.2344,0.941,bortezomib,-0.3411847,0.3310993
219814,B11,2021_02_08_Batch11,2914,2.5x10^3 cells/well,5,WT clone 02,20210205,219814,48 hr,⋯,-0.59142954,17.7912,-0.3264893,10.3782,-0.264940239,7.413,1.0,bortezomib,-0.3411847,0.3310993
219814,C02,2021_02_08_Batch11,6314,2.5x10^3 cells/well,10,BZ002,20210205,219814,48 hr,⋯,-0.01963229,68.1996,-0.01764025,60.7866,-0.001992032,7.413,0.536,bortezomib,-0.3411847,0.3310993
219814,C03,2021_02_08_Batch11,4275,2.5x10^3 cells/well,9,BZ001,20210205,219814,48 hr,⋯,-0.10345355,100.8168,-0.11075766,53.3736,0.007304117,47.4432,0.677,bortezomib,-0.3411847,0.3310993
219814,C04,2021_02_08_Batch11,3869,2.5x10^3 cells/well,8,WT clone 05,20210205,219814,48 hr,⋯,-0.45474781,29.652,-0.2854251,25.2042,-0.169322709,4.4478,0.988,bortezomib,-0.3411847,0.3310993
219814,C05,2021_02_08_Batch11,7621,2.5x10^3 cells/well,7,WT clone 04,20210205,219814,48 hr,⋯,-0.36497388,19.2738,-0.22353962,17.7912,-0.141434263,1.4826,0.962,bortezomib,-0.3411847,0.3310993
