In [1]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(ggridges))
suppressPackageStartupMessages(library(corrplot))

In [2]:
# Seed R's random number generator so that any results drawing on it are
# reproducible from run to run
set.seed(123)

In [7]:
# Build the path to the normalized, variable-selected profile CSV for plate
# BR00103268 inside the 2019_05_13_Batch2 backend directory.
# NOTE(review): this comment used to say "both batches", but only this single
# batch/plate path is constructed here - confirm whether a second batch is
# meant to be read as well.
profile_dir <- file.path("..", "..", "..", "backend", "2019_05_13_Batch2")
profile_file <- file.path(profile_dir, "BR00103268/BR00103268_normalized_variable_selected.csv")
# Echo the resolved path so it shows up in the cell output
profile_file

In [12]:
# Column parsing rules for the profile CSV: the listed Metadata_ columns are
# read as text, except plating density and timepoint which are integers.
# Any column not named here falls back to double.
profile_cols <- readr::cols(
    Metadata_Plate = readr::col_character(),
    Metadata_Well = readr::col_character(),
    Metadata_Assay_Plate_Barcode = readr::col_character(),
    Metadata_Plate_Map_Name = readr::col_character(),
    Metadata_well_position = readr::col_character(),
    Metadata_plating_density = readr::col_integer(),
    Metadata_line_ID = readr::col_character(),
    Metadata_timepoint = readr::col_integer(),
    .default = readr::col_double()
)

# Load the profiles using the explicit column types above
profile_df <- profile_file %>%
    readr::read_csv(col_types = profile_cols)

# Show the table's dimensions and its first two rows
dim(profile_df)
profile_df %>% head(2)

Metadata_Plate,Metadata_Well,Metadata_Assay_Plate_Barcode,Metadata_Plate_Map_Name,Metadata_well_position,Metadata_plating_density,Metadata_line_ID,Metadata_timepoint,Cells_AreaShape_Compactness,Cells_AreaShape_EulerNumber,⋯,Nuclei_Texture_InfoMeas2_Mito_5_00,Nuclei_Texture_InverseDifferenceMoment_DNA_20_03,Nuclei_Texture_SumAverage_Brightfield_20_02,Nuclei_Texture_SumAverage_DNA_20_02,Nuclei_Texture_SumAverage_ER_20_03,Nuclei_Texture_SumEntropy_ER_20_01,Nuclei_Texture_SumVariance_AGP_20_03,Nuclei_Texture_SumVariance_DNA_20_01,Nuclei_Texture_SumVariance_ER_20_03,Nuclei_Texture_SumVariance_Mito_20_01
<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<int>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
BR00103268,A01,BR00103268,cmQTL5-8_mt,A01,1000,A,24,-2.130747,0.8924824,⋯,2.009704,2.759381,2.494499,-3.4629672,3.141394,0.7225121,6.309485,-1.465593,-0.5403595,3.499636
BR00103268,A02,BR00103268,cmQTL5-8_mt,A02,1000,A,24,-1.200004,0.8924824,⋯,1.822471,-1.096467,1.348158,-0.1647277,4.130532,1.5059014,3.251157,1.186102,1.5884533,4.807874


In [18]:
# Keep only the columns whose names begin with "Metadata_"
metadata_df <- dplyr::select(profile_df, dplyr::starts_with("Metadata_"))

# Preview the leading rows
metadata_df %>% head()

Metadata_Plate,Metadata_Well,Metadata_Assay_Plate_Barcode,Metadata_Plate_Map_Name,Metadata_well_position,Metadata_plating_density,Metadata_line_ID,Metadata_timepoint
<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<int>
BR00103268,A01,BR00103268,cmQTL5-8_mt,A01,1000,A,24
BR00103268,A02,BR00103268,cmQTL5-8_mt,A02,1000,A,24
BR00103268,A03,BR00103268,cmQTL5-8_mt,A03,3000,B,24
BR00103268,A04,BR00103268,cmQTL5-8_mt,A04,3000,B,24
BR00103268,A05,BR00103268,cmQTL5-8_mt,A05,1000,E,24
BR00103268,A06,BR00103268,cmQTL5-8_mt,A06,1000,E,24


In [11]:
# Load the isolated single-cell profiles and attach the metadata columns to
# each row by matching on plate and well
file <- file.path("data", "single_cell_isolated_profiles.tsv.gz")
isolated_sc_df <- dplyr::left_join(
    readr::read_tsv(file),
    metadata_df,
    by = c("Metadata_Plate", "Metadata_Well")
)

# Report the joined table's dimensions and preview two rows
dim(isolated_sc_df)
isolated_sc_df %>% head(2)

Parsed with column specification:
cols(
  .default = col_double(),
  FileName_CellOutlines = col_character(),
  FileName_IllumAGP = col_character(),
  FileName_IllumBrightfield = col_character(),
  FileName_IllumDNA = col_character(),
  FileName_IllumER = col_character(),
  FileName_IllumMito = col_character(),
  FileName_IllumRNA = col_character(),
  FileName_NucleiOutlines = col_character(),
  FileName_OrigAGP = col_character(),
  FileName_OrigBrightfield = col_character(),
  FileName_OrigDNA = col_character(),
  FileName_OrigER = col_character(),
  FileName_OrigMito = col_character(),
  FileName_OrigRNA = col_character(),
  MD5Digest_IllumAGP = col_character(),
  MD5Digest_IllumBrightfield = col_character(),
  MD5Digest_IllumDNA = col_character(),
  MD5Digest_IllumER = col_character(),
  MD5Digest_IllumMit

Metadata_Plate,Metadata_Well,Metadata_Assay_Plate_Barcode,Metadata_Plate_Map_Name,Metadata_well_position,Metadata_plating_density,Metadata_line_ID,Metadata_timepoint
<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<int>
BR00103268,A01,BR00103268,cmQTL5-8_mt,A01,1000,A,24
BR00103268,A02,BR00103268,cmQTL5-8_mt,A02,1000,A,24


In [59]:
# Names of the metadata columns present in the single-cell table
cp_metadata <- stringr::str_subset(
    colnames(isolated_sc_df),
    "^Metadata_"
)

# Names of the feature columns, recognised by their Nuclei_, Cells_ or
# Cytoplasm_ prefix
cp_features <- stringr::str_subset(
    colnames(isolated_sc_df),
    "^Nuclei_|^Cells_|^Cytoplasm_"
)

In [37]:
# Create a dataframe with one row per group, where a group is a unique
# combination of line ID, timepoint and plating density; the `n` column holds
# the number of metadata rows that fall in each group.
# The grouping columns are passed to count() directly: calling a bare count()
# on an ungrouped table collapses everything into a single total row.
# NOTE(review): the output recorded for this cell shows `well_id` and
# `group_id` columns, which this code does not create - confirm which version
# of the table later cells expect.
group_id_df <- metadata_df %>%
    dplyr::count(Metadata_line_ID,
                 Metadata_timepoint,
                 Metadata_plating_density)

# Show the number of groups and preview the first few
dim(group_id_df)
head(group_id_df)

well_id,group_id,Metadata_line_ID,Metadata_timepoint,Metadata_plating_density
<int>,<int>,<chr>,<int>,<int>
1,1,A,24,1000
2,1,A,24,1000
3,7,B,24,3000
4,7,B,24,3000
5,17,E,24,1000
6,17,E,24,1000


In [43]:
# Print each value of the group table's `well_id` column, one per line.
# After the loop finishes, `well` keeps the last value that was iterated over.
# NOTE(review): a later cell filters on `!!well`, so it presumably relies on
# that leftover value - confirm.
for (well in group_id_df$well_id) {
    print(well)
}

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 42
[1] 43
[1] 44
[1] 45
[1] 46
[1] 47
[1] 48
[1] 49
[1] 50
[1] 51
[1] 52
[1] 53
[1] 54
[1] 55
[1] 56
[1] 57
[1] 58
[1] 59
[1] 60
[1] 61
[1] 62
[1] 63
[1] 64
[1] 65
[1] 66
[1] 67
[1] 68
[1] 69
[1] 70
[1] 71
[1] 72
[1] 73
[1] 74
[1] 75
[1] 76
[1] 77
[1] 78
[1] 79
[1] 80
[1] 81
[1] 82
[1] 83
[1] 84
[1] 85
[1] 86
[1] 87
[1] 88
[1] 89
[1] 90
[1] 91
[1] 92
[1] 93
[1] 94
[1] 95
[1] 96
[1] 97
[1] 98
[1] 99
[1] 100
[1] 101
[1] 102
[1] 103
[1] 104
[1] 105
[1] 106
[1] 107
[1] 108
[1] 109
[1] 110
[1] 111
[1] 112
[1] 113
[1] 114
[1] 115
[1] 116
[1] 117
[1] 118
[1] 119
[1] 120
[1] 121
[1] 122
[1] 123
[1] 124
[1] 125
[1] 126
[1] 127
[1] 128
[1] 129
[1] 130
[1] 131
[1] 132
[1] 133
[1] 134
[1] 135
[1] 136
[1] 137
[1] 138
[1] 

In [56]:
# Metadata rows for one well, chosen by the current value of `well`.
# NOTE(review): `well_id` is not one of the Metadata_ columns selected into
# metadata_df in this notebook - presumably it is added in a cell that is not
# shown; confirm.
subset_metadata_df <- metadata_df %>%
    dplyr::filter(well_id == !!well)

# Distinct well label(s) found in that subset
well_num <- unique(subset_metadata_df$Metadata_Well)

# Row-against-row correlation of the feature values for the selected well
test <- isolated_sc_df %>%
    # keep the rows whose well position matches the selected well
    dplyr::filter(Metadata_well_position == !!well_num) %>%
            # restrict to the feature columns listed in cp_features
            # NOTE(review): cp_features is an external character vector passed
            # bare to select(); consider an explicit selection helper - confirm
            # against the installed dplyr/tidyselect version.
            dplyr::select(cp_features) %>%
            # transpose so every original row becomes a column ...
            t() %>%
            # ... because cor() correlates columns (Pearson by default)
            cor() %>%
            dplyr::as_tibble() %>%
            # name the columns after the subset's dictionary_id values
            # NOTE(review): `dictionary_id` is not created in any visible cell,
            # and its length must equal the number of correlated rows - confirm.
            magrittr::set_colnames(subset_metadata_df$dictionary_id)

In [65]:
# Install reshape2 only when it cannot be loaded, so re-running the notebook
# does not reinstall the package every time. requireNamespace() returns FALSE
# both when the package is missing and when an existing installation fails to
# load, so a broken install still triggers a reinstall.
if (!requireNamespace("reshape2", quietly = TRUE)) {
    install.packages("reshape2")
}

Installing package into ‘/home/ubuntu/R/library’
(as ‘lib’ is unspecified)
“installation of package ‘reshape2’ had non-zero exit status”

In [63]:
# Put the subset's metadata next to the correlation table, drop the columns
# named in cp_metadata, then reshape to long format: `dictionary_id` stays as
# the identifier, the former column names go to `correlation_id` and the
# values to `pearson_cor`.
# NOTE(review): the recorded run of this cell stopped because reshape2 had to
# be reinstalled for the running R version.
# NOTE(review): bind_cols() needs both tables to have the same number of rows
# - confirm that holds for subset_metadata_df and `test`.
subset_metadata_df %>%
    dplyr::bind_cols(test) %>%
    dplyr::select(-cp_metadata) %>%
    reshape2::melt(id.vars = 'dictionary_id',
                   variable.name = 'correlation_id', 
                   value.name = "pearson_cor")

ERROR: Error: package ‘reshape2’ was installed by an R version with different internals; it needs to be reinstalled for use with this R version


In [41]:
# Build a long-format table of pairwise Pearson correlations computed over
# every row of isolated_sc_df at once.
# NOTE(review): the recorded run failed with "cannot allocate vector of size
# 8.3 Gb". cor() on the transposed feature table yields a rows-by-rows matrix,
# so memory grows with the square of the row count - consider computing the
# correlations per well or per group instead.
# NOTE(review): `well_id`, `group_id`, `condition_group_id` and
# `dictionary_id` are not created in any visible cell - confirm where they are
# added to metadata_df.
cor_melt_df <- metadata_df %>%
    # drop the helper id columns before attaching the correlations
    dplyr::select(-well_id,
                  -group_id,
                  -condition_group_id) %>%
    dplyr::bind_cols(
        isolated_sc_df %>%
            # feature columns only
            dplyr::select(cp_features) %>%
            # transpose so each original row becomes a column for cor()
            t() %>%
            cor() %>%
            dplyr::as_tibble() %>%
            # NOTE(review): the number of names supplied here must equal the
            # number of rows in isolated_sc_df - confirm.
            magrittr::set_colnames(metadata_df$dictionary_id)
    ) %>%
    # remove the Metadata_ columns, keeping dictionary_id as the identifier
    dplyr::select(-cp_metadata) %>%
    reshape2::melt(id.vars = 'dictionary_id',
                   variable.name = 'correlation_id', 
                   value.name = "pearson_cor")

# Show the size of the long table and its first rows
dim(cor_melt_df)
head(cor_melt_df)

ERROR: Error: cannot allocate vector of size 8.3 Gb
