# Merge Batched Data

**Gregory Way, 2019**

The data were collected in multiple batches.
In this notebook, I merge Batch 1 with Batch 2, and separately merge the two plates from Batch 3, and output `.csv` and `.gct` files.

In [1]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(magrittr))

In [2]:
# Load the shared processing helpers from the local scripts/ folder.
# NOTE(review): presumably this is where load_data(), merge_data() and
# write_gct() used below are defined -- confirm against processing_utils.R.
util_file <- file.path("scripts", "processing_utils.R")
source(file = util_file)

## Load, Merge, and Output Batch 1 and Batch 2 Data

### Load Batch 1

In [3]:
# Batch 1: batch directory name and the plate to load from it
batch <- "2019_02_15_Batch1_20X"
plate <- "HCT116bortezomib"

# Location of the plate's `_normalized_variable_selected.csv` file,
# relative to this notebook: ../../backend/<batch>/<plate>/
file <- file.path(
  "..", "..", "backend", batch, plate,
  paste0(plate, "_normalized_variable_selected.csv")
)
batch1_df <- load_data(data_file = file)

# Show the dimensions and preview the first two rows
print(dim(batch1_df))
head(batch1_df, n = 2)

[1]  36 129


Metadata_Plate,Metadata_Well,Metadata_Assay_Plate_Barcode,Metadata_Plate_Map_Name,Metadata_Batch_Number,Metadata_well_position,Metadata_CellLine,Metadata_Dosage,Cells_AreaShape_Eccentricity,Cells_AreaShape_EulerNumber,⋯,Nuclei_RadialDistribution_RadialCV_ER_1of4,Nuclei_RadialDistribution_RadialCV_Mito_1of4,Nuclei_RadialDistribution_RadialCV_Mito_2of4,Nuclei_RadialDistribution_RadialCV_RNA_1of4,Nuclei_Texture_Correlation_AGP_20_00,Nuclei_Texture_Correlation_DNA_10_02,Nuclei_Texture_Correlation_DNA_10_03,Nuclei_Texture_Correlation_ER_20_03,Nuclei_Texture_Correlation_RNA_10_01,Nuclei_Texture_Correlation_RNA_10_03
<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
HCT116bortezomib,B03,HCT116bortezomib,PlateMap_HCT116bortezomib,1,B03,WT,0,-0.5389166,0.09930445,⋯,-0.5634461,-0.3636316,-0.5166978,-1.585785,-0.929867,-0.2607355,0.7951588,0.766441,-1.433559,-1.081219
HCT116bortezomib,B04,HCT116bortezomib,PlateMap_HCT116bortezomib,1,B04,WT,0,-0.432274,0.1042688,⋯,-0.7727316,-0.2259001,-0.5198452,-1.190355,-0.1833654,-0.1233108,0.8626262,0.3892313,-0.5505529,-0.514341


### Load Batch 2

In [4]:
# Batch 2: batch directory name and the plate to load from it
batch <- "2019_03_20_Batch2"
plate <- "207106_exposure320"

# Location of the plate's `_normalized_variable_selected.csv` file,
# relative to this notebook: ../../backend/<batch>/<plate>/
file <- file.path(
  "..", "..", "backend", batch, plate,
  paste0(plate, "_normalized_variable_selected.csv")
)
batch2_df <- load_data(data_file = file)

# Show the dimensions and preview the first two rows
print(dim(batch2_df))
head(batch2_df, n = 2)

[1]  36 171


Metadata_Plate,Metadata_Well,Metadata_Assay_Plate_Barcode,Metadata_Plate_Map_Name,Metadata_Batch_Number,Metadata_well_position,Metadata_CellLine,Metadata_Dosage,Cells_AreaShape_Compactness,Cells_AreaShape_Orientation,⋯,Nuclei_Texture_Correlation_DNA_10_01,Nuclei_Texture_Correlation_ER_10_01,Nuclei_Texture_Correlation_Mito_5_02,Nuclei_Texture_Correlation_Mito_5_03,Nuclei_Texture_Correlation_RNA_10_01,Nuclei_Texture_Correlation_RNA_10_03,Nuclei_Texture_Correlation_RNA_20_00,Nuclei_Texture_Entropy_ER_20_01,Nuclei_Texture_SumVariance_ER_20_01,Nuclei_Texture_SumVariance_RNA_20_01
<chr>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
207106_exposure320,B02,207106_exposure320,PlateMap_207106_exposure320,2,B02,WT,0,1.319133,0.9176338,⋯,0.7871451,0.2753495,0.23614066,0.12067149,0.4023975,0.6558692,0.7843925,-0.3963174,-0.05320719,0.6949548
207106_exposure320,B03,207106_exposure320,PlateMap_207106_exposure320,2,B03,WT,0,1.623088,0.3895463,⋯,0.7169757,-0.1075322,0.08524138,-0.007435339,-0.1742759,0.5114458,0.5909146,-0.4505974,0.02310763,0.9688375


### Merge Data

In [5]:
# Destination for the merged Batch 1 + Batch 2 data
merge_file <- file.path("data", "merged_intersected_variable_selected.csv")

# Merge the two batches; `output_gct = TRUE` additionally requests GCT output
full_df <- merge_data(
  batch1_df,
  batch2_df,
  output_file = merge_file,
  output_gct = TRUE
)

# Dimensions of the merged data
dim(full_df)

Loading required package: curl
“select_() is deprecated. 
Please use select() instead

The 'programming' vignette or the tidyeval book can help you
to program with select() : https://tidyeval.tidyverse.org
“`data_frame()` is deprecated, use `tibble()`.

In [6]:
# Output GCT file with combined columns.
# The path is derived from `merge_file` (the merged CSV destination) by
# swapping the extension for `.gct`. At this point `file` still holds the
# Batch 2 *input* path from the load step, so deriving the name from it would
# write the merged GCT into the backend directory under the Batch 2 plate name.
# NOTE(review): merge_data(..., output_gct = TRUE) may already write a GCT to
# this same location -- confirm this call is still needed.
file <- paste0(tools::file_path_sans_ext(merge_file), ".gct")
write_gct(full_df, file)

## Load, Merge, and Output Two Plates from Batch 3

### Load Mutated Plate

In [7]:
# Batch 3, mutant clones plate
batch <- "2019_06_25_Batch3"
plate <- "MutClones"

# Location of the plate's `_normalized_variable_selected.csv` file,
# relative to this notebook: ../../backend/<batch>/<plate>/
file <- file.path(
  "..", "..", "backend", batch, plate,
  paste0(plate, "_normalized_variable_selected.csv")
)
batch3_mut_df <- load_data(file)

# Show the dimensions and preview the first two rows
print(dim(batch3_mut_df))
head(batch3_mut_df, n = 2)

[1]  57 191


Metadata_Plate,Metadata_Well,Metadata_Assay_Plate_Barcode,Metadata_Plate_Map_Name,Metadata_well_position,Metadata_clone_number,Cells_AreaShape_Compactness,Cells_AreaShape_EulerNumber,Cells_AreaShape_Extent,Cells_AreaShape_FormFactor,⋯,Nuclei_Texture_Correlation_AGP_20_00,Nuclei_Texture_Correlation_AGP_20_03,Nuclei_Texture_Correlation_AGP_5_00,Nuclei_Texture_Correlation_ER_20_02,Nuclei_Texture_Correlation_Mito_5_00,Nuclei_Texture_Correlation_RNA_20_02,Nuclei_Texture_InfoMeas1_Mito_5_00,Nuclei_Texture_InverseDifferenceMoment_AGP_20_02,Nuclei_Texture_InverseDifferenceMoment_DNA_20_03,Nuclei_Texture_InverseDifferenceMoment_ER_20_02
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
MutClones,B02,MutClones,MutClones,B02,BZ001,0.6034418,-3.05703,0.05068862,0.6585738,⋯,3.358292,1.126295,0.0,1.204256,0.4206682,0.5705776,-1.320394,-0.2247533,1.311671,0.212493
MutClones,B03,MutClones,MutClones,B03,BZ002,-1.0295421,-1.41911,1.11426953,-0.9345095,⋯,-0.3068762,1.536572,-4.29316,3.379792,-6.8157241,0.4181798,3.899485,2.1667509,2.50223,2.646517


### Load Wildtype Plate

In [8]:
# Batch 3, wildtype clones plate.
# `batch` is not reassigned here; it keeps the Batch 3 value set when the
# mutant plate was loaded.
plate <- "WTClones"

# Location of the plate's `_normalized_variable_selected.csv` file,
# relative to this notebook: ../../backend/<batch>/<plate>/
file <- file.path(
  "..", "..", "backend", batch, plate,
  paste0(plate, "_normalized_variable_selected.csv")
)
batch3_wt_df <- load_data(file)

# Show the dimensions and preview the first two rows
print(dim(batch3_wt_df))
head(batch3_wt_df, n = 2)

[1]  48 191


Metadata_Plate,Metadata_Well,Metadata_Assay_Plate_Barcode,Metadata_Plate_Map_Name,Metadata_well_position,Metadata_clone_number,Cells_AreaShape_Compactness,Cells_AreaShape_EulerNumber,Cells_AreaShape_Extent,Cells_AreaShape_FormFactor,⋯,Nuclei_Texture_Correlation_AGP_20_00,Nuclei_Texture_Correlation_AGP_20_03,Nuclei_Texture_Correlation_AGP_5_00,Nuclei_Texture_Correlation_ER_20_02,Nuclei_Texture_Correlation_Mito_5_00,Nuclei_Texture_Correlation_RNA_20_02,Nuclei_Texture_InfoMeas1_Mito_5_00,Nuclei_Texture_InverseDifferenceMoment_AGP_20_02,Nuclei_Texture_InverseDifferenceMoment_DNA_20_03,Nuclei_Texture_InverseDifferenceMoment_ER_20_02
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
WTClones,B02,WTClones,WTClones,B02,WT001,-0.6816556,0.2433688,0.5543276,0.27729305,⋯,-1.992909,-0.5624821,-0.7311029,-1.798579,-2.05401899,-1.2708266,0.67166455,-0.08257108,0.4544209,-0.6100711
WTClones,B03,WTClones,WTClones,B03,WT001,0.05125114,-0.4970395,-0.10453,0.07257513,⋯,-1.922169,-0.7716781,-0.1594808,-1.284761,0.01782447,-0.6921738,-0.06878273,-0.44354985,0.2891828,-0.5773808


### Merge Data

In [9]:
# Destination for the merged Batch 3 plates; the file name is prefixed with
# the batch identifier.
merge_file <- file.path(
  "data",
  paste0(batch, "_merged_intersected_variable_selected.csv")
)

# Merge the mutant and wildtype plates; `output_gct = TRUE` additionally
# requests GCT output
full_df <- merge_data(
  batch3_mut_df,
  batch3_wt_df,
  output_file = merge_file,
  output_gct = TRUE
)

# Dimensions of the merged data
dim(full_df)