## Compile dataset "Batch 3"

**Gregory Way, 2020**

We acquired two plates in batch 3; each plate holds either WT or mutant clones.

Here, we combine the raw measurements from both plates, normalize them, and write the result to disk so that the cloneAE signature can be applied in a later notebook.

In [1]:
import sys
import pathlib
import numpy as np
import pandas as pd

from pycytominer import normalize
from pycytominer.cyto_utils import infer_cp_features, output

sys.path.insert(0, "../2.describe-data/scripts")
from processing_utils import load_data

In [2]:
# --- Inputs -----------------------------------------------------------------
# Directory holding the per-plate profile files and the matching cell counts
data_dir = pathlib.Path("../0.generate-profiles/profiles")
cell_count_dir = pathlib.Path("../0.generate-profiles/cell_counts/")

# --- Batch selection --------------------------------------------------------
# Batch identifier, the plates to compile from it, and the profile file suffix
batch = "2019_06_25_Batch3"
plates = ["MutClones", "WTClones"]
suffix = "augmented.csv.gz"

# --- Output -----------------------------------------------------------------
# Destination of the combined, normalized profiles (named after the batch)
output_file = pathlib.Path(f"data/{batch}_combined_normalized.csv.gz")

In [3]:
# Read the profiles of every requested plate, stack them into a single frame
# with harmonized columns and attached cell counts, then tag each row with an
# identifier built from its row position and the batch name.
df = load_data(
    batch=batch,
    plates=plates,
    profile_dir=data_dir,
    suffix=suffix,
    combine_dfs=True,
    harmonize_cols=True,
    add_cell_count=True,
    cell_count_dir=cell_count_dir,
).assign(
    # The callable receives the freshly loaded frame, so the identifiers are
    # numbered 0..n-1 in the order the rows were loaded.
    Metadata_unique_sample_name=lambda loaded: [
        f"profile_{x}_{batch}" for x in range(len(loaded))
    ]
)

print(df.shape)
df.head()

(105, 3536)


Unnamed: 0,Metadata_Plate,Metadata_Well,Metadata_batch,Metadata_cell_count,Metadata_clone_number,Metadata_plate_map_name,Metadata_treatment,Cells_AreaShape_Area,Cells_AreaShape_Center_X,Cells_AreaShape_Center_Y,...,Nuclei_Texture_Variance_RNA_10_03,Nuclei_Texture_Variance_RNA_20_00,Nuclei_Texture_Variance_RNA_20_01,Nuclei_Texture_Variance_RNA_20_02,Nuclei_Texture_Variance_RNA_20_03,Nuclei_Texture_Variance_RNA_5_00,Nuclei_Texture_Variance_RNA_5_01,Nuclei_Texture_Variance_RNA_5_02,Nuclei_Texture_Variance_RNA_5_03,Metadata_unique_sample_name
0,MutClones,B02,2019_06_25_Batch3,9203,BZ001,MutClones,Untreated,2716.0,1043.0,995.0,...,21.556418,20.1773,20.251333,20.148085,20.01092,21.37969,20.984246,21.406505,21.023806,profile_0_2019_06_25_Batch3
1,MutClones,B03,2019_06_25_Batch3,27543,BZ002,MutClones,Untreated,2068.0,1053.0,1044.0,...,4.318845,3.444694,2.803674,3.46042,2.810974,4.179278,4.173528,4.200176,4.175119,profile_1_2019_06_25_Batch3
2,MutClones,B04,2019_06_25_Batch3,8086,BZ003,MutClones,Untreated,3161.5,1062.0,1087.0,...,15.142763,13.831991,13.46319,13.694243,12.70051,14.927255,14.552086,15.025471,14.709513,profile_2_2019_06_25_Batch3
3,MutClones,B05,2019_06_25_Batch3,19640,BZ004,MutClones,Untreated,2270.0,1075.0,1041.0,...,8.187377,7.080019,6.414685,6.966155,6.249959,7.999932,7.91098,8.03078,7.926279,profile_3_2019_06_25_Batch3
4,MutClones,B06,2019_06_25_Batch3,20440,BZ005,MutClones,Untreated,2301.0,1078.0,1078.0,...,6.556472,5.677998,5.281357,5.681753,5.20618,6.426894,6.342483,6.419077,6.321384,profile_4_2019_06_25_Batch3


In [4]:
# Make sure the destination directory exists so the write below also succeeds
# on a fresh checkout (Restart Kernel -> Run All).
output_file.parent.mkdir(parents=True, exist_ok=True)

# Normalize each plate independently: within a plate, features are inferred by
# pycytominer and standardized, with the profiles matching the `samples` query
# (Metadata_clone_number == 'WT_parental') passed as the reference population.
#
# `group_keys=False` pins the result to the original flat row index. Because
# `normalize` returns a frame indexed like its input, older pandas ignored the
# group keys, while pandas >= 2.0 would otherwise prepend `Metadata_Plate` as
# an extra index level.
normalized_df = df.groupby("Metadata_Plate", group_keys=False).apply(
    lambda plate_df: normalize(
        profiles=plate_df,
        features="infer",
        samples="Metadata_clone_number == 'WT_parental'",
        method="standardize",
    )
)

# Persist the combined, normalized profiles for the downstream notebook
output(
    df=normalized_df,
    output_filename=output_file,
    compression="gzip",
)

print(normalized_df.shape)
normalized_df.head()

(105, 3536)


Unnamed: 0,Metadata_Plate,Metadata_Well,Metadata_batch,Metadata_cell_count,Metadata_clone_number,Metadata_plate_map_name,Metadata_treatment,Metadata_unique_sample_name,Cells_AreaShape_Area,Cells_AreaShape_Center_X,...,Nuclei_Texture_Variance_RNA_10_02,Nuclei_Texture_Variance_RNA_10_03,Nuclei_Texture_Variance_RNA_20_00,Nuclei_Texture_Variance_RNA_20_01,Nuclei_Texture_Variance_RNA_20_02,Nuclei_Texture_Variance_RNA_20_03,Nuclei_Texture_Variance_RNA_5_00,Nuclei_Texture_Variance_RNA_5_01,Nuclei_Texture_Variance_RNA_5_02,Nuclei_Texture_Variance_RNA_5_03
0,MutClones,B02,2019_06_25_Batch3,9203,BZ001,MutClones,Untreated,profile_0_2019_06_25_Batch3,15.425583,-2.784089,...,50.276671,50.855203,71.997618,74.472724,68.817937,75.172657,51.122659,50.427995,51.060463,50.201707
1,MutClones,B03,2019_06_25_Batch3,27543,BZ002,MutClones,Untreated,profile_1_2019_06_25_Batch3,6.010408,-2.064066,...,2.114785,2.169167,2.830288,1.589384,2.696348,1.636899,1.94183,2.036691,1.997141,1.99568
2,MutClones,B04,2019_06_25_Batch3,8086,BZ003,MutClones,Untreated,profile_2_2019_06_25_Batch3,21.898516,-1.416045,...,32.036498,32.740388,45.768109,46.116913,43.245857,43.918108,32.673329,31.912389,32.865138,32.135791
3,MutClones,B05,2019_06_25_Batch3,19640,BZ004,MutClones,Untreated,profile_3_2019_06_25_Batch3,8.945385,-0.480015,...,12.776121,13.095502,17.857581,16.673501,16.587133,16.339759,12.866158,12.795313,12.919991,12.728179
4,MutClones,B06,2019_06_25_Batch3,20440,BZ005,MutClones,Untreated,profile_4_2019_06_25_Batch3,9.395802,-0.264008,...,8.155104,8.48915,12.062067,11.939301,11.497942,11.877239,8.368398,8.280239,8.324269,8.13639
