# Normalize Cell Health Labels

**Gregory Way, 2019**

In [1]:
import os
import numpy as np
from scipy.stats import median_absolute_deviation
import pandas as pd

In [2]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [3]:
# Function to scale cell health target variables
def mad_scale(x):
    """Center ``x`` on its median and divide by its median absolute deviation.

    NaN entries are ignored when estimating both the median and the MAD, and
    remain NaN in the result.

    The MAD is computed with NumPy instead of
    ``scipy.stats.median_absolute_deviation``: that function was deprecated in
    SciPy 1.5 and removed in SciPy 1.9, so calling it fails on current SciPy.
    The same 1.4826 consistency factor (that function's default ``scale``) is
    applied, so the scaled values are unchanged.

    Parameters
    ----------
    x : array-like of numbers
        Values to scale, e.g. one column of one group when used through
        ``DataFrame.groupby(...).transform(mad_scale)``.

    Returns
    -------
    Same container type as ``x - scalar``
        ``(x - median(x)) / MAD(x)``.

    Notes
    -----
    There is no guard for a zero MAD (more than half of the non-NaN values
    equal to the median); the division then produces ``inf``/``NaN`` entries.
    """
    # Default `scale` of the removed scipy.stats.median_absolute_deviation
    mad_consistency_factor = 1.4826

    x_median = np.nanmedian(x)

    # Like the SciPy function, the MAD is taken along axis 0 around the
    # axis-0 median. For 1-D input that center is identical to `x_median`.
    abs_deviation = np.abs(x - np.nanmedian(x, axis=0))
    x_mad = mad_consistency_factor * np.nanmedian(abs_deviation, axis=0)

    x_mad_scale = (x - x_median) / x_mad
    return x_mad_scale

In [4]:
# Read the raw cell health labels (tab-separated) from data/labels
file = os.path.join("data", "labels", "cell_health_labels.tsv")
label_df = pd.read_csv(file, delimiter="\t")

# Report the table dimensions, then preview the first two rows
print(label_df.shape)
label_df.head(n=2)

(2302, 75)


Unnamed: 0,cell_id,guide,plate_name,well_col,well_row,cc_all_high_n_spots_h2ax_mean,cc_all_large_notround_polynuclear_mean,cc_all_large_round_polyploid_mean,cc_all_n_objects,cc_all_n_spots_mean,...,vb_num_live_cells,vb_percent_all_apoptosis,vb_percent_all_early_apoptosis,vb_percent_all_late_apoptosis,vb_percent_caspase_dead_only,vb_percent_dead,vb_percent_dead_only,vb_percent_live,vb_ros_back_mean,vb_ros_mean
0,ES2,AKT1-1,Plate 1,3,A,0.04287,0.007976,0.003988,1003,1.777,...,1465.0,0.0271,0.0119,0.0152,1.64,0.03173,0.01652,0.9683,,
1,ES2,AKT1-1,Plate 1,22,O,0.02635,0.005988,0.005988,835,1.582,...,1575.0,0.03169,0.01463,0.01706,1.405,0.03961,0.02255,0.9598,279.6,2083.0


In [5]:
# Apply normalization by plate
# Grouping keys, the other identifier columns, and the full metadata set in
# the order it should appear in the output table
group_cols = ["cell_id", "plate_name"]
passthrough_cols = ["guide", "well_col", "well_row"]
metadata_cols = ["cell_id", "guide", "plate_name", "well_col", "well_row"]

# MAD-scale every remaining column within each (cell_id, plate_name) group
scaled_df = (
    label_df
    .drop(columns=passthrough_cols)
    .groupby(group_cols)
    .transform(mad_scale)
)

# Put the untouched metadata columns back in front of the scaled columns;
# concat aligns the two frames on the shared row index
normalized_label_df = pd.concat([label_df[metadata_cols], scaled_df], axis=1)

print(normalized_label_df.shape)
normalized_label_df.head(2)

(2302, 75)


Unnamed: 0,cell_id,guide,plate_name,well_col,well_row,cc_all_high_n_spots_h2ax_mean,cc_all_large_notround_polynuclear_mean,cc_all_large_round_polyploid_mean,cc_all_n_objects,cc_all_n_spots_mean,...,vb_num_live_cells,vb_percent_all_apoptosis,vb_percent_all_early_apoptosis,vb_percent_all_late_apoptosis,vb_percent_caspase_dead_only,vb_percent_dead,vb_percent_dead_only,vb_percent_live,vb_ros_back_mean,vb_ros_mean
0,ES2,AKT1-1,Plate 1,3,A,1.031168,-0.942186,-1.467284,-0.55779,0.442289,...,0.388243,-1.551538,-1.262443,-1.438342,-1.147748,-0.992046,-0.053653,0.990919,,
1,ES2,AKT1-1,Plate 1,22,O,-0.39554,-1.359908,-0.919586,-0.89009,-0.63579,...,0.750165,-1.232042,-0.915017,-1.210656,-1.334664,-0.331799,0.870706,0.28312,-0.990856,0.03592


In [6]:
# Save the normalized labels as a tab-separated file, without the row index
file = os.path.join("data", "labels", "normalized_cell_health_labels.tsv")
normalized_label_df.to_csv(file, sep="\t", index=False)