# Train Classifier Models to Predict Cell Health Phenotypes

**Gregory Way, 2019**

In [1]:
import os
import numpy as np
import pandas as pd
import warnings
import matplotlib.pyplot as plt

from sklearn.linear_model import SGDClassifier, ElasticNet
from sklearn.pipeline import Pipeline
from sklearn.exceptions import ConvergenceWarning

from scripts.ml_utils import load_train_test, CellHealthPredict



In [2]:
warnings.filterwarnings("ignore", category=ConvergenceWarning)

In [3]:
np.random.seed(123)

## Load Data

In [4]:
consensus = "modz"

In [5]:
x_train_df, x_test_df, y_train_df, y_test_df = (
    load_train_test(drop_metadata=True, consensus=consensus)
)
x_meta_train_df, x_meta_test_df, y_meta_train_df, y_meta_test_df = (
    load_train_test(output_metadata_only=True, consensus=consensus)
)

In [6]:
cell_lines = list(set(x_meta_train_df.Metadata_cell_line))
cell_lines

['ES2', 'HCC44', 'A549']

In [7]:
print(x_train_df.shape)
x_train_df.head(3)

(303, 949)


Unnamed: 0_level_0,Cells_AreaShape_Center_Y,Cells_AreaShape_Compactness,Cells_AreaShape_Eccentricity,Cells_AreaShape_Extent,Cells_AreaShape_Orientation,Cells_AreaShape_Zernike_0_0,Cells_AreaShape_Zernike_1_1,Cells_AreaShape_Zernike_2_0,Cells_AreaShape_Zernike_2_2,Cells_AreaShape_Zernike_3_3,...,Nuclei_Texture_SumEntropy_RNA_5_0,Nuclei_Texture_SumVariance_AGP_20_0,Nuclei_Texture_SumVariance_AGP_5_0,Nuclei_Texture_SumVariance_DNA_10_0,Nuclei_Texture_SumVariance_DNA_20_0,Nuclei_Texture_SumVariance_DNA_5_0,Nuclei_Texture_Variance_AGP_5_0,Nuclei_Texture_Variance_DNA_10_0,Nuclei_Texture_Variance_DNA_20_0,Nuclei_Texture_Variance_DNA_5_0
Metadata_profile_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
profile_340,0.029292,0.607142,-0.576461,-2.459725,0.587515,-1.18337,0.257162,0.741291,-3.732722,-0.012861,...,-2.964612,-2.131689,-2.073252,-2.623642,-2.597662,-2.869573,-2.367492,-3.110212,-3.150354,-2.901882
profile_6,-0.0879,0.186519,0.5622,-0.060381,-0.206748,-0.318174,0.682207,-0.12153,-0.266203,-0.494876,...,-0.030618,-0.285945,-0.297946,-0.254188,-0.020307,-0.190692,-0.358839,-0.388787,-0.318564,-0.311846
profile_79,0.23359,1.738854,1.82946,-1.535546,0.285378,-1.958437,-0.673058,-0.674018,0.43034,-0.227258,...,-1.216083,-1.014071,-1.006968,-0.7635,-0.436459,-0.72737,-0.96855,-0.793551,-0.722493,-0.738353


## Setup Cross Validation

In [8]:
alphas = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
l1_ratios = [0.1, 0.12, 0.14, 0.16, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9]
n_folds = 5

In [9]:
regression_parameters = {
    'regress__alpha': alphas,
    'regress__l1_ratio': l1_ratios
}

clf_parameters = {
    'classify__loss': ['log'],
    'classify__penalty': ['elasticnet'],
    'classify__alpha': alphas,
    'classify__l1_ratio': l1_ratios
}

In [10]:
estimator_regressor = Pipeline(
    steps=[(
        "regress",
        ElasticNet(
            random_state=42,
            max_iter=2000,
            tol=1e-3
        )
        
    )]
)

estimator_classifier = Pipeline(
    steps=[(
        'classify',
        SGDClassifier(
            random_state=42,
            class_weight='balanced',
            max_iter=2000,
            shuffle=True,
            tol=1e-3
        )
    )]
)

In [11]:
# y labels and transform instructions
cell_health_targets = y_train_df.columns.tolist()
y_transforms = ["raw", "binarize"]
binarize_fit = "sd"

## Train Models

In [12]:
%%time

cv_results_list = []
roc_results_list = []
pr_results_list = []
all_coefs_list = []
all_y_label_list = []
regression_results_list = []
cannot_fit_list = []

for cell_health_target in cell_health_targets:
    
    for y_transform in y_transforms:

        if y_transform == "binarize":
            estimator = estimator_classifier
            scoring = "roc_auc"
            parameters = clf_parameters
            decision_function = True
            model_type = "Classification"
        else:
            estimator = estimator_regressor
            scoring = "r2"
            parameters = regression_parameters
            decision_function = False
            model_type = "Regression"
    
        for shuffle_type in [True, False]:
            
            print("Now Training Target: {}".format(cell_health_target))
            print("[Class] Model: {}; Shuffle: {}\n".format(model_type, shuffle_type))
            
            # Initialize class
            chp = CellHealthPredict(
                x_df=x_train_df,
                y_df=y_train_df,
                parameters=parameters,
                estimator=estimator,
                n_folds=n_folds,
                cv_scoring=scoring,
                shuffle=shuffle_type
            )

            # Fit model
            is_fit = chp.fit_cell_health_target(
                cell_health_target,
                y_transform=y_transform,
                binarize_fit=binarize_fit
            )
            
            # Identify binary threshold [default is 1.5 standard deviations]
            if y_transform == "binarize": 
                binarize_threshold = chp.binarize_cutoff
            else:
                binarize_threshold = None
                
            if not is_fit:
                cannot_fit_list.append([cell_health_target, y_transform, shuffle_type])
                continue

            # Training performance metrics
            metric_a, metric_b, y_true, y_pred = chp.get_performance(
                decision_function=decision_function,
                return_y=True,
                binarize_fit=binarize_fit,
                binarize_fit_override=binarize_threshold,
            )
        
            # Testing performance metrics
            metric_test_a, metric_test_b, y_test_true, y_test_pred = chp.get_performance(
                x_test=x_test_df,
                y_test=y_test_df,
                decision_function=decision_function,
                return_y=True,
                binarize_fit=binarize_fit,
                binarize_fit_override=binarize_threshold,
                data_fit_type="test",
            )

            # Get Cell Line Specific Performance
            cell_line_metrics_a = []
            cell_line_metrics_b = []
            for cell_line in cell_lines:
                meta_train_subset_df = x_meta_train_df.query("Metadata_cell_line == @cell_line")
                meta_test_subset_df = x_meta_test_df.query("Metadata_cell_line == @cell_line")

                # Get Cell Line Specific Training Performance
                x_cell_line_df = x_train_df.reindex(meta_train_subset_df.index, axis="rows")
                y_cell_line_df = y_train_df.reindex(meta_train_subset_df.index, axis="rows")

                metric_cell_train_a, metric_cell_train_b, y_cell_train_true, y_cell_train_pred = (
                    chp.get_performance(
                        x_test=x_cell_line_df,
                        y_test=y_cell_line_df,
                        decision_function=decision_function,
                        return_y=True,
                        binarize_fit=binarize_fit,
                        cell_line=cell_line,
                        binarize_fit_override=binarize_threshold,
                    )
                )

                # Get Cell Line Specific Test Performance
                x_cell_line_df = x_test_df.reindex(meta_test_subset_df.index, axis="rows")
                y_cell_line_df = y_test_df.reindex(meta_test_subset_df.index, axis="rows")

                metric_cell_test_a, metric_cell_test_b, y_cell_test_true, y_cell_test_pred = (
                    chp.get_performance(
                        x_test=x_cell_line_df,
                        y_test=y_cell_line_df,
                        decision_function=decision_function,
                        return_y=True,
                        binarize_fit=binarize_fit,
                        cell_line=cell_line,
                        binarize_fit_override=binarize_threshold,
                        data_fit_type="test",
                    )
                )

                cell_line_metrics_a += [metric_cell_train_a, metric_cell_test_a]
                cell_line_metrics_b += [metric_cell_train_b, metric_cell_test_b]

            # Combine training and testing results
            if y_transform == "binarize":
                roc_results_list.append(pd.concat([metric_a, metric_test_a], axis='rows'))
                roc_results_list.append(pd.concat(cell_line_metrics_a, axis="rows"))
                pr_results_list.append(pd.concat([metric_b, metric_test_b], axis='rows'))
                pr_results_list.append(pd.concat(cell_line_metrics_b, axis="rows"))
            else:
                regression_results_list.append(pd.concat([metric_a, metric_test_a], axis='rows'))
                regression_results_list.append(pd.concat([metric_b, metric_test_b], axis='rows'))
                regression_results_list.append(pd.concat(cell_line_metrics_a, axis='rows'))
                regression_results_list.append(pd.concat(cell_line_metrics_b, axis="rows"))

            # Save cross validation results
            cv_results_list.append(chp.get_cv_results())

            # Save the model coefficients
            model_file = "cell_health_{}_target_{}_shuffle_{}_transform_{}.joblib".format(
                consensus, cell_health_target, shuffle_type, y_transform
            )
            model_file = os.path.join("models", model_file)
            coef_df = chp.get_coefficients(save_model=True, model_file=model_file)
            all_coefs_list.append(coef_df)
        
            # Store y predictions recoded values
            all_y_label_list.append(pd.concat([y_true, y_test_true, y_pred, y_test_pred]))

Now Training Target: cc_all_high_h2ax
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_all_high_h2ax
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_all_high_h2ax
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_all_high_h2ax
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_all_large_notround_polynuclear_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_all_large_notround_polynuclear_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_all_large_notround_polynuclear_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_all_large_notround_polynuclear_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_all_large_round_polyploid_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_all_large_round_polyploid_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_all_large_round_polyploid_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_all_large_round_polyploid_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_all_n_objects
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_all_n_objects
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_all_n_objects
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_all_n_objects
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_all_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_all_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_all_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_all_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_all_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_all_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_all_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: True

Now Training Target: cc_all_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: False

Now Training Target: cc_all_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_all_nucleus_area_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_all_nucleus_area_mean
[Class] Model: Classification; Shuffle: True

Now Training Target: cc_all_nucleus_area_mean
[Class] Model: Classification; Shuffle: False

Now Training Target: cc_all_nucleus_roundness_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_all_nucleus_roundness_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: 

  recall = tps / tps[-1]


Now Training Target: cc_cc_g2
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_cc_high_h2ax
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_cc_high_h2ax
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_cc_high_h2ax
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_cc_high_h2ax
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_cc_late_mitosis
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_cc_late_mitosis
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_cc_late_mitosis
[Class] Model: Classification; Shuffle: True

Now Training Target: cc_cc_late_mitosis
[Class] Model: Classification; Shuffle: False

Now Training Target: cc_cc_mitosis
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_cc_mitosis
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_cc_mitosis
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_cc_mitosis
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_cc_n_objects
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_cc_n_objects
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_cc_n_objects
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_cc_n_objects
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_cc_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_cc_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_cc_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_cc_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_cc_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_cc_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_cc_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: True

Now Training Target: cc_cc_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: False

Now Training Target: cc_cc_s
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_cc_s
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_cc_s
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_cc_s
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_early_mitosis_high_h2ax
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_early_mitosis_high_h2ax
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_early_mitosis_high_h2ax
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_early_mitosis_high_h2ax
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_early_mitosis_n_objects
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_early_mitosis_n_objects
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_early_mitosis_n_objects
[Class] Model: Classification; Shuffle: True

Now Training Target: cc_early_mitosis_n_objects
[Class] Model: Classification; Shuffle: False

Now Training Target: cc_early_mitosis_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_early_mitosis_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_early_mitosis_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: True

Now Training Target: cc_early_mitosis_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: False

Now Training Target: cc_early_mitosis_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_early_mitosis_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: Fals

  recall = tps / tps[-1]


Now Training Target: cc_g1_high_h2ax
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_g1_n_objects
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_g1_n_objects
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_g1_n_objects
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_g1_n_objects
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_g1_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_g1_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_g1_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: True

Now Training Target: cc_g1_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: False

Now Training Target: cc_g1_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_g1_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_g1_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: True

Now Training Target: cc_g1_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: False

Now Training Target: cc_g1_plus_g2_count
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_g1_plus_g2_count
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_g1_plus_g2_count
[Class] 

  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_g2_g1_count
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_g2_high_h2ax
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_g2_high_h2ax
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_g2_high_h2ax
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_g2_high_h2ax
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_g2_n_objects
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_g2_n_objects
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_g2_n_objects
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_g2_n_objects
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_g2_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_g2_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_g2_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_g2_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_g2_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_g2_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_g2_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_g2_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_g2_plus_all_m_count
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_g2_plus_all_m_count
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_g2_plus_all_m_count
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_g2_plus_all_m_count
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_infection_percentage
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_infection_percentage
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_infection_percentage
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_infection_percentage
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_late_mitosis_high_h2ax
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_late_mitosis_high_h2ax
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_late_mitosis_high_h2ax
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_late_mitosis_high_h2ax
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_late_mitosis_n_objects
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_late_mitosis_n_objects
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_late_mitosis_n_objects
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_late_mitosis_n_objects
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_late_mitosis_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_late_mitosis_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_late_mitosis_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_late_mitosis_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_late_mitosis_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_late_mitosis_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_late_mitosis_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_late_mitosis_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_mitosis_high_h2ax
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_mitosis_high_h2ax
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_mitosis_high_h2ax
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_mitosis_high_h2ax
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_mitosis_n_objects
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_mitosis_n_objects
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_mitosis_n_objects
[Class] Model: Classification; Shuffle: True

Now Training Target: cc_mitosis_n_objects
[Class] Model: Classification; Shuffle: False

Now Training Target: cc_mitosis_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_mitosis_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_mitosis_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_mitosis_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_mitosis_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_mitosis_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_mitosis_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_mitosis_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_polynuclear_high_h2ax
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_polynuclear_high_h2ax
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_polynuclear_high_h2ax
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_polynuclear_high_h2ax
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_polynuclear_n_objects
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_polynuclear_n_objects
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_polynuclear_n_objects
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_polynuclear_n_objects
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_polynuclear_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_polynuclear_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_polynuclear_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_polynuclear_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_polynuclear_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_polynuclear_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_polynuclear_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: True

Now Training Target: cc_polynuclear_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: False

Now Training Target: cc_polyploid_high_h2ax
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_polyploid_high_h2ax
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_polyploid_high_h2ax
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_polyploid_high_h2ax
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_polyploid_n_objects
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_polyploid_n_objects
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_polyploid_n_objects
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_polyploid_n_objects
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_polyploid_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_polyploid_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_polyploid_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_polyploid_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_polyploid_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_polyploid_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_polyploid_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_polyploid_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_s_high_h2ax
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_s_high_h2ax
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_s_high_h2ax
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_s_high_h2ax
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_s_intensity_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_s_intensity_nucleus_area_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_s_intensity_nucleus_area_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_s_intensity_nucleus_area_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_s_intensity_nucleus_area_sum
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_s_intensity_nucleus_area_sum
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_s_intensity_nucleus_area_sum
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_s_intensity_nucleus_area_sum
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_s_n_objects
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_s_n_objects
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_s_n_objects
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_s_n_objects
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: cc_s_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_s_n_spots_h2ax_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_s_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_s_n_spots_h2ax_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: cc_s_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: cc_s_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: cc_s_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]


Now Training Target: cc_s_n_spots_h2ax_per_nucleus_area_mean
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: vb_infection_percentage
[Class] Model: Regression; Shuffle: True

Now Training Target: vb_infection_percentage
[Class] Model: Regression; Shuffle: False

Now Training Target: vb_infection_percentage
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: vb_infection_percentage
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: vb_live_cell_area
[Class] Model: Regression; Shuffle: True

Now Training Target: vb_live_cell_area
[Class] Model: Regression; Shuffle: False

Now Training Target: vb_live_cell_area
[Class] Model: Classification; Shuffle: True



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: vb_live_cell_area
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]
  recall = tps / tps[-1]


Now Training Target: vb_live_cell_roundness
[Class] Model: Regression; Shuffle: True

Now Training Target: vb_live_cell_roundness
[Class] Model: Regression; Shuffle: False

Now Training Target: vb_live_cell_roundness
[Class] Model: Classification; Shuffle: True

Now Training Target: vb_live_cell_roundness
[Class] Model: Classification; Shuffle: False

Now Training Target: vb_live_cell_width_length
[Class] Model: Regression; Shuffle: True

Now Training Target: vb_live_cell_width_length
[Class] Model: Regression; Shuffle: False

Now Training Target: vb_live_cell_width_length
[Class] Model: Classification; Shuffle: True

Now Training Target: vb_live_cell_width_length
[Class] Model: Classification; Shuffle: False

Now Training Target: vb_num_live_cells
[Class] Model: Regression; Shuffle: True

Now Training Target: vb_num_live_cells
[Class] Model: Regression; Shuffle: False

Now Training Target: vb_num_live_cells
[Class] Model: Classification; Shuffle: True

Now Training Target: vb_num_live

  recall = tps / tps[-1]


Now Training Target: vb_percent_all_apoptosis
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: vb_percent_caspase_dead_only
[Class] Model: Regression; Shuffle: True

Now Training Target: vb_percent_caspase_dead_only
[Class] Model: Regression; Shuffle: False

Now Training Target: vb_percent_caspase_dead_only
[Class] Model: Classification; Shuffle: True

Now Training Target: vb_percent_caspase_dead_only
[Class] Model: Classification; Shuffle: False

Now Training Target: vb_percent_dead
[Class] Model: Regression; Shuffle: True

Now Training Target: vb_percent_dead
[Class] Model: Regression; Shuffle: False

Now Training Target: vb_percent_dead
[Class] Model: Classification; Shuffle: True

Now Training Target: vb_percent_dead
[Class] Model: Classification; Shuffle: False

Now Training Target: vb_percent_dead_only
[Class] Model: Regression; Shuffle: True

Now Training Target: vb_percent_dead_only
[Class] Model: Regression; Shuffle: False

Now Training Target: vb_percent_dead_only
[Class] Model: Classification; Shuffle: True

Now Training Target: vb_percent_dead_on

  recall = tps / tps[-1]


Now Training Target: vb_percent_early_apoptosis
[Class] Model: Classification; Shuffle: False



  recall = tps / tps[-1]


Now Training Target: vb_percent_late_apoptosis
[Class] Model: Regression; Shuffle: True

Now Training Target: vb_percent_late_apoptosis
[Class] Model: Regression; Shuffle: False

Now Training Target: vb_percent_late_apoptosis
[Class] Model: Classification; Shuffle: True

Now Training Target: vb_percent_late_apoptosis
[Class] Model: Classification; Shuffle: False

Now Training Target: vb_percent_live
[Class] Model: Regression; Shuffle: True

Now Training Target: vb_percent_live
[Class] Model: Regression; Shuffle: False

Now Training Target: vb_percent_live
[Class] Model: Classification; Shuffle: True

Now Training Target: vb_percent_live
[Class] Model: Classification; Shuffle: False

Now Training Target: vb_ros_back_mean
[Class] Model: Regression; Shuffle: True

Now Training Target: vb_ros_back_mean
[Class] Model: Regression; Shuffle: False

Now Training Target: vb_ros_back_mean
[Class] Model: Classification; Shuffle: True

Now Training Target: vb_ros_back_mean
[Class] Model: Classifica

  recall = tps / tps[-1]


Now Training Target: vb_ros_mean
[Class] Model: Classification; Shuffle: False

CPU times: user 6h 28min 45s, sys: 15min 53s, total: 6h 44min 39s
Wall time: 38min 11s


  recall = tps / tps[-1]


In [13]:
# These are the models that cannot be fit
not_fit_df = pd.DataFrame(cannot_fit_list, columns=["target", "y_transform", "shuffle"])
not_fit_file = os.path.join("results", "not_fit_models_{}.tsv".format(consensus))

not_fit_df.to_csv(not_fit_file, sep='\t', index=False)
not_fit_df

Unnamed: 0,target,y_transform,shuffle
0,cc_all_nucleus_roundness_mean,binarize,True
1,cc_all_nucleus_roundness_mean,binarize,False
2,cc_cc_late_mitosis,binarize,True
3,cc_cc_late_mitosis,binarize,False
4,vb_live_cell_roundness,binarize,True
5,vb_live_cell_roundness,binarize,False
6,vb_live_cell_width_length,binarize,True
7,vb_live_cell_width_length,binarize,False
8,vb_num_live_cells,binarize,True
9,vb_num_live_cells,binarize,False


In [14]:
# Acquire output metrics
full_cv_df = pd.concat(cv_results_list).reset_index(drop=True)
full_regression_results_df = pd.concat(regression_results_list).reset_index(drop=True)
full_roc_df = pd.concat(roc_results_list).reset_index(drop=True)
full_pr_df = pd.concat(pr_results_list).reset_index(drop=True)
full_coef_df = pd.concat(all_coefs_list).reset_index(drop=True)
full_y_df = pd.concat(all_y_label_list).reset_index(drop=True)

In [15]:
# Save all results
results_dir = "results"
os.makedirs(results_dir, exist_ok=True)

file = os.path.join(results_dir, "full_cell_health_cv_results_{}.tsv.gz".format(consensus))
full_cv_df.to_csv(file, sep='\t', index=False)

file = os.path.join(results_dir, "full_cell_health_regression_{}.tsv.gz".format(consensus))
full_regression_results_df.to_csv(file, sep='\t', index=False)

file = os.path.join(results_dir, "full_cell_health_roc_results_{}.tsv.gz".format(consensus))
full_roc_df.to_csv(file, sep='\t', index=False)

file = os.path.join(results_dir, "full_cell_health_pr_results_{}.tsv.gz".format(consensus))
full_pr_df.to_csv(file, sep='\t', index=False)

file = os.path.join(results_dir, "full_cell_health_coefficients_{}.tsv.gz".format(consensus))
full_coef_df.to_csv(file, sep='\t', index=False)

file = os.path.join(results_dir, "full_cell_health_y_labels_{}.tsv.gz".format(consensus))
full_y_df.to_csv(file, sep='\t', index=False)