In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns 
import math
import statistics
import random

from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
from scipy.stats import ttest_ind
from statsmodels.stats.multitest import multipletests
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler

from collections import Counter

In [2]:
def load_data(path):
    """
    Loads the csv or csv.gz file into a pandas df, providing unique ID.
    """
    df = pd.read_csv(path)
    
    # Seperate data and metadata
    metadata_cols = [i for i in df.columns if 'Metadata' in i]
    metadata = df[metadata_cols]
    data_cols = [i for i in df.columns if i not in metadata_cols]
    
    # Remove exclude features from the data df
    data_cols = [x for x in data_cols if x.find('Costes')<0 if x.find('Manders')<0 if x.find('RWC')<0 
            if x.find('Parent')<0 if x.find('Object_Number')<0 if x.find('ObjectNumber')<0]
    data = df[data_cols]
    
    # Use metadata to create the index
    metadata_df = metadata.copy()
    data_df = data.copy()
    metadata_df['my_id'] = metadata['Metadata_line_source'] + '_' + metadata['Metadata_line_condition'] + '_' + \
        metadata['Metadata_line_ID'].astype(str) + '_' + metadata['Metadata_well_position']
    data_df['my_id'] = metadata_df['my_id']
    data_df.set_index('my_id', inplace=True)
    print('df shape: {}'.format(data_df.shape))
    
#     Remove the extraneous 'control' or 'deletion' for the isogenic perturbs, to keep naming consistency.
    new_idx = data_df.index.tolist()
    new_idx = [i.split('_')[0]+'_'+ i.split('_')[1]+'_'+ i.split('_')[3]+'_' +i.split('_')[4] \
               if 'genic' in i else i for i in new_idx]
    
    data_df.index = new_idx
    return data_df

def unstack_formatted_matrix(df, method='spearman', double=False):
    """
    df: a matrix which was the result of get_formatted_matrix
    double: whether to replicate the df and append, with id1 and id2 reversed. That way, all perturbs are represented in
        each id column, making finding non-group correlations easier
    returns: unstacked version of the matrix
    """
    
    # Unstacks the df and gets pairwise correlations for every perturbation instance
    unstack_agg_features_all = df.T.copy()
    unstack_agg_features_all = unstack_agg_features_all.corr(method=method)
    unstack_agg_features_all_tophalf = unstack_agg_features_all.where(np.triu(np.ones(unstack_agg_features_all.shape)).astype(np.bool))

    unstack_agg_all = unstack_agg_features_all_tophalf.unstack().reset_index()
    unstack_corrs_agg = unstack_agg_all.rename(columns={"level_0": "id1", "level_1": "id2", 0: "correlation"})
    # At this point, should be (384+383+...+3+2+1) = 73920 rows
    unstack_corrs_agg = unstack_corrs_agg[~unstack_corrs_agg['correlation'].isna()]
     
    # Replicate and append the dataframe, if double is True, so that each perturb is present in both cols. 
    # Should have 73920*2 cols.
    if double:
        repl = unstack_corrs_agg.copy()
        temp_id1 = repl['id1'].tolist()
        temp_id2 = repl['id2'].tolist()
        repl['id1'] = temp_id2
        repl['id2'] = temp_id1
        unstack_corrs_agg = unstack_corrs_agg.append(repl)

    # Remove correlations where id1 == id2 (same well), should have 73536(*2) rows
    unstack_corrs_agg = unstack_corrs_agg[unstack_corrs_agg['id1'] != unstack_corrs_agg['id2']]
    unstack_corrs_agg.reset_index(inplace=True, drop=True)     
    
    # Add columns to label unique perturbs
    unstack_corrs_agg['perturb1'] = unstack_corrs_agg['id1'].str.split('_').map(lambda x: x[0] + '_' + x[1])
    unstack_corrs_agg['perturb2'] = unstack_corrs_agg['id2'].str.split('_').map(lambda x: x[0] + '_' + x[1])
    unstack_corrs_agg['cell_line1'] = unstack_corrs_agg['id1'].str.split('_').map(lambda x: x[0] + '_' + x[1] + '_' + x[2])
    unstack_corrs_agg['cell_line2'] = unstack_corrs_agg['id2'].str.split('_').map(lambda x: x[0] + '_' + x[1] + '_' + x[2])
    return unstack_corrs_agg 

In [3]:
def hierarchical_cluster(df, threshold, show=False):
    """
    Returns df of cluster features
    """
    threshold=threshold
    Z1 = linkage(df, 'ward')
    flat_linkage1 = fcluster(Z1, t=threshold, criterion='distance')
    cluster_features1 = id_to_cluster(flat_linkage1, df)
    
    if show:
        plt.figure(figsize=(12, df.shape[0]/4))
        plt.axvline(x=threshold)
        label = [str(i) + ' ' + j for i, j in zip(flat_linkage1.tolist(), df.index.tolist())]
        plt.title('Clusters based on cell profiler features')
        dend1 = dendrogram(Z1, color_threshold=threshold, orientation='left', leaf_font_size=10, labels=label)
    return cluster_features1

def id_to_cluster(linkage_data, agg_features_df):
    """
    linkage_data: array of cluster numbers
    agg_features_df: df of aggregated features to merge, index must match order of linkage_data
    Returns a df with aggregated cp data and cluster number for each id
    """
    # Get cluster number with the aggregated feature data
    clusters_hierarchal_df = pd.DataFrame(data=linkage_data, index=agg_features_df.index)
    clusters_hierarchal_df.rename(columns={0:'cluster_num'}, inplace=True)
    clusters_hierarchal_df = clusters_hierarchal_df.merge(agg_features_df, how='left', left_index=True, right_index=True)

    # Get the cell profiler features by cluster
    cp_features_by_cluster = clusters_hierarchal_df.groupby(by='cluster_num').mean()
    
    return clusters_hierarchal_df

In [4]:
def ttest_2_df(df1, df2, alpha=0.05, reject_only=True):
    """
    Inputs are 2 dataframes with the same column features. Returns FDR adjusted p-values for each feature.
    """
#     assert df1.columns == df2.columns
    z_score, pvals = ttest_ind(df1.values, df2.values, axis=0)
    p_df = pd.DataFrame(pvals, index = df1.columns)
    p_df = p_df.rename(columns={0: 'pval'}) # unadjusted p values
    
    # Remove first row, which is just a ttest on the column number
    p_df =  p_df.iloc[1:]
    
    # Get corrected p-values
    q_df = p_df
    q_df['adj_p_value'] = multipletests(p_df['pval'].values, alpha=alpha, method='fdr_bh')[1]
    q_df['reject_null'] = multipletests(p_df['pval'].values, alpha=alpha, method='fdr_bh')[0]
    
    if reject_only:
        q_df = q_df[q_df['reject_null'] == True]
    
    return q_df

In [5]:
# Progenitors data
from pathlib import Path
path = os.getcwd()
base_dir = str(Path(path).parent)

switch_isogenic_labels = True

#FS data
progenitors = pd.read_csv(base_dir + '/feature_sets/Progenitors/0714_stdev_corr_fs.csv', index_col=0)

exclude = ['5', '6', '33', '12', '16']
try:
    idx = [i for i in progenitors.index.tolist() if i.split('_')[2] not in exclude] # Exclude patient number
    progenitors = progenitors[progenitors.index.isin(idx)]
except: pass

print ('shape: {}'.format(progenitors.shape))

shape: (344, 508)


In [6]:
# STEM data
# Or use my FS data
stem = pd.read_csv(base_dir + '/feature_sets/STEM01/0621_stdev_corr_fs.csv', index_col=0)


try:
    idx = [i for i in stem.index.tolist() if i.split('_')[-1] not in exclude]
    stem = stem[stem.index.isin(idx)]
except: pass


print ('shape: {}'.format(stem.shape))

shape: (384, 559)


In [7]:
labels = progenitors.index.tolist()
if switch_isogenic_labels:
    for i in range(len(labels)):
        if 'isogenic_deletion' in labels[i]:
            labels[i] = labels[i].replace('isogenic_deletion', 'temp')
    for i in range(len(labels)):
        if 'isogenic_control' in labels[i]:
            labels[i] = labels[i].replace('isogenic_control', 'isogenic_deletion')            
        if 'temp' in labels[i]:
            labels[i] = labels[i].replace('temp', 'isogenic_control')                    
progenitors.index = labels
labels = stem.index.tolist()
if switch_isogenic_labels:
    for i in range(len(labels)):
        if 'isogenic_deletion' in labels[i]:
            labels[i] = labels[i].replace('isogenic_deletion', 'temp')
    for i in range(len(labels)):
        if 'isogenic_control' in labels[i]:
            labels[i] = labels[i].replace('isogenic_control', 'isogenic_deletion')            
        if 'temp' in labels[i]:
            labels[i] = labels[i].replace('temp', 'isogenic_control')                    
stem.index = labels

In [28]:
progenitors
progenitors_human_ctrl = progenitors[progenitors.index.str.contains('human_control')]
progenitors_human_del = progenitors[progenitors.index.str.contains('human_deletion')]
progenitors_human_ctrl

Unnamed: 0,Cells_AreaShape_Area,Cells_AreaShape_Center_X,Cells_AreaShape_Compactness,Cells_AreaShape_Eccentricity,Cells_AreaShape_EulerNumber,Cells_AreaShape_Extent,Cells_AreaShape_FormFactor,Cells_AreaShape_MaximumRadius,Cells_AreaShape_Orientation,Cells_AreaShape_Zernike_1_1,...,Nuclei_Intensity_StdIntensityEdge_RNA,Cells_RadialDistribution_FracAtD_RNA_1of4,Cells_RadialDistribution_FracAtD_RNA_4of4,Cells_RadialDistribution_RadialCV_RNA_1of4,Cytoplasm_RadialDistribution_FracAtD_RNA_1of4,Cytoplasm_RadialDistribution_FracAtD_RNA_3of4,Cytoplasm_RadialDistribution_FracAtD_RNA_4of4,Cytoplasm_RadialDistribution_MeanFrac_RNA_1of4,Cytoplasm_RadialDistribution_MeanFrac_RNA_4of4,Nuclei_RadialDistribution_RadialCV_RNA_1of4
human_control_1_A01,0.031558,-0.083367,0.664552,0.391890,0.378463,-0.780011,-0.920009,0.407545,0.216914,-0.076005,...,-0.476490,-0.356531,0.172471,0.942807,-0.170803,-0.128707,0.161207,0.595555,-0.760697,1.448303
human_control_1_A02,-0.054664,-0.227870,-0.182981,0.057832,-0.104090,-0.077548,-0.245233,0.146814,0.059290,0.047741,...,-0.976950,-0.528343,0.536886,0.372891,-0.117456,0.068131,0.001234,-0.175402,-0.043803,1.253137
human_control_9_A05,-0.069913,0.521924,1.036712,0.399706,-0.082652,-0.907102,-0.741057,-0.005502,-0.540924,0.277164,...,1.140715,-0.010694,-0.111528,0.925415,0.400532,0.332637,-0.377230,0.982143,-0.921211,0.697296
human_control_9_A06,0.006089,0.412618,1.265076,0.523974,0.468320,-1.029449,-0.949079,0.265383,-0.103342,0.290485,...,1.311540,0.262503,-0.358536,1.177824,0.784333,0.564045,-0.657585,1.265338,-1.281002,0.717286
human_control_4_A09,-0.126587,0.185204,-0.480269,-0.033032,0.112419,0.198445,0.181502,-0.100191,0.195309,0.230866,...,-0.535681,-0.436796,0.396018,-0.147750,0.095149,0.219116,-0.196945,-0.441680,0.261067,0.324191
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
human_control_3_P14,-0.112674,0.059101,-0.553863,-0.165947,-0.543208,0.281459,0.163004,-0.085061,0.123159,0.292257,...,-1.082922,-0.488846,0.507367,-0.511480,0.457948,0.778916,-0.702102,-0.434428,0.188902,-0.399964
human_control_13_P19,-0.014410,-0.040278,-0.306458,0.039965,-1.253933,0.104759,-0.164122,0.136950,0.284888,-0.198071,...,-0.465036,-0.696673,0.829369,0.192893,0.199754,0.544418,-0.452333,-0.532661,0.388996,0.266641
human_control_13_P20,0.016831,0.019864,-0.263138,0.072305,-1.013906,0.055551,-0.241643,0.201379,0.395312,-0.012498,...,-0.734770,-0.628921,0.722488,0.233743,0.424642,0.684446,-0.626861,-0.398738,0.132872,-0.000707
human_control_22_P23,-0.158956,0.361959,-0.394386,0.093896,0.132013,0.127585,0.194439,-0.267188,0.250969,0.279007,...,-0.447874,-0.249812,0.189123,-0.834674,0.245688,0.457528,-0.400628,-0.686206,0.829092,-1.135828


In [25]:
stem_selected = stem.loc[progenitors.index.tolist()]
stem_selected_human_ctrl = stem_selected[stem_selected.index.str.contains('human_control')]
stem_selected_human_del = stem_selected[stem_selected.index.str.contains('human_deletion')]

In [30]:
shared_features = [i for i in progenitors.columns if i in stem.columns]
# feature_correlations = pd.DataFrame(columns='correlation')
# 
# for feature in shared_features:
#     ttest_ind

In [11]:
ttest_ind(progenitors['Cells_AreaShape_Area'], stem_selected['Cells_AreaShape_Area'])

Ttest_indResult(statistic=-6.716724178302358, pvalue=3.9042645751391045e-11)

In [15]:
progenitors_human_diff_features = ['Cells_AreaShape_Compactness',
 'Cells_AreaShape_Extent',
 'Cells_AreaShape_FormFactor',
 'Cells_AreaShape_MaximumRadius',
 'Cells_AreaShape_Zernike_2_0',
 'Cells_AreaShape_Zernike_2_2',
 'Cells_AreaShape_Zernike_4_4',
 'Cells_AreaShape_Zernike_5_1',
 'Cells_AreaShape_Zernike_5_3',
 'Cells_AreaShape_Zernike_6_2',
 'Cells_AreaShape_Zernike_7_1',
 'Cells_AreaShape_Zernike_7_3',
 'Cells_AreaShape_Zernike_8_2',
 'Cells_AreaShape_Zernike_8_4',
 'Cells_AreaShape_Zernike_8_6',
 'Cells_AreaShape_Zernike_9_1',
 'Cells_AreaShape_Zernike_9_3',
 'Cells_AreaShape_Zernike_9_5',
 'Cells_Correlation_Correlation_DNA_Mito',
 'Cells_Correlation_Correlation_Mito_AGP',
 'Cells_Correlation_Correlation_Mito_ER',
 'Cells_Correlation_K_AGP_Brightfield',
 'Cells_Correlation_K_Brightfield_ER',
 'Cells_Correlation_K_Brightfield_RNA',
 'Cells_Correlation_K_DNA_Brightfield',
 'Cells_Correlation_K_DNA_Mito',
 'Cells_Correlation_K_ER_Brightfield',
 'Cells_Correlation_K_Mito_Brightfield',
 'Cells_Correlation_K_RNA_Brightfield',
 'Cells_Correlation_Overlap_DNA_AGP',
 'Cells_Correlation_Overlap_DNA_ER',
 'Cells_Correlation_Overlap_ER_AGP',
 'Cells_Correlation_Overlap_ER_Brightfield',
 'Cells_Correlation_Overlap_ER_RNA',
 'Cells_Correlation_Overlap_Mito_AGP',
 'Cells_Correlation_Overlap_Mito_ER',
 'Cells_Correlation_Overlap_Mito_RNA',
 'Cells_Correlation_Overlap_RNA_AGP',
 'Cells_Granularity_10_AGP',
 'Cells_Granularity_10_Brightfield',
 'Cells_Granularity_10_ER',
 'Cells_Granularity_10_Mito',
 'Cells_Granularity_10_RNA',
 'Cells_Granularity_11_Brightfield',
 'Cells_Granularity_1_Brightfield',
 'Cells_Granularity_1_Mito',
 'Cells_Granularity_1_RNA',
 'Cells_Granularity_2_AGP',
 'Cells_Granularity_2_Brightfield',
 'Cells_Granularity_2_Mito',
 'Cells_Granularity_3_Brightfield',
 'Cells_Granularity_4_Brightfield',
 'Cells_Granularity_5_Brightfield',
 'Cells_Granularity_6_Brightfield',
 'Cells_Granularity_7_Brightfield',
 'Cells_Granularity_8_Brightfield',
 'Cells_Granularity_9_Brightfield',
 'Cells_Intensity_IntegratedIntensityEdge_AGP',
 'Cells_Intensity_IntegratedIntensityEdge_Brightfield',
 'Cells_Intensity_IntegratedIntensityEdge_DNA',
 'Cells_Intensity_IntegratedIntensityEdge_ER',
 'Cells_Intensity_IntegratedIntensityEdge_Mito',
 'Cells_Intensity_IntegratedIntensityEdge_RNA',
 'Cells_Intensity_IntegratedIntensity_Mito',
 'Cells_Intensity_LowerQuartileIntensity_Brightfield',
 'Cells_Intensity_LowerQuartileIntensity_DNA',
 'Cells_Intensity_MADIntensity_Brightfield',
 'Cells_Intensity_MassDisplacement_AGP',
 'Cells_Intensity_MassDisplacement_Brightfield',
 'Cells_Intensity_MassDisplacement_DNA',
 'Cells_Intensity_MassDisplacement_ER',
 'Cells_Intensity_MassDisplacement_Mito',
 'Cells_Intensity_MassDisplacement_RNA',
 'Cells_Intensity_MaxIntensityEdge_AGP',
 'Cells_Intensity_MaxIntensityEdge_DNA',
 'Cells_Intensity_MaxIntensityEdge_ER',
 'Cells_Intensity_MaxIntensityEdge_RNA',
 'Cells_Intensity_MaxIntensity_RNA',
 'Cells_Intensity_MeanIntensityEdge_Brightfield',
 'Cells_Intensity_MinIntensityEdge_Brightfield',
 'Cells_Intensity_MinIntensity_Brightfield',
 'Cells_Intensity_StdIntensityEdge_Brightfield',
 'Cells_Intensity_StdIntensityEdge_RNA',
 'Cells_RadialDistribution_FracAtD_Brightfield_1of4',
 'Cells_RadialDistribution_FracAtD_Brightfield_4of4',
 'Cells_RadialDistribution_MeanFrac_AGP_3of4',
 'Cells_RadialDistribution_MeanFrac_Brightfield_3of4',
 'Cells_RadialDistribution_MeanFrac_Brightfield_4of4',
 'Cells_RadialDistribution_MeanFrac_DNA_3of4',
 'Cells_RadialDistribution_MeanFrac_Mito_3of4',
 'Cells_RadialDistribution_RadialCV_Brightfield_1of4',
 'Cells_RadialDistribution_RadialCV_Brightfield_4of4',
 'Cells_RadialDistribution_RadialCV_DNA_1of4',
 'Cells_RadialDistribution_RadialCV_ER_1of4',
 'Cells_RadialDistribution_RadialCV_Mito_1of4',
 'Cells_RadialDistribution_RadialCV_RNA_1of4',
 'Cells_Texture_AngularSecondMoment_Brightfield_20_01',
 'Cells_Texture_AngularSecondMoment_DNA_10_00',
 'Cells_Texture_AngularSecondMoment_DNA_20_01',
 'Cells_Texture_AngularSecondMoment_Mito_10_00',
 'Cells_Texture_AngularSecondMoment_RNA_20_01',
 'Cells_Texture_Contrast_Mito_10_00',
 'Cells_Texture_Correlation_AGP_20_01',
 'Cells_Texture_Correlation_Brightfield_10_00',
 'Cells_Texture_Correlation_Brightfield_10_01',
 'Cells_Texture_Correlation_Brightfield_10_03',
 'Cells_Texture_Correlation_Brightfield_20_00',
 'Cells_Texture_Correlation_Brightfield_20_01',
 'Cells_Texture_Correlation_Brightfield_20_03',
 'Cells_Texture_Correlation_Brightfield_5_00',
 'Cells_Texture_Correlation_DNA_10_00',
 'Cells_Texture_Correlation_DNA_5_00',
 'Cells_Texture_Correlation_ER_10_00',
 'Cells_Texture_Correlation_ER_20_01',
 'Cells_Texture_Correlation_Mito_10_00',
 'Cells_Texture_Correlation_Mito_20_01',
 'Cells_Texture_Correlation_Mito_5_00',
 'Cells_Texture_Correlation_RNA_10_00',
 'Cells_Texture_Correlation_RNA_20_00',
 'Cells_Texture_Correlation_RNA_20_01',
 'Cells_Texture_DifferenceVariance_Brightfield_10_00',
 'Cells_Texture_DifferenceVariance_DNA_10_00',
 'Cells_Texture_InfoMeas1_AGP_10_01',
 'Cells_Texture_InfoMeas1_AGP_20_00',
 'Cells_Texture_InfoMeas1_AGP_20_01',
 'Cells_Texture_InfoMeas1_Brightfield_10_00',
 'Cells_Texture_InfoMeas1_DNA_20_01',
 'Cells_Texture_InfoMeas1_DNA_5_00',
 'Cells_Texture_InfoMeas1_ER_10_00',
 'Cells_Texture_InfoMeas1_ER_10_01',
 'Cells_Texture_InfoMeas1_Mito_10_03',
 'Cells_Texture_InfoMeas1_Mito_5_00',
 'Cells_Texture_InfoMeas1_RNA_10_01',
 'Cells_Texture_InfoMeas2_AGP_20_00',
 'Cells_Texture_InfoMeas2_Brightfield_10_00',
 'Cells_Texture_InfoMeas2_Brightfield_5_00',
 'Cells_Texture_InfoMeas2_Mito_10_00',
 'Cells_Texture_InfoMeas2_Mito_20_00',
 'Cells_Texture_SumAverage_AGP_10_01',
 'Cells_Texture_SumAverage_Brightfield_10_00',
 'Cells_Texture_SumAverage_ER_10_00',
 'Cells_Texture_SumAverage_RNA_10_00',
 'Cytoplasm_AreaShape_Area',
 'Cytoplasm_AreaShape_Compactness',
 'Cytoplasm_AreaShape_MajorAxisLength',
 'Cytoplasm_AreaShape_Zernike_2_0',
 'Cytoplasm_AreaShape_Zernike_2_2',
 'Cytoplasm_AreaShape_Zernike_4_0',
 'Cytoplasm_AreaShape_Zernike_4_4',
 'Cytoplasm_AreaShape_Zernike_7_1',
 'Cytoplasm_AreaShape_Zernike_7_3',
 'Cytoplasm_AreaShape_Zernike_8_2',
 'Cytoplasm_AreaShape_Zernike_8_4',
 'Cytoplasm_AreaShape_Zernike_8_6',
 'Cytoplasm_AreaShape_Zernike_9_3',
 'Cytoplasm_AreaShape_Zernike_9_5',
 'Cytoplasm_Granularity_2_AGP',
 'Cytoplasm_Granularity_2_Mito',
 'Cytoplasm_Granularity_2_RNA',
 'Cytoplasm_Granularity_3_AGP',
 'Cytoplasm_RadialDistribution_MeanFrac_AGP_1of4',
 'Cytoplasm_RadialDistribution_MeanFrac_AGP_4of4',
 'Cytoplasm_RadialDistribution_MeanFrac_Brightfield_1of4',
 'Cytoplasm_RadialDistribution_MeanFrac_Brightfield_3of4',
 'Cytoplasm_RadialDistribution_MeanFrac_ER_1of4',
 'Cytoplasm_RadialDistribution_MeanFrac_ER_4of4',
 'Cytoplasm_RadialDistribution_MeanFrac_RNA_1of4',
 'Cytoplasm_RadialDistribution_MeanFrac_RNA_4of4',
 'Cytoplasm_RadialDistribution_RadialCV_Brightfield_1of4',
 'Cytoplasm_RadialDistribution_RadialCV_Brightfield_2of4',
 'Cytoplasm_Texture_AngularSecondMoment_Brightfield_10_00',
 'Cytoplasm_Texture_AngularSecondMoment_Mito_10_00',
 'Cytoplasm_Texture_Correlation_AGP_20_01',
 'Cytoplasm_Texture_Correlation_Brightfield_10_00',
 'Cytoplasm_Texture_Correlation_Brightfield_10_01',
 'Cytoplasm_Texture_Correlation_Brightfield_10_02',
 'Cytoplasm_Texture_Correlation_Brightfield_10_03',
 'Cytoplasm_Texture_Correlation_Brightfield_20_00',
 'Cytoplasm_Texture_Correlation_Brightfield_20_01',
 'Cytoplasm_Texture_Correlation_Brightfield_20_02',
 'Cytoplasm_Texture_Correlation_Brightfield_20_03',
 'Cytoplasm_Texture_Correlation_Brightfield_5_00',
 'Cytoplasm_Texture_Correlation_Brightfield_5_03',
 'Cytoplasm_Texture_Correlation_DNA_10_00',
 'Cytoplasm_Texture_Correlation_DNA_10_01',
 'Cytoplasm_Texture_Correlation_DNA_20_00',
 'Cytoplasm_Texture_Correlation_DNA_20_01',
 'Cytoplasm_Texture_Correlation_ER_10_00',
 'Cytoplasm_Texture_Correlation_ER_20_00',
 'Cytoplasm_Texture_Correlation_ER_20_01',
 'Cytoplasm_Texture_Correlation_Mito_10_00',
 'Cytoplasm_Texture_Correlation_Mito_20_00',
 'Cytoplasm_Texture_Correlation_Mito_20_01',
 'Cytoplasm_Texture_Correlation_RNA_10_00',
 'Cytoplasm_Texture_Correlation_RNA_20_00',
 'Cytoplasm_Texture_Correlation_RNA_20_01',
 'Cytoplasm_Texture_DifferenceEntropy_DNA_20_00',
 'Cytoplasm_Texture_DifferenceVariance_DNA_10_00',
 'Cytoplasm_Texture_Entropy_DNA_20_00',
 'Cytoplasm_Texture_InfoMeas1_AGP_10_00',
 'Cytoplasm_Texture_InfoMeas1_AGP_5_00',
 'Cytoplasm_Texture_InfoMeas1_Brightfield_10_00',
 'Cytoplasm_Texture_InfoMeas1_DNA_10_00',
 'Cytoplasm_Texture_InfoMeas1_DNA_5_00',
 'Cytoplasm_Texture_InfoMeas1_DNA_5_01',
 'Cytoplasm_Texture_InfoMeas1_ER_10_00',
 'Cytoplasm_Texture_InfoMeas1_ER_5_00',
 'Cytoplasm_Texture_InfoMeas1_Mito_10_00',
 'Cytoplasm_Texture_InfoMeas1_Mito_5_00',
 'Cytoplasm_Texture_InfoMeas1_RNA_10_00',
 'Cytoplasm_Texture_InfoMeas1_RNA_5_00',
 'Cytoplasm_Texture_InfoMeas2_AGP_10_00',
 'Cytoplasm_Texture_InfoMeas2_DNA_10_00',
 'Cytoplasm_Texture_InfoMeas2_DNA_5_00',
 'Cytoplasm_Texture_InfoMeas2_Mito_10_00',
 'Cytoplasm_Texture_InfoMeas2_RNA_10_00',
 'Cytoplasm_Texture_InfoMeas2_RNA_5_00',
 'Nuclei_AreaShape_Area',
 'Nuclei_AreaShape_MaximumRadius',
 'Nuclei_AreaShape_Zernike_2_0',
 'Nuclei_AreaShape_Zernike_9_1',
 'Nuclei_Correlation_Correlation_DNA_ER',
 'Nuclei_Correlation_Correlation_DNA_Mito',
 'Nuclei_Correlation_Correlation_ER_Brightfield',
 'Nuclei_Correlation_Correlation_Mito_ER',
 'Nuclei_Correlation_K_AGP_Brightfield',
 'Nuclei_Correlation_K_Brightfield_ER',
 'Nuclei_Correlation_K_Brightfield_RNA',
 'Nuclei_Correlation_K_DNA_Brightfield',
 'Nuclei_Correlation_K_DNA_Mito',
 'Nuclei_Correlation_K_ER_Brightfield',
 'Nuclei_Correlation_K_Mito_Brightfield',
 'Nuclei_Correlation_K_RNA_Brightfield',
 'Nuclei_Correlation_Overlap_ER_RNA',
 'Nuclei_Correlation_Overlap_Mito_AGP',
 'Nuclei_Correlation_Overlap_Mito_RNA',
 'Nuclei_Granularity_10_DNA',
 'Nuclei_Granularity_1_DNA',
 'Nuclei_Granularity_5_DNA',
 'Nuclei_Granularity_6_DNA',
 'Nuclei_Granularity_7_DNA',
 'Nuclei_Intensity_IntegratedIntensityEdge_Brightfield',
 'Nuclei_Intensity_IntegratedIntensity_Brightfield',
 'Nuclei_Intensity_MADIntensity_Mito',
 'Nuclei_Intensity_MassDisplacement_AGP',
 'Nuclei_Intensity_MassDisplacement_Brightfield',
 'Nuclei_Intensity_MassDisplacement_DNA',
 'Nuclei_Intensity_MassDisplacement_ER',
 'Nuclei_Intensity_MassDisplacement_Mito',
 'Nuclei_Intensity_MassDisplacement_RNA',
 'Nuclei_Neighbors_NumberOfNeighbors_2',
 'Nuclei_RadialDistribution_MeanFrac_ER_1of4',
 'Nuclei_RadialDistribution_RadialCV_DNA_1of4',
 'Nuclei_RadialDistribution_RadialCV_ER_1of4',
 'Nuclei_RadialDistribution_RadialCV_Mito_1of4',
 'Nuclei_RadialDistribution_RadialCV_RNA_1of4',
 'Nuclei_Texture_AngularSecondMoment_RNA_20_03',
 'Nuclei_Texture_Correlation_Brightfield_20_00',
 'Nuclei_Texture_Correlation_Brightfield_20_03',
 'Nuclei_Texture_Correlation_DNA_10_00',
 'Nuclei_Texture_Correlation_DNA_10_01',
 'Nuclei_Texture_Correlation_DNA_5_01',
 'Nuclei_Texture_Correlation_ER_10_00',
 'Nuclei_Texture_Correlation_ER_10_01',
 'Nuclei_Texture_Correlation_ER_20_00',
 'Nuclei_Texture_Correlation_Mito_10_00',
 'Nuclei_Texture_Correlation_Mito_10_01',
 'Nuclei_Texture_Correlation_Mito_5_01',
 'Nuclei_Texture_Correlation_RNA_10_01',
 'Nuclei_Texture_InfoMeas1_AGP_10_01',
 'Nuclei_Texture_InfoMeas1_Mito_10_01',
 'Nuclei_Texture_InfoMeas1_Mito_5_00',
 'Nuclei_Texture_InfoMeas2_Mito_10_00',
 'Nuclei_Texture_InverseDifferenceMoment_AGP_20_00',
 'Nuclei_Texture_SumAverage_Brightfield_20_00',
 'Nuclei_Texture_SumAverage_RNA_10_00',
 'Nuclei_Texture_SumAverage_RNA_20_00']

stem_human_diff_features = ['Cells_AreaShape_Center_Y',
 'Cells_AreaShape_Compactness',
 'Cells_AreaShape_EulerNumber',
 'Cells_AreaShape_MaximumRadius',
 'Cells_AreaShape_Zernike_1_1',
 'Cells_AreaShape_Zernike_2_2',
 'Cells_AreaShape_Zernike_3_1',
 'Cells_AreaShape_Zernike_4_0',
 'Cells_AreaShape_Zernike_4_4',
 'Cells_AreaShape_Zernike_5_1',
 'Cells_AreaShape_Zernike_5_5',
 'Cells_AreaShape_Zernike_6_0',
 'Cells_AreaShape_Zernike_6_2',
 'Cells_AreaShape_Zernike_7_1',
 'Cells_AreaShape_Zernike_7_3',
 'Cells_AreaShape_Zernike_7_7',
 'Cells_AreaShape_Zernike_8_0',
 'Cells_AreaShape_Zernike_8_2',
 'Cells_AreaShape_Zernike_8_8',
 'Cells_AreaShape_Zernike_9_1',
 'Cells_AreaShape_Zernike_9_3',
 'Cells_AreaShape_Zernike_9_7',
 'Cells_AreaShape_Zernike_9_9',
 'Cells_Correlation_Correlation_AGP_Brightfield',
 'Cells_Correlation_Correlation_DNA_AGP',
 'Cells_Correlation_Correlation_DNA_ER',
 'Cells_Correlation_Correlation_DNA_RNA',
 'Cells_Correlation_Correlation_ER_AGP',
 'Cells_Correlation_Correlation_ER_Brightfield',
 'Cells_Correlation_Correlation_ER_RNA',
 'Cells_Correlation_Correlation_Mito_AGP',
 'Cells_Correlation_Correlation_Mito_RNA',
 'Cells_Correlation_K_AGP_Brightfield',
 'Cells_Correlation_K_AGP_DNA',
 'Cells_Correlation_K_AGP_ER',
 'Cells_Correlation_K_AGP_Mito',
 'Cells_Correlation_K_AGP_RNA',
 'Cells_Correlation_K_Brightfield_AGP',
 'Cells_Correlation_K_Brightfield_Mito',
 'Cells_Correlation_K_DNA_Mito',
 'Cells_Correlation_K_ER_Brightfield',
 'Cells_Correlation_K_ER_RNA',
 'Cells_Correlation_K_Mito_Brightfield',
 'Cells_Correlation_K_Mito_DNA',
 'Cells_Correlation_K_RNA_DNA',
 'Cells_Correlation_Overlap_AGP_Brightfield',
 'Cells_Correlation_Overlap_DNA_AGP',
 'Cells_Correlation_Overlap_DNA_ER',
 'Cells_Correlation_Overlap_DNA_Mito',
 'Cells_Correlation_Overlap_DNA_RNA',
 'Cells_Correlation_Overlap_ER_AGP',
 'Cells_Correlation_Overlap_ER_Brightfield',
 'Cells_Correlation_Overlap_Mito_AGP',
 'Cells_Correlation_Overlap_Mito_Brightfield',
 'Cells_Granularity_10_AGP',
 'Cells_Granularity_10_ER',
 'Cells_Granularity_10_Mito',
 'Cells_Granularity_10_RNA',
 'Cells_Granularity_11_AGP',
 'Cells_Granularity_12_AGP',
 'Cells_Granularity_13_AGP',
 'Cells_Granularity_13_ER',
 'Cells_Granularity_13_Mito',
 'Cells_Granularity_14_AGP',
 'Cells_Granularity_14_ER',
 'Cells_Granularity_14_Mito',
 'Cells_Granularity_15_AGP',
 'Cells_Granularity_15_ER',
 'Cells_Granularity_15_Mito',
 'Cells_Granularity_15_RNA',
 'Cells_Granularity_16_AGP',
 'Cells_Granularity_16_Mito',
 'Cells_Granularity_16_RNA',
 'Cells_Granularity_1_AGP',
 'Cells_Granularity_1_Brightfield',
 'Cells_Granularity_1_Mito',
 'Cells_Granularity_2_AGP',
 'Cells_Granularity_2_Brightfield',
 'Cells_Granularity_2_ER',
 'Cells_Granularity_2_Mito',
 'Cells_Granularity_2_RNA',
 'Cells_Granularity_3_AGP',
 'Cells_Granularity_3_Brightfield',
 'Cells_Granularity_4_AGP',
 'Cells_Granularity_5_AGP',
 'Cells_Intensity_IntegratedIntensityEdge_AGP',
 'Cells_Intensity_IntegratedIntensityEdge_DNA',
 'Cells_Intensity_IntegratedIntensityEdge_ER',
 'Cells_Intensity_IntegratedIntensityEdge_Mito',
 'Cells_Intensity_IntegratedIntensity_DNA',
 'Cells_Intensity_IntegratedIntensity_ER',
 'Cells_Intensity_LowerQuartileIntensity_AGP',
 'Cells_Intensity_LowerQuartileIntensity_DNA',
 'Cells_Intensity_LowerQuartileIntensity_ER',
 'Cells_Intensity_LowerQuartileIntensity_Mito',
 'Cells_Intensity_MADIntensity_DNA',
 'Cells_Intensity_MADIntensity_Mito',
 'Cells_Intensity_MADIntensity_RNA',
 'Cells_Intensity_MassDisplacement_AGP',
 'Cells_Intensity_MassDisplacement_ER',
 'Cells_Intensity_MassDisplacement_Mito',
 'Cells_Intensity_MassDisplacement_RNA',
 'Cells_Intensity_MaxIntensityEdge_AGP',
 'Cells_Intensity_MaxIntensityEdge_RNA',
 'Cells_Intensity_MaxIntensity_AGP',
 'Cells_Intensity_MaxIntensity_DNA',
 'Cells_Intensity_MaxIntensity_ER',
 'Cells_Intensity_MaxIntensity_Mito',
 'Cells_Intensity_MaxIntensity_RNA',
 'Cells_Intensity_MeanIntensity_ER',
 'Cells_Intensity_MedianIntensity_Mito',
 'Cells_Intensity_MinIntensityEdge_AGP',
 'Cells_Intensity_StdIntensityEdge_Brightfield',
 'Cells_Location_CenterMassIntensity_Y_AGP',
 'Cells_Location_CenterMassIntensity_Y_Brightfield',
 'Cells_Location_CenterMassIntensity_Y_DNA',
 'Cells_Location_CenterMassIntensity_Y_ER',
 'Cells_Location_CenterMassIntensity_Y_Mito',
 'Cells_Location_CenterMassIntensity_Y_RNA',
 'Cells_Neighbors_AngleBetweenNeighbors_10',
 'Cells_Neighbors_FirstClosestDistance_10',
 'Cells_RadialDistribution_FracAtD_AGP_1of4',
 'Cells_RadialDistribution_FracAtD_AGP_4of4',
 'Cells_RadialDistribution_FracAtD_Brightfield_1of4',
 'Cells_RadialDistribution_FracAtD_DNA_1of4',
 'Cells_RadialDistribution_FracAtD_DNA_3of4',
 'Cells_RadialDistribution_FracAtD_DNA_4of4',
 'Cells_RadialDistribution_FracAtD_ER_1of4',
 'Cells_RadialDistribution_FracAtD_ER_3of4',
 'Cells_RadialDistribution_FracAtD_ER_4of4',
 'Cells_RadialDistribution_FracAtD_Mito_1of4',
 'Cells_RadialDistribution_FracAtD_Mito_4of4',
 'Cells_RadialDistribution_FracAtD_RNA_1of4',
 'Cells_RadialDistribution_FracAtD_RNA_3of4',
 'Cells_RadialDistribution_FracAtD_RNA_4of4',
 'Cells_RadialDistribution_MeanFrac_AGP_3of4',
 'Cells_RadialDistribution_MeanFrac_Brightfield_1of4',
 'Cells_RadialDistribution_MeanFrac_Brightfield_2of4',
 'Cells_RadialDistribution_MeanFrac_Brightfield_4of4',
 'Cells_RadialDistribution_MeanFrac_DNA_3of4',
 'Cells_RadialDistribution_MeanFrac_ER_1of4',
 'Cells_RadialDistribution_MeanFrac_ER_3of4',
 'Cells_RadialDistribution_MeanFrac_Mito_3of4',
 'Cells_RadialDistribution_MeanFrac_RNA_1of4',
 'Cells_RadialDistribution_MeanFrac_RNA_3of4',
 'Cells_RadialDistribution_RadialCV_AGP_1of4',
 'Cells_RadialDistribution_RadialCV_AGP_4of4',
 'Cells_RadialDistribution_RadialCV_Brightfield_1of4',
 'Cells_RadialDistribution_RadialCV_DNA_1of4',
 'Cells_RadialDistribution_RadialCV_DNA_2of4',
 'Cells_RadialDistribution_RadialCV_DNA_3of4',
 'Cells_RadialDistribution_RadialCV_DNA_4of4',
 'Cells_RadialDistribution_RadialCV_Mito_1of4',
 'Cells_RadialDistribution_RadialCV_RNA_1of4',
 'Cells_RadialDistribution_RadialCV_RNA_4of4',
 'Cells_Texture_AngularSecondMoment_AGP_10_00',
 'Cells_Texture_AngularSecondMoment_DNA_10_00',
 'Cells_Texture_AngularSecondMoment_Mito_10_00',
 'Cells_Texture_Contrast_AGP_10_00',
 'Cells_Texture_Contrast_DNA_10_00',
 'Cells_Texture_Contrast_ER_10_00',
 'Cells_Texture_Contrast_Mito_10_00',
 'Cells_Texture_Contrast_RNA_10_00',
 'Cells_Texture_Correlation_AGP_10_00',
 'Cells_Texture_Correlation_AGP_20_00',
 'Cells_Texture_Correlation_AGP_20_01',
 'Cells_Texture_Correlation_Brightfield_10_00',
 'Cells_Texture_Correlation_Brightfield_10_01',
 'Cells_Texture_Correlation_Brightfield_10_02',
 'Cells_Texture_Correlation_Brightfield_10_03',
 'Cells_Texture_Correlation_Brightfield_5_00',
 'Cells_Texture_Correlation_Brightfield_5_03',
 'Cells_Texture_Correlation_DNA_10_00',
 'Cells_Texture_Correlation_ER_10_00',
 'Cells_Texture_Correlation_RNA_10_00',
 'Cells_Texture_Correlation_RNA_20_01',
 'Cells_Texture_DifferenceVariance_ER_10_00',
 'Cells_Texture_InfoMeas1_AGP_10_00',
 'Cells_Texture_InfoMeas1_AGP_20_00',
 'Cells_Texture_InfoMeas1_AGP_5_00',
 'Cells_Texture_InfoMeas1_Brightfield_10_00',
 'Cells_Texture_InfoMeas1_Brightfield_5_00',
 'Cells_Texture_InfoMeas1_Brightfield_5_01',
 'Cells_Texture_InfoMeas1_Brightfield_5_02',
 'Cells_Texture_InfoMeas1_DNA_10_00',
 'Cells_Texture_InfoMeas1_DNA_20_01',
 'Cells_Texture_InfoMeas1_DNA_5_00',
 'Cells_Texture_InfoMeas1_ER_10_00',
 'Cells_Texture_InfoMeas1_ER_20_00',
 'Cells_Texture_InfoMeas1_ER_20_01',
 'Cells_Texture_InfoMeas1_Mito_10_00',
 'Cells_Texture_InfoMeas1_RNA_10_00',
 'Cells_Texture_InfoMeas2_Brightfield_10_00',
 'Cells_Texture_InfoMeas2_Brightfield_5_00',
 'Cells_Texture_InfoMeas2_Brightfield_5_01',
 'Cells_Texture_InfoMeas2_Brightfield_5_02',
 'Cells_Texture_InfoMeas2_ER_20_01',
 'Cells_Texture_InfoMeas2_RNA_10_00',
 'Cells_Texture_InverseDifferenceMoment_ER_10_00',
 'Cells_Texture_SumAverage_ER_10_00',
 'Cells_Texture_SumAverage_RNA_10_00',
 'Cytoplasm_AreaShape_Center_Y',
 'Cytoplasm_AreaShape_EulerNumber',
 'Cytoplasm_AreaShape_Extent',
 'Cytoplasm_AreaShape_FormFactor',
 'Cytoplasm_AreaShape_Zernike_1_1',
 'Cytoplasm_AreaShape_Zernike_3_1',
 'Cytoplasm_AreaShape_Zernike_4_2',
 'Cytoplasm_AreaShape_Zernike_5_3',
 'Cytoplasm_AreaShape_Zernike_5_5',
 'Cytoplasm_AreaShape_Zernike_6_0',
 'Cytoplasm_AreaShape_Zernike_7_5',
 'Cytoplasm_AreaShape_Zernike_7_7',
 'Cytoplasm_AreaShape_Zernike_8_0',
 'Cytoplasm_AreaShape_Zernike_9_1',
 'Cytoplasm_AreaShape_Zernike_9_3',
 'Cytoplasm_AreaShape_Zernike_9_7',
 'Cytoplasm_AreaShape_Zernike_9_9',
 'Cytoplasm_Granularity_4_AGP',
 'Cytoplasm_Granularity_5_AGP',
 'Cytoplasm_Intensity_IntegratedIntensityEdge_DNA',
 'Cytoplasm_Intensity_IntegratedIntensityEdge_ER',
 'Cytoplasm_Intensity_IntegratedIntensityEdge_Mito',
 'Cytoplasm_Intensity_IntegratedIntensity_DNA',
 'Cytoplasm_Intensity_MADIntensity_AGP',
 'Cytoplasm_Intensity_MADIntensity_ER',
 'Cytoplasm_Intensity_MassDisplacement_DNA',
 'Cytoplasm_RadialDistribution_FracAtD_AGP_3of4',
 'Cytoplasm_RadialDistribution_FracAtD_AGP_4of4',
 'Cytoplasm_RadialDistribution_FracAtD_Brightfield_1of4',
 'Cytoplasm_RadialDistribution_FracAtD_Brightfield_4of4',
 'Cytoplasm_RadialDistribution_FracAtD_DNA_1of4',
 'Cytoplasm_RadialDistribution_FracAtD_DNA_4of4',
 'Cytoplasm_RadialDistribution_FracAtD_Mito_1of4',
 'Cytoplasm_RadialDistribution_FracAtD_Mito_4of4',
 'Cytoplasm_RadialDistribution_FracAtD_RNA_4of4',
 'Cytoplasm_RadialDistribution_MeanFrac_AGP_1of4',
 'Cytoplasm_RadialDistribution_MeanFrac_AGP_4of4',
 'Cytoplasm_RadialDistribution_MeanFrac_Brightfield_1of4',
 'Cytoplasm_RadialDistribution_MeanFrac_Brightfield_3of4',
 'Cytoplasm_RadialDistribution_MeanFrac_Brightfield_4of4',
 'Cytoplasm_RadialDistribution_MeanFrac_ER_1of4',
 'Cytoplasm_RadialDistribution_MeanFrac_ER_4of4',
 'Cytoplasm_RadialDistribution_MeanFrac_Mito_1of4',
 'Cytoplasm_RadialDistribution_MeanFrac_RNA_1of4',
 'Cytoplasm_RadialDistribution_MeanFrac_RNA_4of4',
 'Cytoplasm_RadialDistribution_RadialCV_Brightfield_1of4',
 'Cytoplasm_RadialDistribution_RadialCV_ER_4of4',
 'Cytoplasm_RadialDistribution_RadialCV_RNA_4of4',
 'Cytoplasm_Texture_Correlation_AGP_10_00',
 'Cytoplasm_Texture_Correlation_AGP_20_00',
 'Cytoplasm_Texture_Correlation_Brightfield_20_01',
 'Cytoplasm_Texture_Correlation_Brightfield_20_02',
 'Cytoplasm_Texture_Correlation_Brightfield_20_03',
 'Cytoplasm_Texture_Correlation_Brightfield_5_02',
 'Cytoplasm_Texture_Correlation_DNA_10_00',
 'Cytoplasm_Texture_InfoMeas1_AGP_10_00',
 'Cytoplasm_Texture_InfoMeas1_AGP_5_00',
 'Cytoplasm_Texture_InfoMeas1_Brightfield_10_00',
 'Cytoplasm_Texture_InfoMeas1_DNA_10_00',
 'Cytoplasm_Texture_InfoMeas1_ER_10_01',
 'Cytoplasm_Texture_InfoMeas1_RNA_10_00',
 'Cytoplasm_Texture_InfoMeas1_RNA_10_01',
 'Cytoplasm_Texture_InfoMeas1_RNA_5_00',
 'Cytoplasm_Texture_InfoMeas2_DNA_10_00',
 'Nuclei_AreaShape_Compactness',
 'Nuclei_AreaShape_EulerNumber',
 'Nuclei_AreaShape_MajorAxisLength',
 'Nuclei_AreaShape_MaximumRadius',
 'Nuclei_AreaShape_Perimeter',
 'Nuclei_AreaShape_Zernike_1_1',
 'Nuclei_AreaShape_Zernike_2_0',
 'Nuclei_AreaShape_Zernike_2_2',
 'Nuclei_AreaShape_Zernike_5_1',
 'Nuclei_AreaShape_Zernike_5_3',
 'Nuclei_AreaShape_Zernike_6_0',
 'Nuclei_AreaShape_Zernike_6_2',
 'Nuclei_AreaShape_Zernike_6_4',
 'Nuclei_AreaShape_Zernike_7_1',
 'Nuclei_AreaShape_Zernike_8_0',
 'Nuclei_AreaShape_Zernike_8_6',
 'Nuclei_AreaShape_Zernike_9_1',
 'Nuclei_AreaShape_Zernike_9_3',
 'Nuclei_Correlation_Correlation_AGP_Brightfield',
 'Nuclei_Correlation_Correlation_DNA_Brightfield',
 'Nuclei_Correlation_Correlation_DNA_Mito',
 'Nuclei_Correlation_Correlation_ER_Brightfield',
 'Nuclei_Correlation_Correlation_ER_RNA',
 'Nuclei_Correlation_Correlation_Mito_AGP',
 'Nuclei_Correlation_Correlation_Mito_ER',
 'Nuclei_Correlation_Correlation_RNA_AGP',
 'Nuclei_Correlation_K_AGP_Brightfield',
 'Nuclei_Correlation_K_AGP_DNA',
 'Nuclei_Correlation_K_AGP_Mito',
 'Nuclei_Correlation_K_AGP_RNA',
 'Nuclei_Correlation_K_Brightfield_AGP',
 'Nuclei_Correlation_K_Brightfield_Mito',
 'Nuclei_Correlation_K_Brightfield_RNA',
 'Nuclei_Correlation_K_ER_Mito',
 'Nuclei_Correlation_K_Mito_AGP',
 'Nuclei_Correlation_K_Mito_Brightfield',
 'Nuclei_Correlation_K_Mito_DNA',
 'Nuclei_Correlation_K_RNA_Brightfield',
 'Nuclei_Correlation_K_RNA_DNA',
 'Nuclei_Correlation_K_RNA_ER',
 'Nuclei_Correlation_Overlap_AGP_Brightfield',
 'Nuclei_Correlation_Overlap_DNA_Brightfield',
 'Nuclei_Correlation_Overlap_DNA_ER',
 'Nuclei_Correlation_Overlap_DNA_RNA',
 'Nuclei_Correlation_Overlap_ER_AGP',
 'Nuclei_Correlation_Overlap_RNA_AGP',
 'Nuclei_Correlation_Overlap_RNA_Brightfield',
 'Nuclei_Granularity_10_AGP',
 'Nuclei_Granularity_10_DNA',
 'Nuclei_Granularity_12_AGP',
 'Nuclei_Granularity_13_AGP',
 'Nuclei_Granularity_14_AGP',
 'Nuclei_Granularity_15_AGP',
 'Nuclei_Granularity_1_DNA',
 'Nuclei_Granularity_1_ER',
 'Nuclei_Granularity_1_RNA',
 'Nuclei_Granularity_2_AGP',
 'Nuclei_Granularity_2_Brightfield',
 'Nuclei_Granularity_2_DNA',
 'Nuclei_Granularity_2_ER',
 'Nuclei_Granularity_2_RNA',
 'Nuclei_Granularity_3_DNA',
 'Nuclei_Granularity_3_RNA',
 'Nuclei_Granularity_4_DNA',
 'Nuclei_Granularity_5_DNA',
 'Nuclei_Granularity_8_DNA',
 'Nuclei_Granularity_9_AGP',
 'Nuclei_Intensity_MADIntensity_Brightfield',
 'Nuclei_Intensity_MassDisplacement_AGP',
 'Nuclei_Intensity_MassDisplacement_Brightfield',
 'Nuclei_Intensity_MassDisplacement_Mito',
 'Nuclei_Intensity_MassDisplacement_RNA',
 'Nuclei_Neighbors_AngleBetweenNeighbors_2',
 'Nuclei_Neighbors_FirstClosestDistance_2',
 'Nuclei_Neighbors_NumberOfNeighbors_2',
 'Nuclei_RadialDistribution_FracAtD_AGP_1of4',
 'Nuclei_RadialDistribution_FracAtD_AGP_4of4',
 'Nuclei_RadialDistribution_FracAtD_Brightfield_1of4',
 'Nuclei_RadialDistribution_FracAtD_Brightfield_4of4',
 'Nuclei_RadialDistribution_FracAtD_DNA_1of4',
 'Nuclei_RadialDistribution_FracAtD_DNA_4of4',
 'Nuclei_RadialDistribution_FracAtD_ER_1of4',
 'Nuclei_RadialDistribution_FracAtD_ER_3of4',
 'Nuclei_RadialDistribution_FracAtD_ER_4of4',
 'Nuclei_RadialDistribution_FracAtD_Mito_1of4',
 'Nuclei_RadialDistribution_FracAtD_Mito_4of4',
 'Nuclei_RadialDistribution_FracAtD_RNA_1of4',
 'Nuclei_RadialDistribution_FracAtD_RNA_3of4',
 'Nuclei_RadialDistribution_FracAtD_RNA_4of4',
 'Nuclei_RadialDistribution_MeanFrac_AGP_3of4',
 'Nuclei_RadialDistribution_MeanFrac_AGP_4of4',
 'Nuclei_RadialDistribution_MeanFrac_Brightfield_3of4',
 'Nuclei_RadialDistribution_MeanFrac_DNA_1of4',
 'Nuclei_RadialDistribution_MeanFrac_DNA_4of4',
 'Nuclei_RadialDistribution_MeanFrac_ER_1of4',
 'Nuclei_RadialDistribution_MeanFrac_ER_3of4',
 'Nuclei_RadialDistribution_MeanFrac_ER_4of4',
 'Nuclei_RadialDistribution_MeanFrac_Mito_1of4',
 'Nuclei_RadialDistribution_MeanFrac_Mito_4of4',
 'Nuclei_RadialDistribution_MeanFrac_RNA_1of4',
 'Nuclei_RadialDistribution_MeanFrac_RNA_3of4',
 'Nuclei_RadialDistribution_MeanFrac_RNA_4of4',
 'Nuclei_RadialDistribution_RadialCV_AGP_1of4',
 'Nuclei_RadialDistribution_RadialCV_AGP_3of4',
 'Nuclei_RadialDistribution_RadialCV_AGP_4of4',
 'Nuclei_RadialDistribution_RadialCV_Brightfield_1of4',
 'Nuclei_RadialDistribution_RadialCV_DNA_1of4',
 'Nuclei_RadialDistribution_RadialCV_DNA_2of4',
 'Nuclei_RadialDistribution_RadialCV_ER_2of4',
 'Nuclei_RadialDistribution_RadialCV_Mito_1of4',
 'Nuclei_RadialDistribution_RadialCV_RNA_1of4',
 'Nuclei_RadialDistribution_RadialCV_RNA_4of4',
 'Nuclei_Texture_AngularSecondMoment_Brightfield_10_00',
 'Nuclei_Texture_AngularSecondMoment_Brightfield_20_00',
 'Nuclei_Texture_AngularSecondMoment_Brightfield_20_01',
 'Nuclei_Texture_AngularSecondMoment_Brightfield_20_03',
 'Nuclei_Texture_AngularSecondMoment_ER_10_00',
 'Nuclei_Texture_Correlation_AGP_5_00',
 'Nuclei_Texture_Correlation_Brightfield_10_01',
 'Nuclei_Texture_Correlation_DNA_10_01',
 'Nuclei_Texture_Correlation_DNA_10_03',
 'Nuclei_Texture_Correlation_DNA_20_00',
 'Nuclei_Texture_Correlation_DNA_20_01',
 'Nuclei_Texture_Correlation_DNA_20_02',
 'Nuclei_Texture_Correlation_ER_10_00',
 'Nuclei_Texture_Correlation_ER_10_01',
 'Nuclei_Texture_Correlation_ER_10_02',
 'Nuclei_Texture_Correlation_ER_10_03',
 'Nuclei_Texture_Correlation_RNA_10_00',
 'Nuclei_Texture_Correlation_RNA_20_00',
 'Nuclei_Texture_Correlation_RNA_20_02',
 'Nuclei_Texture_DifferenceEntropy_Brightfield_10_00',
 'Nuclei_Texture_DifferenceEntropy_Brightfield_20_00',
 'Nuclei_Texture_InfoMeas1_AGP_10_00',
 'Nuclei_Texture_InfoMeas1_AGP_10_01',
 'Nuclei_Texture_InfoMeas1_AGP_20_00',
 'Nuclei_Texture_InfoMeas1_DNA_10_01',
 'Nuclei_Texture_InfoMeas1_DNA_20_00',
 'Nuclei_Texture_InfoMeas1_DNA_5_00',
 'Nuclei_Texture_InfoMeas1_ER_10_00',
 'Nuclei_Texture_InfoMeas1_ER_10_01',
 'Nuclei_Texture_InfoMeas1_ER_20_00',
 'Nuclei_Texture_InfoMeas1_Mito_5_00',
 'Nuclei_Texture_InfoMeas1_RNA_10_00',
 'Nuclei_Texture_InfoMeas1_RNA_20_00',
 'Nuclei_Texture_InfoMeas1_RNA_5_00',
 'Nuclei_Texture_InfoMeas2_Brightfield_20_00',
 'Nuclei_Texture_InverseDifferenceMoment_DNA_10_00',
 'Nuclei_Texture_InverseDifferenceMoment_ER_20_00']

shared_human_diff = [i for i in progenitors_human_diff_features if i in stem_human_diff_features]

In [41]:
feature_diffs = pd.DataFrame(columns=['progenitors_ctrl', 'progenitors_del', 'stem_ctrl', 'stem_del', 'same_direction'])

for feature in shared_human_diff:
    same_direction=False
    prog_ctrl = progenitors_human_ctrl[feature].mean()
    prog_del = progenitors_human_del[feature].mean()
    prog_diff = prog_ctrl - prog_del
    stem_ctrl = stem_selected_human_ctrl[feature].mean()
    stem_del = stem_selected_human_del[feature].mean()
    stem_diff = stem_ctrl - stem_del
    if np.sign(prog_diff) == np.sign(stem_diff):
        same_direction = True
    feature_diffs.loc[feature] = [prog_ctrl, prog_del, stem_ctrl, stem_del, same_direction]

print(feature_diffs['same_direction'].value_counts())
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(feature_diffs)

True     90
False    32
Name: same_direction, dtype: int64


Unnamed: 0,progenitors_ctrl,progenitors_del,stem_ctrl,stem_del,same_direction
Cells_AreaShape_Compactness,0.238723,-0.137012,0.288596,-0.010123,True
Cells_AreaShape_MaximumRadius,-0.012336,-0.277024,0.147982,-0.344343,True
Cells_AreaShape_Zernike_2_2,-0.112773,0.273049,-0.322205,0.079491,True
Cells_AreaShape_Zernike_4_4,0.028706,0.343781,-0.367946,0.108186,True
Cells_AreaShape_Zernike_5_1,0.204836,0.016485,0.476719,-0.280325,True
Cells_AreaShape_Zernike_6_2,0.136906,-0.027605,0.317855,-0.038205,True
Cells_AreaShape_Zernike_7_1,0.202687,0.107174,0.478495,-0.263139,True
Cells_AreaShape_Zernike_7_3,0.189297,0.056429,0.3453,-0.1102,True
Cells_AreaShape_Zernike_8_2,0.075509,-0.023157,0.451548,-0.149496,True
Cells_AreaShape_Zernike_9_1,0.184299,0.045642,0.486593,-0.339523,True
