In [1]:
import math
import os
import random
import statistics
from collections import Counter

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pycytominer import normalize
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
from scipy.stats import ttest_ind
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.multitest import multipletests

# Functions

In [2]:
def load_data(path):
    """
    Read a csv or csv.gz profile file and return its metadata with a unique ID.

    Columns whose name contains 'Metadata' are treated as metadata; all other
    columns are treated as feature data. Only the metadata is returned: the
    feature columns are merely counted for the printed shape.

    Parameters
    ----------
    path : str or path-like
        Location of the file; passed unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        A copy of the metadata columns plus a 'my_id' column built as
        ``<Metadata_line_source>_<Metadata_line_condition>_<Metadata_line_ID>_<Metadata_well_position>``.

    Raises
    ------
    KeyError
        If one of the four metadata columns used to build 'my_id' is missing.
    """
    df = pd.read_csv(path)

    # Separate data and metadata
    metadata_cols = [col for col in df.columns if 'Metadata' in col]

    # Feature columns, minus those whose name mentions an excluded measurement
    excluded_tokens = ('Costes', 'Manders', 'RWC')
    data_cols = [
        col for col in df.columns
        if col not in metadata_cols and not any(token in col for token in excluded_tokens)
    ]

    # Use metadata to create the unique per-row ID
    metadata_df = df[metadata_cols].copy()
    metadata_df['my_id'] = (
        metadata_df['Metadata_line_source'] + '_'
        + metadata_df['Metadata_line_condition'] + '_'
        + metadata_df['Metadata_line_ID'].astype(str) + '_'
        + metadata_df['Metadata_well_position']
    )

    # Same report as before: (rows, retained feature columns). The feature frame
    # itself was never returned, so it is no longer built or re-indexed here.
    print('df shape: {}'.format((df.shape[0], len(data_cols))))
    return metadata_df

In [3]:
def var_threshold_selection(data, threshold=0.01):
    """
    Keep the columns of `data` that scikit-learn's VarianceThreshold retains.

    data: DataFrame of features, fitted as-is.
    threshold: value handed to VarianceThreshold (default 0.01).
    Returns `data` restricted to the supported columns, in their original order.
    """
    fitted_selector = VarianceThreshold(threshold).fit(data)
    kept_positions = fitted_selector.get_support(indices=True)
    kept_columns = data.columns[kept_positions]
    return data[kept_columns]

def corr_threshold_selection(df, threshold):
    """
    Drop every column that is highly Spearman-correlated with an earlier column.

    A column j is removed when any column i positioned before it has
    ``corr(i, j) >= threshold``. The comparison is made against every earlier
    column, including ones that were themselves removed, and it is signed:
    strongly negative correlations do not trigger removal. NaN correlations
    never trigger removal.

    Parameters
    ----------
    df : pandas.DataFrame
        Features in columns.
    threshold : float
        Correlation at or above which the later column of a pair is dropped.

    Returns
    -------
    pandas.DataFrame
        `df` restricted to the surviving columns, original order preserved.
    """
    corr = df.corr(method='spearman').values
    # Strict upper triangle: entry (i, j) is only considered when i < j, so each
    # column is judged solely against the columns that precede it.
    too_similar = np.triu(corr >= threshold, k=1)
    keep = ~too_similar.any(axis=0)
    return df[df.columns[keep]]

In [4]:
def plot_corr_matrix(df, return_corrs=True):
    """
    Draw a heatmap of the Spearman correlation matrix and print summary stats.

    The mean and (sample) standard deviation are computed over every unordered
    pair of distinct features, i.e. the strict upper triangle of the matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Features in columns.
    return_corrs : bool, default True
        When True the correlation matrix is returned, otherwise None.

    Returns
    -------
    pandas.DataFrame or None
    """
    corrs = df.corr(method='spearman')
    size = df.shape[1]
    fig, ax = plt.subplots(figsize=(size/4,size/5))
    sns.heatmap(corrs, cmap='coolwarm', ax=ax)

    # Pick the upper triangle by position. Filtering the zero-padded triu output
    # on `!= 0` would also throw away pairs whose correlation is exactly zero.
    pair_rows, pair_cols = np.triu_indices(corrs.shape[0], k=1)
    vals = corrs.values[pair_rows, pair_cols].tolist()

    # With fewer than two features there are no pairs; report NaN instead of crashing.
    mean_corr = sum(vals)/len(vals) if vals else float('nan')
    stdev_corr = statistics.stdev(vals) if len(vals) > 1 else float('nan')
    print('Number of features: {}'.format(size))
    print ('Mean corr: {}, Stdev corr: {}'.format(mean_corr, stdev_corr))
    if return_corrs:
        return corrs
    
def get_corr_between_compartments(metafeature_df):
    """
    Scatter the Cells, Cytoplasm and Nuclei feature values against each other.

    Columns of `metafeature_df` are split by the prefix of their name
    ('Cells', 'Cytoplasm', 'Nuclei'); the values of each compartment are
    flattened into one list. Three scatter plots are drawn (Cells vs Cytoplasm,
    Cells vs Nuclei, Cytoplasm vs Nuclei) and the matching R2 scores printed.

    NOTE(review): the scatter and r2_score calls need the three flattened lists
    to have equal length, i.e. the same number of columns per compartment --
    confirm against the callers.
    """
    by_feature = metafeature_df.T
    cells_df = by_feature[by_feature.index.str.startswith('Cells')]
    cyto_df = by_feature[by_feature.index.str.startswith('Cytoplasm')]
    nuc_df = by_feature[by_feature.index.str.startswith('Nuclei')]
    cells_vals = cells_df.values.flatten().tolist()
    # Was misspelled `nyto_vals`, which left `cyto_vals` undefined below.
    cyto_vals = cyto_df.values.flatten().tolist()
    nuc_vals = nuc_df.values.flatten().tolist()

    fig, (ax1, ax2, ax3) = plt.subplots(1,3, figsize=(14, 4))
    ax1.scatter(cells_vals, cyto_vals)
    ax2.scatter(cells_vals, nuc_vals)
    ax3.scatter(cyto_vals, nuc_vals)
    print('R2 scores: {}, {}, {}'.format(r2_score(cells_vals, cyto_vals),
                                        r2_score(cells_vals, nuc_vals),
                                        r2_score(cyto_vals, nuc_vals)))

# Merge the data with metadata, and do plate normalization

In [15]:
from pathlib import Path

# The profiles are addressed relative to the parent of the current working directory.
path = os.getcwd()
base_dir = str(Path(path).parent)

# Only the well label and the constructed per-well ID are needed downstream.
stem_profile_csv = base_dir + '/1.run-workflows/profiles/NCP_STEM_1/BR_NCP_STEM_1/BR_NCP_STEM_1_normalized.csv.gz'
metadata = load_data(stem_profile_csv)
metadata = metadata[['Metadata_Well', 'my_id']]

df shape: (384, 4023)


In [21]:
progenitor_profile_csv = base_dir + '/1.run-workflows/profiles/NCP_PROGENITORS_1/BR_NCP_PROGENITORS_1.csv.gz'
data = pd.read_csv(progenitor_profile_csv)

# Drop every column whose name mentions Costes, Manders or RWC, then normalize.
excluded_tokens = ('Costes', 'Manders', 'RWC')
data_cols = [col for col in data.columns if not any(token in col for token in excluded_tokens)]
data = normalize(data[data_cols])

# Bring in the per-well ID from the metadata table and use it as the index.
data = pd.merge(data, metadata, left_on='Metadata_Well', right_on='Metadata_Well')
data = data.drop(['Metadata_Plate', 'Metadata_Well'], axis=1).set_index('my_id')

# Remove the extraneous 'control' or 'deletion' (third token) for the isogenic
# perturbs, to keep naming consistency; all other IDs are kept unchanged.
new_idx = []
for well_id in data.index.tolist():
    if 'genic' in well_id:
        tokens = well_id.split('_')
        well_id = '_'.join([tokens[0], tokens[1], tokens[3], tokens[4]])
    new_idx.append(well_id)

data.index = new_idx

In [22]:
# Preview the normalized profiles, now indexed by the per-well ID.
data

Unnamed: 0,Cells_AreaShape_Area,Cells_AreaShape_Center_X,Cells_AreaShape_Center_Y,Cells_AreaShape_Center_Z,Cells_AreaShape_Compactness,Cells_AreaShape_Eccentricity,Cells_AreaShape_EulerNumber,Cells_AreaShape_Extent,Cells_AreaShape_FormFactor,Cells_AreaShape_MajorAxisLength,...,Nuclei_Texture_Variance_RNA_10_02,Nuclei_Texture_Variance_RNA_10_03,Nuclei_Texture_Variance_RNA_20_00,Nuclei_Texture_Variance_RNA_20_01,Nuclei_Texture_Variance_RNA_20_02,Nuclei_Texture_Variance_RNA_20_03,Nuclei_Texture_Variance_RNA_5_00,Nuclei_Texture_Variance_RNA_5_01,Nuclei_Texture_Variance_RNA_5_02,Nuclei_Texture_Variance_RNA_5_03
human_control_1_A01,0.031558,-0.083367,-0.376432,0.0,0.664552,0.391890,0.378463,-0.780011,-0.920009,0.608907,...,-0.843344,-0.840968,-0.644202,-0.602125,-0.644084,-0.607883,-0.849810,-0.848555,-0.848786,-0.847515
human_control_1_A02,-0.054664,-0.227870,-0.113078,0.0,-0.182981,0.057832,-0.104090,-0.077548,-0.245233,0.116378,...,-1.078804,-1.086466,-0.908879,-0.929953,-0.914194,-0.941841,-1.085625,-1.082379,-1.083933,-1.078993
human_deletion_30_A03,-0.161378,-0.030312,0.164265,0.0,-0.608366,-0.155454,-0.328369,0.346617,0.120644,-0.384417,...,-1.171375,-1.181463,-1.071824,-1.141703,-1.073357,-1.139353,-1.183812,-1.177876,-1.182150,-1.177294
human_deletion_30_A04,-0.141797,-0.179424,0.070165,0.0,-0.555260,-0.092694,-0.218886,0.322870,0.137112,-0.304189,...,-0.909145,-0.913576,-0.828592,-0.856107,-0.824114,-0.869313,-0.914348,-0.911870,-0.914511,-0.907822
human_control_9_A05,-0.069913,0.521924,-1.602826,0.0,1.036712,0.399706,-0.082652,-0.907102,-0.741057,0.329826,...,0.740509,0.735559,0.881529,1.041853,0.877049,1.149829,0.785405,0.765654,0.787896,0.762142
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
human_control_13_P20,0.016831,0.019864,0.241263,0.0,-0.263138,0.072305,-1.013906,0.055551,-0.241643,0.235123,...,-0.843951,-0.858732,-0.810453,-0.822422,-0.810289,-0.833615,-0.842753,-0.844238,-0.841745,-0.841695
human_deletion_35_P21,-0.006812,0.010482,-0.314606,0.0,-0.633877,0.010154,-0.197756,0.454381,0.406647,-0.441538,...,-1.181856,-1.168077,-1.387099,-1.434701,-1.396777,-1.461724,-1.192992,-1.189867,-1.191645,-1.190141
human_deletion_35_P22,-0.082822,0.274600,-0.218831,0.0,-0.706962,-0.052511,0.019505,0.503391,0.559702,-0.512312,...,-1.158347,-1.129080,-1.417583,-1.462290,-1.423246,-1.470335,-1.180942,-1.176140,-1.178161,-1.173465
human_control_22_P23,-0.158956,0.361959,0.089901,0.0,-0.394386,0.093896,0.132013,0.127585,0.194439,-0.309905,...,-0.607045,-0.593352,-0.672866,-0.687904,-0.667905,-0.695881,-0.616162,-0.615263,-0.614708,-0.611955


In [None]:
# # Exact same as cytominer normalize
# scaler = StandardScaler()
# scaler.fit(data)
# data_df = pd.DataFrame(scaler.transform(data), index=data.index, columns=data.columns)
# data_df

In [12]:
# Raise pandas' row-display limit to 555 for this cell only, so up to 555 rows
# of the table are rendered without truncation.
with pd.option_context("display.max_rows", 555):
#     display(data.sort_values(by='Cells_AreaShape_Area'))
    display(data)

Unnamed: 0,Cells_AreaShape_Area,Cells_AreaShape_Center_X,Cells_AreaShape_Center_Y,Cells_AreaShape_Center_Z,Cells_AreaShape_Compactness,Cells_AreaShape_Eccentricity,Cells_AreaShape_EulerNumber,Cells_AreaShape_Extent,Cells_AreaShape_FormFactor,Cells_AreaShape_MajorAxisLength,...,Nuclei_Texture_Variance_RNA_10_02,Nuclei_Texture_Variance_RNA_10_03,Nuclei_Texture_Variance_RNA_20_00,Nuclei_Texture_Variance_RNA_20_01,Nuclei_Texture_Variance_RNA_20_02,Nuclei_Texture_Variance_RNA_20_03,Nuclei_Texture_Variance_RNA_5_00,Nuclei_Texture_Variance_RNA_5_01,Nuclei_Texture_Variance_RNA_5_02,Nuclei_Texture_Variance_RNA_5_03
human_control_1_A01,0.031558,-0.083367,-0.376432,0.0,0.664552,0.39189,0.378463,-0.780011,-0.920009,0.608907,...,-0.843344,-0.840968,-0.644202,-0.602125,-0.644084,-0.607883,-0.84981,-0.848555,-0.848786,-0.847515
human_control_1_A02,-0.054664,-0.22787,-0.113078,0.0,-0.182981,0.057832,-0.10409,-0.077548,-0.245233,0.116378,...,-1.078804,-1.086466,-0.908879,-0.929953,-0.914194,-0.941841,-1.085625,-1.082379,-1.083933,-1.078993
human_deletion_30_A03,-0.161378,-0.030312,0.164265,0.0,-0.608366,-0.155454,-0.328369,0.346617,0.120644,-0.384417,...,-1.171375,-1.181463,-1.071824,-1.141703,-1.073357,-1.139353,-1.183812,-1.177876,-1.18215,-1.177294
human_deletion_30_A04,-0.141797,-0.179424,0.070165,0.0,-0.55526,-0.092694,-0.218886,0.32287,0.137112,-0.304189,...,-0.909145,-0.913576,-0.828592,-0.856107,-0.824114,-0.869313,-0.914348,-0.91187,-0.914511,-0.907822
human_control_9_A05,-0.069913,0.521924,-1.602826,0.0,1.036712,0.399706,-0.082652,-0.907102,-0.741057,0.329826,...,0.740509,0.735559,0.881529,1.041853,0.877049,1.149829,0.785405,0.765654,0.787896,0.762142
human_control_9_A06,0.006089,0.412618,0.225723,0.0,1.265076,0.523974,0.46832,-1.029449,-0.949079,0.610868,...,1.051806,1.020117,1.219725,1.448995,1.14732,1.319733,1.101653,1.080702,1.11336,1.089174
isogenic_deletion_26_A07,-0.091128,0.879262,0.263472,0.0,0.680538,0.307847,-0.80819,-0.669457,-0.56469,0.179426,...,0.810258,0.819445,0.915407,1.022595,0.975012,0.848794,0.834858,0.824715,0.823461,0.829635
isogenic_deletion_26_A08,-0.067618,0.338606,-0.834704,0.0,0.196808,0.222912,0.702126,-0.39389,-0.322354,0.139993,...,0.755354,0.773341,0.878893,0.890632,0.890654,0.982528,0.772276,0.772836,0.77474,0.765168
human_control_4_A09,-0.126587,0.185204,0.585579,0.0,-0.480269,-0.033032,0.112419,0.198445,0.181502,-0.213483,...,-0.786201,-0.789378,-0.653315,-0.611528,-0.644638,-0.60174,-0.787765,-0.789229,-0.787124,-0.787982
human_control_4_A10,-0.173647,-0.182555,-0.124774,0.0,-0.684254,-0.174887,-0.195662,0.426346,0.325238,-0.463273,...,-1.083186,-1.086067,-0.982136,-0.990693,-0.980093,-0.986162,-1.085875,-1.083753,-1.085922,-1.085787


# Stdev selection

In [None]:
# Filter the feature columns with var_threshold_selection at its default threshold (0.01).
data_df = var_threshold_selection(data)

In [None]:
%%time
data_df['group'] = data_df.index.str.split('_').map(lambda x: x[2])

# Remove high stdev features
# For each feature, get mean standrad dev
perturb_list = list(set(data_df['group'].tolist()))
feature_list = data_df.drop('group', axis=1).columns.tolist()
features_std_to_keep = []

keep_threshold = 0.5
mean_stdevs = []

for feature in feature_list:
    feature_stdevs = [] # list of stdevs per feature, should equal number of perturbations
    for perturb in perturb_list:
        temp = data_df[data_df['group'] == perturb][feature]
        temp_values = list(temp.values)
        feature_stdevs.append(np.std(temp_values))
    feature_mean_stdev = np.mean(feature_stdevs)
    mean_stdevs.append(feature_mean_stdev)
    if feature_mean_stdev < keep_threshold:
        features_std_to_keep.append(feature)
    # Reset feature stdevs for the next feature
    feature_stdevs = []
    
print(len(features_std_to_keep))

In [None]:
%%time
# Null distribution for stdev features
perturb_list = list(set(data_df['group'].tolist()))
feature_list = data_df.drop('group', axis=1).columns.tolist()

# Shuffle the groups
groups = data_df['group'].tolist()
random.Random(4).shuffle(groups)
data_df['group'] = groups

mean_stdevs_null = []
for feature in feature_list:
    feature_stdevs = [] # list of stdevs per feature, should equal number of perturbations
    for perturb in perturb_list:
        temp = data_df[data_df['group'] == perturb][feature]
        temp_values = list(temp.values)
        feature_stdevs.append(np.std(temp_values))
    feature_mean_stdev = np.mean(feature_stdevs)
    mean_stdevs_null.append(feature_mean_stdev)
    # Reset feature stdevs for the next feature
    feature_stdevs = []

In [None]:
# 8th percentile of the per-feature mean stdevs obtained with shuffled labels.
print(np.percentile(mean_stdevs_null, 8))

# distplot of the mean stdevs for each feature: observed first, then the null, on one axis
fig, ax = plt.subplots(figsize=(12,5))
for stdev_values in (mean_stdevs, mean_stdevs_null):
    sns.distplot(stdev_values, ax=ax)
ax.set_xlabel('StDev')

ax.set_title('Mean Standard Deviation by Feature')

In [None]:
# Keep only the features whose mean per-perturbation stdev was below keep_threshold;
# the helper 'group' column is not in features_std_to_keep, so it is dropped here too.
data_stdev = data_df[features_std_to_keep]
data_stdev

# Remove highly correlated features 

In [None]:
# For a given feature name, get the grouped names
# cat1: prefixes matched as substrings of the full feature name.
cat1 = ['Cells_AreaShape', 'Cytoplasm_AreaShape', 'Nuclei_AreaShape',
       'Cells_Neighbors', 'Cytoplasm_Neighbors', 'Nuclei_Neighbors',
       'Cells_Correlation', 'Cytoplasm_Correlation', 'Nuclei_Correlation']
# cat2: measurement names matched against the '_'-separated tokens of the
# feature name and combined with a channel name into '<measurement>_<channel>'.
cat2 = ['Location', 'Granularity', 'Texture', 'Intensity', 'RadialDistribution']

def get_group_feature_names(df, name_pruning_categories1, name_pruning_categories2, channels=None):
    """
    Transpose `df` and label every feature with the group it belongs to.

    Parameters
    ----------
    df : pandas.DataFrame
        Features in columns. A 'cluster_num' column, if present, is ignored.
        The caller's frame is not modified.
    name_pruning_categories1 : iterable of str
        Group names matched as substrings of the full feature name,
        e.g. ['Cells_AreaShape', 'Cytoplasm_AreaShape', 'Nuclei_AreaShape'].
    name_pruning_categories2 : iterable of str
        Measurement names matched against the '_'-separated tokens of the
        feature name, e.g. ['Location', 'Granularity', 'Texture', 'Intensity',
        'RadialDistribution']. They are combined with a channel into
        '<measurement>_<channel>'.
    channels : iterable of str, optional
        Channel names searched for in the last 30 characters of the feature
        name. Defaults to ['DNA', 'ER', 'Mito', 'RNA', 'AGP', 'Brightfield'].

    Returns
    -------
    pandas.DataFrame
        The transposed frame (features as rows) with two extra columns:
        'split' (list of name tokens) and 'group' (the assigned group, or ''
        when nothing matched). When several candidates match, the last one in
        iteration order wins, and a measurement/channel match overrides a
        match from `name_pruning_categories1`.
    """
    if channels is None:
        channels = ['DNA', 'ER', 'Mito', 'RNA', 'AGP', 'Brightfield']

    if 'cluster_num' in df.columns:
        df = df.drop(['cluster_num'], axis=1)
    df = df.T
    df['split'] = df.index.str.split('_')

    def _group_for(feature_name, tokens):
        # Resolve the group of one feature; later matches overwrite earlier ones.
        group = ''
        for name in name_pruning_categories1:
            if name in feature_name:
                group = name
        name_tail = feature_name[-30:]
        for name in name_pruning_categories2:
            if name in tokens:
                for ch in channels:
                    if ch in name_tail:
                        group = name + '_' + ch
        return group

    # Build the whole column at once instead of one .loc write per match.
    df['group'] = [_group_for(feature_name, tokens)
                   for feature_name, tokens in zip(df.index, df['split'])]

    return df

In [None]:
# Assign every retained feature to a group, then list the distinct group labels
# (an empty string marks features that matched no category).
data_grouped = get_group_feature_names(data_stdev, cat1, cat2)
set(data_grouped['group'].tolist())

In [None]:
# Append the per-feature group labels to a copy of the profiles as an extra row
# named 'metafeature', so columns can later be selected by their group.
groups = data_grouped['group']
data_annotated = data_stdev.copy()
data_annotated.loc['metafeature'] = groups
data_annotated

In [None]:
# all tables
def _metafeature_table(metafeature):
    """Return the columns of `data_annotated` labelled `metafeature`, as float64 and without the label row."""
    selected = data_annotated.columns[data_annotated.loc['metafeature'] == metafeature]
    return data_annotated[selected].drop(['metafeature'], axis=0).astype('float64')


def _channel_tables(channel):
    """Return the (Location, Granularity, Texture, Intensity, RadialDistribution) tables of one channel."""
    measurements = ('Location', 'Granularity', 'Texture', 'Intensity', 'RadialDistribution')
    return tuple(_metafeature_table(measurement + '_' + channel) for measurement in measurements)


cells_AS = _metafeature_table('Cells_AreaShape')
cyto_AS = _metafeature_table('Cytoplasm_AreaShape')
nuc_AS = _metafeature_table('Nuclei_AreaShape')

# The Cytoplasm labels below were previously misspelled 'Cytoplcorrm_...', which
# never matched any column and left cyto_corr / cyto_neighbors empty.
cells_corr = _metafeature_table('Cells_Correlation')
cyto_corr = _metafeature_table('Cytoplasm_Correlation')
nuc_corr = _metafeature_table('Nuclei_Correlation')

cells_neighbors = _metafeature_table('Cells_Neighbors')
cyto_neighbors = _metafeature_table('Cytoplasm_Neighbors')
nuc_neighbors = _metafeature_table('Nuclei_Neighbors')

agp_loc, agp_gran, agp_tex, agp_int, agp_rdist = _channel_tables('AGP')
Brightfield_loc, Brightfield_gran, Brightfield_tex, Brightfield_int, Brightfield_rdist = _channel_tables('Brightfield')
dna_loc, dna_gran, dna_tex, dna_int, dna_rdist = _channel_tables('DNA')
ER_loc, ER_gran, ER_tex, ER_int, ER_rdist = _channel_tables('ER')
Mito_loc, Mito_gran, Mito_tex, Mito_int, Mito_rdist = _channel_tables('Mito')
RNA_loc, RNA_gran, RNA_tex, RNA_int, RNA_rdist = _channel_tables('RNA')



In [None]:
# Every per-group table, in the order they are later concatenated.
tables = [
    cells_AS, cyto_AS, nuc_AS,
    cells_neighbors, cyto_neighbors, nuc_neighbors,
    cells_corr, cyto_corr, nuc_corr,
    agp_loc, agp_gran, agp_tex, agp_int, agp_rdist,
    Brightfield_loc, Brightfield_gran, Brightfield_tex, Brightfield_int, Brightfield_rdist,
    dna_loc, dna_gran, dna_tex, dna_int, dna_rdist,
    ER_loc, ER_gran, ER_tex, ER_int, ER_rdist,
    Mito_loc, Mito_gran, Mito_tex, Mito_int, Mito_rdist,
    RNA_loc, RNA_gran, RNA_tex, RNA_int, RNA_rdist,
]

# Total number of feature columns across all tables.
ct = sum(table.shape[1] for table in tables)
ct

In [None]:
# Within each group table, drop features that are highly Spearman-correlated
# with an earlier feature of the same table.
CORR_THRESHOLD = 0.9
selected_tables = [corr_threshold_selection(table, CORR_THRESHOLD) for table in tables]

# Concatenate once (column-wise, on the shared row index) instead of growing the
# frame with one concat per table.
feature_selected = pd.concat(
    [pd.DataFrame(index=data_stdev.index)] + selected_tables,
    axis=1,
)

feature_selected

In [None]:
# feature_selected.to_csv('C:/Users/Ruifan/neuronal-cell-painting/feature_sets/Progenitors/0714_stdev_corr_fs.csv')