In [1]:
import sys
sys.path.append("../scripts")

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from load import load_config
from preprocess import preprocess_profiles
from plot_utils import plot_mean_feature_per_well

In [2]:
# Load the preprocessing settings; they are later unpacked as keyword
# arguments into preprocess_profiles.
preprocess_config = load_config("../configs/preprocess")
# Bare last expression so the notebook displays the loaded settings.
preprocess_config

{'normalize_group': 'Metadata_Plate',
 'normalize_kwargs': {'method': 'mad_robustize',
  'mad_robustize_epsilon': 0.0,
  'image_features': True},
 'feature_select_whitelist': ['Cells_Count'],
 'feature_select_kwargs': {'operation': ['variance_threshold',
   'correlation_threshold',
   'drop_na_columns',
   'blocklist'],
  'unique_cut': 0.01,
  'image_features': True}}

In [3]:
# Output location passed to plot_mean_feature_per_well as `figsave_path`.
figsave_path = "output/plate_visualizations"

# Individual features that are plotted per well for every profile set.
features = [
    "Cells_AreaShape_Area",
    "Cells_Intensity_MeanIntensity_AGP",
    "Image_Threshold_SumOfEntropies_CellsIncludingEdges",
    "Cells_Count",
]

# Reduced feature-selection settings (drop NA columns + blocklist only).
# NOTE(review): not referenced by the visible cells — confirm it is still needed.
feature_select_ops = dict(
    operation=["drop_na_columns", "blocklist"],
    image_features=True,
)

In [4]:
# Label -> parquet path for each profile set to visualize. The label is used
# as the plot prefix and in the printed correlation lines; the path is read
# with pd.read_parquet.
profiles = dict(
    raw_profiles="../1.load/output/raw_filtered_profiles.parquet",
    cc_adj_profiles="../3.correct/output/profiles_cc_adjusted.parquet",
    well_mean_subtracted="../3.correct/output/full_profiles_mean_corrected.parquet",
    cc_adj_well_mean_subtracted="../3.correct/output/full_profiles_cc_adj_mean_corr.parquet",
)

### Visualize mean feature values per well position

In [5]:
# Summary columns added to every profile set: column name -> regex selecting
# the columns whose row-wise mean it holds. Insertion order matters because the
# columns are added one after another.
# NOTE(review): the "All" pattern matches every column not starting with
# "Metadata_", which at that point includes the four averages added just
# before it — confirm this is intended.
average_feature_patterns = {
    "Cells_Average_Feature": "^Cells_",
    "Nuclei_Average_Feature": "^Nuclei_",
    "Cytoplasm_Average_Feature": "^Cytoplasm_",
    "Image_Average_Feature": "^Image_",
    "All_Average_Feature": "^(?!Metadata_)",
}

# Build the list of columns to plot once, without mutating `features`.
# Extending `features` inside the loop appended the same five names again for
# every profile set (and on every re-run of the cell), so each feature was
# printed and plotted multiple times.
plot_features = features + list(average_feature_patterns)

for p in profiles:
    # Load one profile set and apply the shared preprocessing settings.
    df = pd.read_parquet(profiles[p])
    df = preprocess_profiles(df, **preprocess_config)

    # Row-wise mean over the columns matched by each pattern.
    for average_name, column_pattern in average_feature_patterns.items():
        df[average_name] = df.filter(regex=column_pattern).mean(1)

    for feature in plot_features:
        # Report how strongly the feature tracks cell count, then draw the plot.
        print(
            f"{p} {feature} correlation with Cells_Count: {df[feature].corr(df['Cells_Count'])}"
        )
        plot_mean_feature_per_well(df, feature, prefix=p, figsave_path=figsave_path)
        plt.show()