In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd
import anndata as ad
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
rxrx3_adata = ad.read_h5ad('rxrx3.h5ad')
rxrx3_adata.obs_names = rxrx3_adata.obs['well_id']

In [3]:
import re
import numpy as np
pattern = r'Plate\d+_(\w+-\d+_*_\d+_\w+\d+)_'

segmentation_results = []
for csv_file in Path('egfr-images3').rglob('*.csv'):
    well_id = re.findall(pattern, csv_file.stem)[0]
    well_data = rxrx3_adata[well_id, :].copy()
    if well_data.obs['perturbation_type'].values[0] == 'CRISPR':
        well_data.obs['treatment'] = well_data.obs['gene'].astype(str) + '_KO'
    df = pd.read_csv(csv_file)
    df['well_id'] = well_id
    avg_df = df.replace(np.nan, 0).groupby('well_id').mean()
    avg_df = pd.concat([well_data.obs, avg_df], axis=1)
    segmentation_results.append(avg_df)
segmentation_results = pd.concat(segmentation_results)


  segmentation_results = pd.concat(segmentation_results)


In [4]:
well_data.obs

Unnamed: 0_level_0,well_id,experiment_name,plate,address,gene,treatment,SMILES,concentration,perturbation_type,cell_type
well_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
gene-081_6_O39,gene-081_6_O39,gene-081,6,O39,EGFR,EGFR_KO,,,CRISPR,HUVEC


In [14]:
from matplotlib.backends.backend_pdf import PdfPages

df = segmentation_results.melt(id_vars=['well_id', 'treatment', 'experiment_name', 'plate', 'address', 'gene', 'SMILES', 'concentration', 'perturbation_type', 'cell_type'])


variables_to_plot = ['cell_area', 'cell_perimeter', 'cell_eccentricity', 'cell_mean_intensity', 'cell_texture_entropy',
                     'nucleus_area', 'nucleus_perimeter', 'nucleus_eccentricity', 'nucleus_mean_intensity', 
                     'nucleus_texture_entropy', 'mitochondria_area', 'mitochondria_perimeter', 
                     'mitochondria_eccentricity', 'mitochondria_mean_intensity', 'golgi_area', 'golgi_perimeter',
                     'golgi_eccentricity', 'golgi_mean_intensity', 'mito_distance_to_nucleus', 
                     'golgi_distance_to_nucleus', 'mito_distance_to_golgi', 'mito_golgi_overlap', 
                     'mito_nucleus_overlap', 'golgi_nucleus_overlap', 'manders_mito_golgi', 'pearson_mito_golgi', 
                     'distance_mito_membrane', 'distance_golgi_membrane', 'distance_nucleus_membrane']

# open a pdf file to save all plots
with PdfPages('segmentation_histograms.pdf') as pdf:
    # create a figure and loop through each variable to plot
    for variable in variables_to_plot:
        _ = plt.figure(figsize=(10, 6))  # set figure size
        _ = sns.histplot(df.query(f'variable == "{variable}"'), x='value', hue='treatment')
        
        _ = plt.title(variable, fontsize=16)  # add title with variable name
        _ = plt.tight_layout()  # ensure everything fits well
        
        # save the current figure to the pdf
        pdf.savefig()
        plt.close()  # close the figure to free memory

print("All plots have been saved to 'segmentation_histograms.pdf'")


All plots have been saved to 'segmentation_histograms.pdf'


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

# Assuming df is your melted dataframe as before

# variables_to_plot = ['cell_area', 'cell_perimeter', 'cell_eccentricity', 'nucleus_area', 'nucleus_perimeter']  # Reduced list for quicker execution
variables_to_plot = ['cell_area', 'cell_perimeter', 'cell_eccentricity', 'cell_mean_intensity', 'cell_texture_entropy',
                     'nucleus_area', 'nucleus_perimeter', 'nucleus_eccentricity', 'nucleus_mean_intensity', 
                     'nucleus_texture_entropy', 'mitochondria_area', 'mitochondria_perimeter', 
                     'mitochondria_eccentricity', 'mitochondria_mean_intensity', 'golgi_area', 'golgi_perimeter',
                     'golgi_eccentricity', 'golgi_mean_intensity', 'mito_distance_to_nucleus', 
                     'golgi_distance_to_nucleus', 'mito_distance_to_golgi', 'mito_golgi_overlap', 
                     'mito_nucleus_overlap', 'golgi_nucleus_overlap', 'manders_mito_golgi', 'pearson_mito_golgi', 
                     'distance_mito_membrane', 'distance_golgi_membrane', 'distance_nucleus_membrane']

with PdfPages('segmentation_plots_colored_treatments.pdf') as pdf:
    for variable in variables_to_plot:
        plt.figure(figsize=(16, 8))
        
        var_df = df[df['variable'] == variable]
        
        # Create a new column combining treatment and concentration
        var_df['treatment_conc'] = var_df['treatment'] + ' (' + var_df['concentration'].astype(str) + ')'
        
        # Create the box plot with color-coded treatments
        _ = sns.boxplot(x='treatment_conc', y='value', data=var_df, hue='treatment', palette='Set2')
        
        _ = plt.title(f'{variable} by Treatment and Concentration', fontsize=16)
        _ = plt.xlabel('Treatment (Concentration)', fontsize=12)
        _ = plt.ylabel(variable, fontsize=12)
        _ = plt.xticks(rotation=90, ha='right')
        
        # Adjust legend
        _ = plt.legend(title='Treatment', bbox_to_anchor=(1.05, 1), loc='upper left')
        
        _ = plt.tight_layout()
        pdf.savefig()
        plt.close()

print("Plots saved to 'segmentation_plots_colored_treatments.pdf'")

In [16]:
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import pandas as pd

# Assuming df is your melted dataframe as before

variables_to_plot = ['cell_area', 'cell_perimeter', 'cell_eccentricity', 'cell_mean_intensity', 'cell_texture_entropy',
                     'nucleus_area', 'nucleus_perimeter', 'nucleus_eccentricity', 'nucleus_mean_intensity', 
                     'nucleus_texture_entropy', 'mitochondria_area', 'mitochondria_perimeter', 
                     'mitochondria_eccentricity', 'mitochondria_mean_intensity', 'golgi_area', 'golgi_perimeter',
                     'golgi_eccentricity', 'golgi_mean_intensity', 'mito_distance_to_nucleus', 
                     'golgi_distance_to_nucleus', 'mito_distance_to_golgi', 'mito_golgi_overlap', 
                     'mito_nucleus_overlap', 'golgi_nucleus_overlap', 'manders_mito_golgi', 'pearson_mito_golgi', 
                     'distance_mito_membrane', 'distance_golgi_membrane', 'distance_nucleus_membrane']

with PdfPages('segmentation_plots_sorted_concentrations.pdf') as pdf:
    for variable in variables_to_plot:
        plt.figure(figsize=(18, 10))
        
        var_df = df[df['variable'] == variable].copy()
        
        # Ensure concentration is numeric for sorting
        var_df['concentration'] = pd.to_numeric(var_df['concentration'], errors='coerce')
        
        # Sort the dataframe by treatment and concentration
        var_df = var_df.sort_values(['treatment', 'concentration'])
        
        # Create a new column combining treatment and concentration
        var_df['treatment_conc'] = var_df['treatment'] + ' (' + var_df['concentration'].astype(str) + ')'
        
        # Get the order for x-axis based on sorted dataframe
        order = var_df['treatment_conc'].unique()
        
        # Create the box plot with color-coded treatments and specified order
        _ = sns.boxplot(x='treatment_conc', y='value', data=var_df, hue='treatment', palette='Set2', order=order)
        
        _ = plt.title(f'{variable} by Treatment and Concentration', fontsize=16)
        _ = plt.xlabel('Treatment (Concentration)', fontsize=12)
        _ = plt.ylabel(variable, fontsize=12)
        _ = plt.xticks(rotation=90, ha='right')
        
        # Adjust legend
        _ = plt.legend(title='Treatment', bbox_to_anchor=(1.05, 1), loc='upper left')
        
        _ = plt.tight_layout()
        pdf.savefig()
        plt.close()

print("Plots saved to 'segmentation_plots_sorted_concentrations.pdf'")

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

<Figure size 1800x1000 with 0 Axes>

Plots saved to 'segmentation_plots_sorted_concentrations.pdf'
