# Comparison of Corresponding Values in Toxin-Actin Datasets

This notebook compares the Manders' coefficients `m1` and `m2` of toxin-actin interactions across three datasets — **Ordered**, **Mutant Ordered**, and **Disordered** — alongside the vesicle, nucleus, cytoplasm, and early-endosome datasets listed in `data_configurations`. Horizontal boxplots are used to visualize the distribution of `m1` and `m2` values across these different conditions.



## Explanation

- **Unmodified `data_configurations`:** The code reads each entry from `data_configurations` directly, without modification, so that no information is lost.
- **Dynamic loading and verification:** Each file is loaded if it exists; otherwise, a message is printed indicating the missing file.
- **Data aggregation:** `data_M1`, `data_M2`, and `conditions` capture all relevant data for each condition.
- **Plotting:** Horizontal boxplots are created for `m1` and `m2`, with annotations indicating the number of data points for each condition.
- **Saving and displaying:** The final plot, covering all conditions, is saved as a PDF and then displayed.

This code will produce a comprehensive set of boxplots that include every configuration listed in data_configurations.

## Importing Libraries

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import re

## Define the working directory

In [None]:
# Directory that holds the "<df_name>_all_manders_results.csv" input files and
# receives the combined PDF figure.  Set the MANDERS_DATA_DIR environment
# variable to run the notebook on another machine without editing this cell;
# when it is unset the original location is used.
wd = os.environ.get("MANDERS_DATA_DIR", '/home/jovyan/LNMA/bravoa/data/')

## Data configurations

In [None]:

# Names of every result set used in this notebook.  Each name doubles as the
# "df_name" of its configuration entry.
_DATASET_NAMES = (
    "Toxin_vs_actin",
    "Toxin_vs_Nucleus_a",
    "Toxin_vs_actin_ord",
    "Toxin_vs_actin_desord",
    "Toxin_vs_actin_mutord",
    "Toxin_vs_Nucleus_ER",
    "Toxin_vs_ER",
    "Toxin_vs_Nucleus_L",
    "Toxin_vs_Lizo",
    "Toxin_vs_Nucleus_M",
    "Toxin_vs_Mito",
    "Toxin_vs_Nucleus_E",
    "Toxin_vs_Endo",
    "Toxin_vs_EndoEarly",
    "Toxin_vs_mutEndoEarly",
    "Vesicles_Toxin_vs_actin",
    "Vesicles_Toxin_vs_ER",
    "Vesicles_Toxin_vs_Lizo",
    "Vesicles_Toxin_vs_Mito",
    "Vesicles_Toxin_vs_Endo",
)

data_configurations = {name: {"df_name": name} for name in _DATASET_NAMES}


def _select_configurations(label_to_dataset):
    """Build a {display label: configuration} dict from (label, dataset name) pairs.

    The values are the very same dict objects stored in data_configurations.
    """
    return {label: data_configurations[dataset] for label, dataset in label_to_dataset}


# Per-category groupings.  The keys are the condition names of each category;
# insertion order is the order in which the conditions are processed.
vesicles_configurations = _select_configurations([
    ("actin", "Vesicles_Toxin_vs_actin"),
    ("ER", "Vesicles_Toxin_vs_ER"),
    ("lysosomes", "Vesicles_Toxin_vs_Lizo"),
    ("mitochondria", "Vesicles_Toxin_vs_Mito"),
    ("endosomes", "Vesicles_Toxin_vs_Endo"),
])

# NOTE(review): the first key is "a" while the other categories use "actin" --
# presumably an abbreviation; confirm whether the label should read "actin".
nucleus_configurations = _select_configurations([
    ("a", "Toxin_vs_Nucleus_a"),
    ("ER", "Toxin_vs_Nucleus_ER"),
    ("lysosomes", "Toxin_vs_Nucleus_L"),
    ("mitochondria", "Toxin_vs_Nucleus_M"),
    ("endosomes", "Toxin_vs_Nucleus_E"),
])

cytoplasm_configurations = _select_configurations([
    ("actin", "Toxin_vs_actin"),
    ("ER", "Toxin_vs_ER"),
    ("lysosomes", "Toxin_vs_Lizo"),
    ("mitochondria", "Toxin_vs_Mito"),
    ("endosomes", "Toxin_vs_Endo"),
])

# NOTE(review): "Cry11Aa-depolymerized" differs from the matching entry of
# actin_labels ("Cry11Aa-depolymerized actin") -- confirm which is intended.
actin_configurations = _select_configurations([
    ("Cry11Aa-polymerized Actin", "Toxin_vs_actin_ord"),
    ("Cry11Aa-depolymerized", "Toxin_vs_actin_desord"),
    ("Cry11AaE97A-polymerized Actin", "Toxin_vs_actin_mutord"),
])

endo_configurations = _select_configurations([
    ("Cry11Aa", "Toxin_vs_EndoEarly"),
    ("Cry11AaE97A", "Toxin_vs_mutEndoEarly"),
])


## Define Functions

In [None]:
def _load_manders_values(df_name, data_dir):
    """Read the ``m1``/``m2`` columns of one ``<df_name>_all_manders_results.csv`` file.

    Returns a ``(m1, m2)`` pair of lists with NaN entries removed.  When the
    file does not exist, or lacks either column, a message is printed and
    ``([], [])`` is returned so the caller can keep an empty row for it.
    """
    file_path = os.path.join(data_dir, f"{df_name}_all_manders_results.csv")
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return [], []

    df = pd.read_csv(file_path)
    if 'm1' not in df.columns or 'm2' not in df.columns:
        print(f"Columns 'm1'/'m2' not found in: {file_path}")
        return [], []

    # NaN values would turn the box statistics into NaN (nothing is drawn)
    # and would be counted in the "n=" annotation, so they are dropped here.
    return df['m1'].dropna().tolist(), df['m2'].dropna().tolist()


def _draw_manders_panel(axis, values_per_row, tick_labels, title, n_rows):
    """Draw one horizontal boxplot panel and annotate every non-empty row with its n."""
    positions = list(range(1, len(values_per_row) + 1))
    axis.boxplot(values_per_row, vert=False, positions=positions)
    # Tick labels are set explicitly instead of through boxplot's ``labels``
    # argument, which Matplotlib 3.9 deprecated in favour of ``tick_labels``;
    # this form works with both older and newer releases.
    axis.set_yticks(positions)
    axis.set_yticklabels(tick_labels)
    axis.set_title(title)
    axis.set_xlim(0.2, 1)
    axis.set_ylim(0.5, n_rows + 0.5)
    for position, values in zip(positions, values_per_row):
        if values:
            axis.text(0.9, position, f'n={len(values)}', va='center', fontsize=8)


def plot_single_category(ax, configurations, category_name, max_conditions, custom_labels=None, data_dir=None):
    """Draw side-by-side horizontal boxplots of ``m1`` and ``m2`` for one category.

    Parameters
    ----------
    ax : indexable pair of Axes
        ``ax[0]`` receives the M1 panel and ``ax[1]`` the M2 panel.
    configurations : dict
        Maps a condition name to a dict with a ``'df_name'`` entry; the data
        are read from ``<data_dir>/<df_name>_all_manders_results.csv``.
    category_name : str
        Prefix of the panel titles (``"<category_name> M1"`` / ``"... M2"``).
    max_conditions : int
        Minimum number of rows per panel; shorter categories are padded with
        empty, unlabeled rows so that all categories share the same layout.
    custom_labels : list of str, optional
        Tick labels applied positionally, in the iteration order of
        ``configurations``.  May be longer than ``configurations`` (the extra
        rows stay empty).  The list is copied and never modified.
    data_dir : str, optional
        Directory containing the CSV files.  Defaults to the notebook-level
        ``wd`` variable.

    Raises
    ------
    ValueError
        If ``custom_labels`` has fewer entries than ``configurations``.

    Notes
    -----
    A configuration whose file is missing, or lacks the ``m1``/``m2`` columns,
    keeps its (empty) row.  This keeps every label attached to its own
    dataset; dropping the row would shift the remaining data under the wrong
    positional label.
    """
    if data_dir is None:
        data_dir = wd  # notebook-level default data directory

    conditions = []
    data_M1 = []
    data_M2 = []
    for config_name, config in configurations.items():
        m1_values, m2_values = _load_manders_values(config['df_name'], data_dir)
        conditions.append(config_name)
        data_M1.append(m1_values)
        data_M2.append(m2_values)

    if custom_labels:
        if len(custom_labels) < len(conditions):
            raise ValueError(
                f"custom_labels has {len(custom_labels)} entries but "
                f"{len(conditions)} configurations were given"
            )
        # Work on a copy: padding below must not grow the caller's list.
        conditions = list(custom_labels)

    # Pad labels and data to a common row count to standardize panel heights.
    n_rows = max(len(conditions), max_conditions)
    conditions += [""] * (n_rows - len(conditions))
    data_M1 += [[] for _ in range(n_rows - len(data_M1))]
    data_M2 += [[] for _ in range(n_rows - len(data_M2))]

    _draw_manders_panel(ax[0], data_M1, conditions, f"{category_name} M1", n_rows)
    # The M2 panel sits to the right of M1 and shares its rows, so its tick
    # labels are left empty.
    _draw_manders_panel(ax[1], data_M2, [""] * n_rows, f"{category_name} M2", n_rows)

## Determine the maximum number of conditions

In [None]:
# Row count of the largest category; plot_single_category pads every smaller
# category up to this number so that all panels share the same vertical layout.
max_conditions = max(map(len, (
    vesicles_configurations,
    nucleus_configurations,
    cytoplasm_configurations,
    actin_configurations,
    endo_configurations,
)))


## Custom labels for specific categories

In [None]:
# Tick labels passed as custom_labels for the "Microvilli" panel; they are
# applied positionally, in the order of actin_configurations.
actin_labels = [
    "Cry11Aa-polymerized Actin",
    "Cry11Aa-depolymerized actin",
    "Cry11AaE97A-polymerized Actin",
]

# Tick labels passed as custom_labels for the "Endosomes" panel, in the order
# of endo_configurations.
endo_labels = [
    "Cry11Aa",
    "Cry11AaE97A",
]

## Create the plots

In [None]:
# One row of axes per category: (configurations, panel title, custom tick labels or None).
categories = [
    (vesicles_configurations, "Extracellular Vesicles", None),
    (nucleus_configurations, "Nucleus", None),
    (cytoplasm_configurations, "Cytoplasm", None),
    (actin_configurations, "Microvilli", actin_labels),
    (endo_configurations, "Endosomes", endo_labels),
]

# The row count follows the category list; squeeze=False keeps `axes` 2-D even
# if only a single category is left in the list.
fig, axes = plt.subplots(len(categories), 2, figsize=(10, 15), squeeze=False)
fig.subplots_adjust(hspace=0.4)

for row_axes, (category_configs, category_title, category_labels) in zip(axes, categories):
    plot_single_category(
        row_axes,
        category_configs,
        category_title,
        max_conditions,
        # Pass a copy so the module-level label lists stay untouched and this
        # cell gives the same result when it is re-run.
        custom_labels=list(category_labels) if category_labels else None,
    )

fig.suptitle("Manders' Coefficients for All Categories", fontsize=16, y=0.92)

# Save the combined figure as a PDF.  bbox_inches="tight" grows the saved page
# to include the long y tick labels, which the default margins can clip.
pdf_path = os.path.join(wd, "Manders_Coefficients_All_Categories.pdf")
fig.savefig(pdf_path, format="pdf", bbox_inches="tight")
print(f"Saved combined plot as PDF: {pdf_path}")

plt.show()