# Data analysis

In [None]:
import pandas as pd
import os
import re # For regular expressions
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, f_oneway
import colorsys
import matplotlib.colors as mcolors
import numpy as np

# --- Configuration ---
# Input locations for the 3D dataset: per-image folders plus a CSV listing which image belongs to which group.
BASE_DIR_3D = '/home/kirill/Desktop/For_Kirill/Iba1_Morpho_BB_Blind (Copy)'
GROUP_INFO_FILE_3D = '/home/kirill/Desktop/For_Kirill/iba1 Morpho_group.csv'
METRICS_FILENAME_3D = 'metrics_df_ramified.csv'

# Input locations for the 2D dataset (groups are derived from the folder names, no group CSV).
BASE_DIR_2D = '/home/kirill/Desktop/For_Kirill/microglial distance_2D_tiff (Copy)'
METRICS_FILENAME_2D = 'metrics_df_ramified_2d.csv'

# Every figure and the combined statistics CSV are written below this directory.
EXPORT_BASE_DIRECTORY = '/home/kirill/Desktop/For_Kirill/Takeshi_analysis_combined'

# isdir (not exists) so that a stray *file* at this path fails loudly here instead of
# later at the first savefig; exist_ok guards against the directory appearing between
# the check and the creation.
if not os.path.isdir(EXPORT_BASE_DIRECTORY):
    os.makedirs(EXPORT_BASE_DIRECTORY, exist_ok=True)
    print(f"Created base export directory: {EXPORT_BASE_DIRECTORY}")

# Base colours; group i gets colour i (wrapping around) in the palette helpers.
ORIGINAL_CUSTOM_PALETTE = ['#00312F', '#1D2D46', '#46000D', '#5F3920', '#573844', '#424313', '#7A7A30', '#307A7A']
BOXPLOT_WIDTH = 0.2
BOXPLOT_LINEWIDTH = 3.0
STRIPPLOT_JITTER = BOXPLOT_WIDTH * 0.4 # Jitter relative to the narrow box width
STRIPPLOT_SIZE = 4 # Slightly smaller points for narrow boxes
# Accumulates one dict per statistical test performed by the plotting function.
all_stats_records = []

# --- Helper Functions (make_pastel, format_axis_label, get_significance_asterisks, format_p_value_for_display) ---
def make_pastel(hex_color, lightness_scale=0.7, saturation_scale=0.6):
    """Return a lighter, less saturated hex version of *hex_color*.

    The colour is converted to HLS; lightness is moved towards 1.0 by
    ``lightness_scale`` of the remaining distance and saturation is multiplied
    by ``saturation_scale``. Both are clamped to [0, 1] before converting back.
    If a ValueError is raised along the way (e.g. the colour cannot be parsed),
    the input is returned unchanged.
    """
    def _clamp_unit(value):
        return min(1.0, max(0.0, value))

    try:
        rgb = mcolors.to_rgb(hex_color)
        hue, lightness, saturation = colorsys.rgb_to_hls(rgb[0], rgb[1], rgb[2])
        pastel_lightness = _clamp_unit(lightness + (1.0 - lightness) * lightness_scale)
        pastel_saturation = _clamp_unit(saturation * saturation_scale)
        pastel_rgb = colorsys.hls_to_rgb(hue, pastel_lightness, pastel_saturation)
        return mcolors.to_hex(pastel_rgb)
    except ValueError:
        return hex_color

def format_axis_label(label_text):
    """Turn a snake_case column name into a display label.

    The name is split on underscores. Tokens equal (case-insensitively) to
    ``um``, ``um2`` or ``um3`` become ``µm``, ``µm²`` and ``µm³``; every other
    token is capitalized. Tokens are joined with single spaces.
    """
    unit_symbols = {"um": "µm", "um2": "µm²", "um3": "µm³"}
    return " ".join(
        unit_symbols.get(token.lower(), token.capitalize())
        for token in label_text.split('_')
    )

def get_significance_asterisks(p_value):
    """Map a p-value to a significance marker.

    Returns "" for None, "***" below 0.001, "**" below 0.01, "*" below 0.05
    and "n.s." otherwise.
    """
    if p_value is None:
        return ""
    # Ordered from strictest to loosest so the first match wins.
    for threshold, marker in ((0.001, "***"), (0.01, "**"), (0.05, "*")):
        if p_value < threshold:
            return marker
    return "n.s."

def format_p_value_for_display(p_value):
    """Build the text shown above a comparison bar for *p_value*.

    Returns "" for None, "n.s. (p=0.123)" for non-significant values,
    "p < 0.001***" for very small values and "p=0.012*" style text otherwise.
    """
    if p_value is None:
        return ""
    marker = get_significance_asterisks(p_value)
    if marker == "n.s.":
        return f"n.s. (p={p_value:.3f})"
    p_text = "p < 0.001" if p_value < 0.001 else f"p={p_value:.3f}"
    return f"{p_text}{marker}"

# Pastel counterpart of every base colour, in the same order (default make_pastel scales).
PASTEL_PALETTE = list(map(make_pastel, ORIGINAL_CUSTOM_PALETTE))
# --- END Helper Functions ---

# --- Data Loading and Processing Function ---
def load_and_process_dataset(dataset_name_tag, base_dir, metrics_filename, 
                             image_dir_pattern_func,
                             group_assignment_df_external=None, 
                             derive_group_from_image_id_func=None):
    """Load the per-image metrics CSVs of one dataset into a single DataFrame.

    For every sub-directory ``<image_id>`` of ``base_dir`` accepted by
    ``image_dir_pattern_func(name, base_dir)`` the file
    ``<base_dir>/<image_id>/<image_id>_processed_<suffix>/<metrics_filename>``
    is read, where ``<suffix>`` is ``metrics_filename`` without the
    ``metrics_df_`` prefix and ``.csv`` extension. Missing, empty or unreadable
    files are skipped (unreadable ones with a printed message).

    Args:
        dataset_name_tag: Short label stored in the ``Dataset_Tag`` column and
            used in log messages and the default group name.
        base_dir: Directory containing one folder per image.
        metrics_filename: Name of the metrics CSV inside each processed folder.
        image_dir_pattern_func: Callable ``(dir_name, base_dir) -> bool``
            selecting the image folders.
        group_assignment_df_external: Optional DataFrame with ``Image_ID`` and
            ``Experimental_Group`` columns. Where it provides a group for an
            image, that group overrides any derived one.
        derive_group_from_image_id_func: Optional callable mapping an image id
            to its group label.

    Returns:
        DataFrame with the loaded rows plus ``Image_ID_Full``, ``Dataset_Tag``
        and ``Experimental_Group`` columns, or an empty DataFrame when the
        directory is missing/unlistable or no metrics could be loaded.
    """
    print(f"\n--- Loading Dataset: {dataset_name_tag} ---")
    if not os.path.isdir(base_dir):
        print(f"ERROR: Base directory '{base_dir}' for {dataset_name_tag} not found.")
        return pd.DataFrame()
    try:
        image_id_dirs = sorted(d for d in os.listdir(base_dir) if image_dir_pattern_func(d, base_dir))
    except Exception as e:
        # The predicate is caller-supplied, so anything may be raised here.
        print(f"ERROR: Could not list or filter directories in '{base_dir}': {e}")
        return pd.DataFrame()
    print(f"Found {len(image_id_dirs)} potential image directories for {dataset_name_tag}.")

    # Same for every image, so compute it once outside the loop.
    processed_suffix = metrics_filename.replace('metrics_df_', '').replace('.csv', '')

    all_metrics_data = []
    for image_id in image_id_dirs:
        processed_folder_name = f"{image_id}_processed_{processed_suffix}"
        metrics_file_path = os.path.join(base_dir, image_id, processed_folder_name, metrics_filename)
        if not os.path.exists(metrics_file_path):
            continue
        try:
            df = pd.read_csv(metrics_file_path)
            if df.empty:
                continue
            df['Image_ID_Full'] = image_id
            df['Dataset_Tag'] = dataset_name_tag
            if derive_group_from_image_id_func:
                # Every row of this file shares the image id, so derive the group once.
                df['Experimental_Group'] = derive_group_from_image_id_func(image_id)
            all_metrics_data.append(df)
        except Exception as e:
            # Best effort: one bad file must not abort the whole dataset.
            print(f"Error loading metrics for {image_id} from {metrics_file_path}: {e}. Skipping.")
    if not all_metrics_data:
        print(f"No metrics data loaded for {dataset_name_tag}.")
        return pd.DataFrame()
    dataset_df = pd.concat(all_metrics_data, ignore_index=True)

    if group_assignment_df_external is not None and 'Image_ID' not in group_assignment_df_external.columns:
        # e.g. an empty DataFrame built from an empty list: merging on it would raise KeyError.
        print(f"Warning: External group assignments for {dataset_name_tag} have no 'Image_ID' column. Ignoring them.")
    elif group_assignment_df_external is not None:
        dataset_df['Image_ID_For_Merge'] = dataset_df['Image_ID_Full']
        dataset_df = pd.merge(dataset_df, group_assignment_df_external, 
                              left_on='Image_ID_For_Merge', right_on='Image_ID',
                              how='left', suffixes=('', '_external'))
        # '_external' is only appended when the metrics already had a column of the same name.
        current_group_col = 'Experimental_Group' if 'Experimental_Group' in dataset_df.columns else None
        external_group_col = 'Experimental_Group_external' if 'Experimental_Group_external' in dataset_df.columns else None
        if external_group_col:
            if not current_group_col:
                dataset_df['Experimental_Group'] = dataset_df[external_group_col]
            else:
                # External assignment wins wherever it exists; otherwise keep the derived group.
                dataset_df['Experimental_Group'] = np.where(
                    dataset_df[external_group_col].notna(),
                    dataset_df[external_group_col],
                    dataset_df['Experimental_Group'])
            dataset_df.drop(columns=[external_group_col], inplace=True)
        # Remove merge helpers. With suffixes=('', '_external') the duplicate key from the
        # group table is named 'Image_ID_external' (never 'Image_ID_x' / 'Image_ID_y').
        dataset_df = dataset_df.drop(columns=['Image_ID_For_Merge', 'Image_ID_external'], errors='ignore')

    if 'Experimental_Group' not in dataset_df.columns or dataset_df['Experimental_Group'].isnull().all():
        print(f"Warning: No experimental groups assigned or derived for {dataset_name_tag}. Assigning 'DefaultGroup'.")
        dataset_df['Experimental_Group'] = f"{dataset_name_tag}_DefaultGroup"

    print(f"Finished loading {dataset_name_tag}: {len(dataset_df)} total cells from {dataset_df['Image_ID_Full'].nunique()} images.")
    print(f"Value counts for Experimental_Group in {dataset_name_tag}:\n{dataset_df['Experimental_Group'].value_counts(dropna=False)}")
    return dataset_df

def is_3d_image_dir(dir_name, base_path):
    """Return True when *dir_name* is an existing directory under *base_path* named with one uppercase letter."""
    if not os.path.isdir(os.path.join(base_path, dir_name)):
        return False
    is_single_capital = len(dir_name) == 1 and dir_name.isalpha() and dir_name.isupper()
    return is_single_capital
def is_2d_image_dir(dir_name, base_path):
    """Return True when *dir_name* starts with "iba1-" and is an existing directory under *base_path*."""
    if not dir_name.startswith("iba1-"):
        return False
    return os.path.isdir(os.path.join(base_path, dir_name))
def derive_2d_group(image_id_full):
    """Derive the 2D group label from an image folder name.

    A name containing ``iba1-<digits>`` followed by st/nd/rd/th yields
    ``2D_Batch<digits>``; anything else yields ``2D_Other``.
    """
    batch_match = re.search(r"iba1-(\d+)(?:st|nd|rd|th)", image_id_full)
    if batch_match is None:
        return "2D_Other"
    return f"2D_Batch{batch_match.group(1)}"

# --- Generic plotting function ---
def _with_experimental_group(data_df):
    """Return a copy of *data_df* whose grouping column is named 'Experimental_Group'.

    'Comparison_Group' is renamed to 'Experimental_Group' when the latter is
    absent, or when it exists but is entirely NaN while 'Comparison_Group' is
    not (the all-NaN column is dropped first so no duplicate names arise).
    Otherwise the copy is returned unchanged.
    """
    normalized = data_df.copy()
    if 'Comparison_Group' not in normalized.columns:
        return normalized
    if 'Experimental_Group' not in normalized.columns:
        return normalized.rename(columns={'Comparison_Group': 'Experimental_Group'})
    if normalized['Experimental_Group'].isnull().all() and not normalized['Comparison_Group'].isnull().all():
        normalized = normalized.drop(columns=['Experimental_Group'])
        normalized = normalized.rename(columns={'Comparison_Group': 'Experimental_Group'})
    return normalized


def plot_metric_boxplot(metric_col_name, data_df_source, dataset_tag_str, plot_title_suffix, 
                        all_stats_list_ref, export_directory_path, 
                        valid_groups_list, palette_map_orig_func, palette_map_pastel_func,
                        y_label_override=None, is_pre_aggregated=False):
    """Export a histogram and a per-image boxplot of one metric, and record a group test.

    Two PDFs are written to ``export_directory_path``:
    ``histogram_<tag>_<metric>.pdf`` (stacked histogram with KDE of the rows in
    ``data_df_source``) and ``boxplot_<tag>_<metric>.pdf`` (box + strip plot of
    one value per image, annotated with ``n=`` per group). With exactly two
    plotted groups a Welch t-test is run and drawn as a significance bar; with
    more than two a one-way ANOVA is run. Test results are appended to
    ``all_stats_list_ref`` as dicts.

    Args:
        metric_col_name: Column holding the metric.
        data_df_source: Rows to plot. Must carry 'Experimental_Group' (or
            'Comparison_Group', which is treated as the group column when
            'Experimental_Group' is absent or all NaN).
        dataset_tag_str: Label used in titles, file names and stats records.
        plot_title_suffix: Text appended to the boxplot title.
        all_stats_list_ref: List mutated in place with one dict per test.
        export_directory_path: Existing directory for the PDFs.
        valid_groups_list: Groups to plot, in plotting order.
        palette_map_orig_func: Callable ``groups -> {group: colour}`` for the
            histogram and strip points.
        palette_map_pastel_func: Callable ``groups -> {group: colour}`` for
            the boxes.
        y_label_override: Optional y-axis label for the boxplot.
        is_pre_aggregated: When False, rows are averaged per image and group
            before plotting; when True they are used as they are.

    Returns:
        None. Returns early (after printing) when required columns are missing
        or no data remains for the boxplot.
    """
    palette_map_orig = palette_map_orig_func(valid_groups_list)
    palette_map_pastel = palette_map_pastel_func(valid_groups_list)
    metric_label = format_axis_label(metric_col_name)

    print(f"--- ({dataset_tag_str}) Analyzing {metric_label} ---")

    # ---- Histogram ----
    hist_data = _with_experimental_group(data_df_source)
    if (metric_col_name not in hist_data.columns
            or 'Experimental_Group' not in hist_data.columns
            or hist_data['Experimental_Group'].isnull().all()):
        print(f"Skipping histogram for {metric_col_name} in {dataset_tag_str}: column or group info missing/all NaN.")
    else:
        fig_hist, ax_hist = plt.subplots(figsize=(12, 7))

        hist_hue_order = [g for g in valid_groups_list if g in palette_map_orig]
        if not hist_hue_order and valid_groups_list:
            hist_hue_order = valid_groups_list  # Fallback if palette mismatch

        sns.histplot(data=hist_data.dropna(subset=['Experimental_Group', metric_col_name]),
                     x=metric_col_name, hue='Experimental_Group', kde=True, multiple='stack',
                     palette={g: palette_map_orig.get(g, '#cccccc') for g in hist_hue_order},
                     hue_order=hist_hue_order, ax=ax_hist)
        ax_hist.set_title(f'({dataset_tag_str}) Distribution of {metric_label}')
        ax_hist.set_xlabel(metric_label)
        ax_hist.set_ylabel('Count / Density')
        if hist_hue_order:
            # Replace seaborn's legend with explicit colour patches in plotting order.
            legend_handles = [plt.Rectangle((0, 0), 1, 1, color=palette_map_orig.get(g, '#cccccc'))
                              for g in hist_hue_order]
            ax_hist.legend(title='Exp. Group', handles=legend_handles, labels=hist_hue_order)
        fig_hist.tight_layout()
        pdf_path_hist = os.path.join(export_directory_path, f'histogram_{dataset_tag_str}_{metric_col_name}.pdf')
        fig_hist.savefig(pdf_path_hist, format='pdf', bbox_inches='tight')
        plt.close(fig_hist)

    # ---- Boxplot data: one value per image ----
    if is_pre_aggregated:
        agg_data = _with_experimental_group(data_df_source)
        if 'Experimental_Group' not in agg_data.columns or metric_col_name not in agg_data.columns:
            print(f"ERROR: Cannot build boxplot {metric_col_name} in {dataset_tag_str}, missing metric or Group column.")
            return
    else:
        id_col_for_agg = 'Image_ID_Full' if 'Image_ID_Full' in data_df_source.columns else 'Image_ID'
        if id_col_for_agg not in data_df_source.columns or 'Experimental_Group' not in data_df_source.columns:
            print(f"ERROR: Cannot aggregate for boxplot {metric_col_name} in {dataset_tag_str}, missing ID or Group column.")
            return
        if metric_col_name not in data_df_source.columns:
            print(f"ERROR: Cannot aggregate for boxplot {metric_col_name} in {dataset_tag_str}, missing metric column.")
            return
        agg_data = data_df_source.groupby([id_col_for_agg, 'Experimental_Group'])[metric_col_name].mean().reset_index()

    agg_data = agg_data.dropna(subset=['Experimental_Group', metric_col_name])
    if agg_data.empty:
        print(f"No aggregated data for boxplot {metric_col_name} in {dataset_tag_str}.")
        return

    n_counts = agg_data['Experimental_Group'].value_counts().reindex(valid_groups_list).fillna(0).astype(int)

    # Dynamic figure width for boxplots
    num_groups = len(valid_groups_list)
    if num_groups <= 3:
        fig_width = max(4, num_groups * 1.5)  # Ensure min width, scale for few groups
    elif num_groups <= 5:
        fig_width = num_groups * 1.2
    else:
        fig_width = num_groups * 1.0  # More groups, less increment per group
    fig_width = min(fig_width, 12)  # Max width cap

    fig_box, ax_box = plt.subplots(figsize=(fig_width, 6))  # Dynamic width, fixed height

    box_plot_hue_order = [g for g in valid_groups_list if g in palette_map_pastel]
    if not box_plot_hue_order and valid_groups_list:
        box_plot_hue_order = valid_groups_list

    # hue mirrors x so the per-group palette is applied without seaborn's
    # "palette without hue" deprecation; dodge=False keeps each box centred on its tick.
    sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name,
                hue='Experimental_Group', order=box_plot_hue_order, hue_order=box_plot_hue_order,
                palette={g: palette_map_pastel.get(g, '#dddddd') for g in box_plot_hue_order},
                legend=False, dodge=False,
                showfliers=False, width=BOXPLOT_WIDTH, linewidth=BOXPLOT_LINEWIDTH, ax=ax_box)

    # One stripplot call per group so each group's points get its own solid colour.
    for g_name_plot in box_plot_hue_order:
        current_g_data_plot = agg_data[agg_data['Experimental_Group'] == g_name_plot]
        if not current_g_data_plot.empty:
            sns.stripplot(data=current_g_data_plot, x='Experimental_Group', y=metric_col_name,
                          order=box_plot_hue_order,
                          color=palette_map_orig.get(g_name_plot, '#333333'),
                          alpha=0.9, jitter=STRIPPLOT_JITTER, size=STRIPPLOT_SIZE, ax=ax_box, dodge=False)

    y_max_overall = agg_data[metric_col_name].max() if not agg_data[metric_col_name].empty else 0
    min_overall_data = agg_data[metric_col_name].min() if not agg_data[metric_col_name].empty else 0
    # Scale used to place annotations when every plotted value is zero.
    zero_data_scale = abs(min_overall_data) if min_overall_data != 0 else 1
    max_n_text_y = -np.inf

    # "n=" labels just above the higher of each group's top point and its nominal upper whisker.
    for i, g_name_text in enumerate(box_plot_hue_order):
        n_val = n_counts.get(g_name_text, 0)
        g_points = agg_data[agg_data['Experimental_Group'] == g_name_text][metric_col_name]
        text_y_val = y_max_overall * 1.02
        if not g_points.empty:
            q75 = g_points.quantile(0.75)
            iqr = q75 - g_points.quantile(0.25)
            upper_w = q75 + 1.5 * iqr if iqr > 0 else q75
            max_strip = g_points.max()
            text_y_val = max(upper_w, max_strip) * 1.05
            if text_y_val == 0 and y_max_overall == 0:
                text_y_val = 0.05 * zero_data_scale
        elif y_max_overall == 0:
            text_y_val = 0.05 * zero_data_scale
        ax_box.text(i, text_y_val, f"n={n_val}", ha='center', va='bottom', fontsize=9)
        max_n_text_y = max(max_n_text_y, text_y_val)

    ax_box.set_title(f'({dataset_tag_str}) Avg {metric_label} {plot_title_suffix}')
    ax_box.set_xlabel('Exp. Group')
    ax_box.set_ylabel(y_label_override if y_label_override else f'Avg {metric_label}')

    p_val = None
    y_lim_top = max_n_text_y * 1.05 if max_n_text_y > -np.inf else y_max_overall * 1.1
    if y_lim_top == 0 and y_max_overall == 0:
        y_lim_top = 0.1 * zero_data_scale

    if len(box_plot_hue_order) == 2:
        g1n, g2n = box_plot_hue_order[0], box_plot_hue_order[1]
        g1d = agg_data[agg_data['Experimental_Group'] == g1n][metric_col_name].dropna()
        g2d = agg_data[agg_data['Experimental_Group'] == g2n][metric_col_name].dropna()
        if len(g1d) >= 2 and len(g2d) >= 2:
            # equal_var=False: Welch's t-test.
            s_stat, p_val = ttest_ind(g1d, g2d, nan_policy='omit', equal_var=False)
            all_stats_list_ref.append({'Dataset': dataset_tag_str, 'Metric': f"Avg {metric_label}", 'Comparison': f"{g1n} vs {g2n}", 'Test': 'T-test', 'Statistic': f"{s_stat:.3f}", 'P_Value': p_val, 'Significance': get_significance_asterisks(p_val)})

            current_max_data_for_bar = 0
            if not g1d.empty:
                current_max_data_for_bar = max(current_max_data_for_bar, g1d.max())
            if not g2d.empty:
                current_max_data_for_bar = max(current_max_data_for_bar, g2d.max())

            # Significance bar sits above both the data and the "n=" labels.
            y_bar_start_ref = max_n_text_y if max_n_text_y > -np.inf else current_max_data_for_bar
            y_bar = y_bar_start_ref * 1.08
            y_bar = max(y_bar, current_max_data_for_bar * 1.15)
            if y_bar == 0 and current_max_data_for_bar == 0:
                y_bar = 0.08 * zero_data_scale

            t_level = y_bar * 1.05
            if t_level == 0 and y_bar == 0:
                t_level = 0.1 * zero_data_scale

            tick_h_abs = (y_bar - (max_n_text_y if max_n_text_y > -np.inf and max_n_text_y != 0 else y_bar * 0.9)) * 0.1
            if tick_h_abs <= 0:
                tick_h_abs = y_bar * 0.02 if y_bar != 0 else 0.002  # Ensure tick_h_abs is positive

            ax_box.plot([0, 1], [y_bar, y_bar], lw=1.5, c='k')
            ax_box.plot([0, 0], [y_bar - tick_h_abs, y_bar], lw=1.5, c='k')
            ax_box.plot([1, 1], [y_bar - tick_h_abs, y_bar], lw=1.5, c='k')
            ax_box.text(0.5, t_level, format_p_value_for_display(p_val), ha='center', va='bottom', fontsize=10)
            y_lim_top = max(y_lim_top, t_level * 1.05)
    elif len(box_plot_hue_order) > 2:
        # ANOVA across the plotted groups; only run when every group present has >= 2 images.
        plotted_data = agg_data[agg_data['Experimental_Group'].isin(box_plot_hue_order)]
        samples_anova = [d[metric_col_name].dropna() for _, d in plotted_data.groupby('Experimental_Group')]
        valid_samples_anova = [s for s in samples_anova if len(s) >= 2]
        if len(valid_samples_anova) == len(plotted_data['Experimental_Group'].unique()):
            f_stat, p_val = f_oneway(*valid_samples_anova)
            all_stats_list_ref.append({'Dataset': dataset_tag_str, 'Metric': f"Avg {metric_label}", 'Comparison': f"Across {len(box_plot_hue_order)} groups", 'Test': 'ANOVA', 'Statistic': f"{f_stat:.3f}", 'P_Value': p_val, 'Significance': get_significance_asterisks(p_val)})

    # Grow (never shrink) the y-axis so the annotations fit.
    current_ylim_b, current_ylim_t = ax_box.get_ylim()
    new_top_limit = max(current_ylim_t, y_lim_top)
    if new_top_limit <= current_ylim_b:
        new_top_limit = current_ylim_b + (abs(current_ylim_b) * 0.1 if current_ylim_b != 0 else 0.1)  # Ensure some space
    if new_top_limit == current_ylim_b and new_top_limit == 0:
        new_top_limit = 0.1  # Absolute floor for all-zero data
    ax_box.set_ylim(bottom=current_ylim_b, top=new_top_limit)

    fig_box.tight_layout()
    pdf_path_box = os.path.join(export_directory_path, f'boxplot_{dataset_tag_str}_{metric_col_name}.pdf')
    fig_box.savefig(pdf_path_box, format='pdf', bbox_inches='tight')
    plt.close(fig_box)

# --- Palette generation functions (to pass to plotting function) ---
def get_original_palette(groups):
    """Map each group to a colour from ORIGINAL_CUSTOM_PALETTE by position, wrapping around."""
    palette_size = len(ORIGINAL_CUSTOM_PALETTE)
    colour_by_group = {}
    for position, group in enumerate(groups):
        colour_by_group[group] = ORIGINAL_CUSTOM_PALETTE[position % palette_size]
    return colour_by_group
def get_pastel_palette(groups):
    """Map each group to a colour from PASTEL_PALETTE by position, wrapping around."""
    palette_size = len(PASTEL_PALETTE)
    colour_by_group = {}
    for position, group in enumerate(groups):
        colour_by_group[group] = PASTEL_PALETTE[position % palette_size]
    return colour_by_group

# --- Main Analysis Script ---
# 1. Load 3D Data
# The group CSV lists image IDs under 'Group1' / 'Group2' columns; reshape it to one
# (Image_ID, Experimental_Group) row per image for merging onto the metrics.
group_df_3d = None
try:
    group_df_raw_3d = pd.read_csv(GROUP_INFO_FILE_3D)
    group_df_raw_3d = group_df_raw_3d.rename(columns={'Unnamed: 0': 'Batch'})
    group_assignments_3d_list = []
    for _, row_3d in group_df_raw_3d.iterrows():
        if pd.notna(row_3d['Group1']): group_assignments_3d_list.append({'Image_ID': str(row_3d['Group1']).strip(), 'Experimental_Group': '3D_Group1'})
        if pd.notna(row_3d['Group2']): group_assignments_3d_list.append({'Image_ID': str(row_3d['Group2']).strip(), 'Experimental_Group': '3D_Group2'})
    # Explicit columns so an empty assignment list still yields a mergeable frame
    # (pd.DataFrame([]) has no columns and the later merge on 'Image_ID' would fail).
    group_df_3d = pd.DataFrame(group_assignments_3d_list, columns=['Image_ID', 'Experimental_Group'])
except Exception as e_3d_group:
    # Best effort: without the group file the loader falls back to a default group.
    print(f"Warning: Could not load or process 3D group assignments from '{GROUP_INFO_FILE_3D}': {e_3d_group}")

data_3d_df = load_and_process_dataset("3D", BASE_DIR_3D, METRICS_FILENAME_3D, 
                                      is_3d_image_dir, group_assignment_df_external=group_df_3d)

# 2. Analyze 3D Data
if not data_3d_df.empty:
    export_dir_3d = os.path.join(EXPORT_BASE_DIRECTORY, "3D_Analysis")
    os.makedirs(export_dir_3d, exist_ok=True)
    
    valid_groups_3d = sorted(data_3d_df['Experimental_Group'].dropna().unique())
    if valid_groups_3d: 
        # Cell count per image = number of metric rows per image.
        cells_3d_df_agg = data_3d_df.groupby(['Image_ID_Full', 'Experimental_Group']).size().reset_index(name='num_cells')
        plot_metric_boxplot(
            metric_col_name='num_cells', data_df_source=cells_3d_df_agg, dataset_tag_str="3D", 
            plot_title_suffix="per Image (Cell Count)", all_stats_list_ref=all_stats_records, 
            export_directory_path=export_dir_3d, valid_groups_list=valid_groups_3d, 
            palette_map_orig_func=get_original_palette, palette_map_pastel_func=get_pastel_palette, 
            y_label_override="Number of Cells", is_pre_aggregated=True
        )
        metrics_3d = ['shortest_distance_um', 'skan_num_branches', 'skan_total_length_um', 
                      'skan_avg_branch_length_um', 'skan_num_junctions', 'skan_num_endpoints', 
                      'sphericity', 'volume_um3']
        for metric in metrics_3d:
            if metric in data_3d_df.columns:
                plot_metric_boxplot(metric, data_3d_df, "3D", "per Image", all_stats_records, export_dir_3d, 
                                    valid_groups_3d, get_original_palette, get_pastel_palette, is_pre_aggregated=False)
    else: print("No valid groups for 3D data analysis.")


# 3. Load 2D Data
# No group file for 2D: the group is derived from each image folder name.
data_2d_df = load_and_process_dataset("2D", BASE_DIR_2D, METRICS_FILENAME_2D, 
                                      is_2d_image_dir, 
                                      derive_group_from_image_id_func=derive_2d_group)

# 4. Analyze 2D Data
if not data_2d_df.empty:
    export_dir_2d = os.path.join(EXPORT_BASE_DIRECTORY, "2D_Analysis")
    os.makedirs(export_dir_2d, exist_ok=True)
    valid_groups_2d = sorted(data_2d_df['Experimental_Group'].dropna().unique())
    if valid_groups_2d:
        cells_2d_df_agg = data_2d_df.groupby(['Image_ID_Full', 'Experimental_Group']).size().reset_index(name='num_cells')
        plot_metric_boxplot(
            metric_col_name='num_cells', data_df_source=cells_2d_df_agg, dataset_tag_str="2D", 
            plot_title_suffix="per Image (Cell Count)", all_stats_list_ref=all_stats_records, 
            export_directory_path=export_dir_2d, valid_groups_list=valid_groups_2d, 
            palette_map_orig_func=get_original_palette, palette_map_pastel_func=get_pastel_palette, 
            y_label_override="Number of Cells", is_pre_aggregated=True
        )
        metrics_2d = ['shortest_distance_um', 'area_um2', 'perimeter_um', 'circularity', 
                      'eccentricity', 'solidity', 'major_axis_length_um', 'minor_axis_length_um',
                      'skan_num_branches', 'skan_total_length_um', 'skan_avg_branch_length_um',
                      'skan_num_junctions', 'skan_num_endpoints']
        for metric in metrics_2d:
            if metric in data_2d_df.columns:
                plot_metric_boxplot(metric, data_2d_df, "2D", "per Image", all_stats_records, export_dir_2d, 
                                    valid_groups_2d, get_original_palette, get_pastel_palette, is_pre_aggregated=False)
    else: print("No valid groups for 2D data analysis.")


# 5. Specific 3D Group1 vs. All 2D shortest_distance_um comparison
print("\n--- Comparing 3D Group1 vs ALL 2D (Shortest Distance) ---")
# Emptiness is checked first (an empty frame has no columns to inspect); the metric
# column must exist in both datasets, mirroring the per-metric guards above.
if (not data_3d_df.empty and not data_2d_df.empty
        and '3D_Group1' in data_3d_df['Experimental_Group'].unique()
        and 'shortest_distance_um' in data_3d_df.columns
        and 'shortest_distance_um' in data_2d_df.columns):
    data_3d_g1_dist_agg = data_3d_df[data_3d_df['Experimental_Group'] == '3D_Group1'].groupby('Image_ID_Full')['shortest_distance_um'].mean().reset_index()
    data_3d_g1_dist_agg['Comparison_Group'] = '3D_Group1'
    
    data_2d_dist_agg = data_2d_df.groupby('Image_ID_Full')['shortest_distance_um'].mean().reset_index()
    data_2d_dist_agg['Comparison_Group'] = '2D_All'

    comparison_df = pd.concat([data_3d_g1_dist_agg, data_2d_dist_agg], ignore_index=True)
    comparison_valid_groups = ['3D_Group1', '2D_All']
    
    export_dir_comparison = os.path.join(EXPORT_BASE_DIRECTORY, "3D_vs_2D_Comparison")
    os.makedirs(export_dir_comparison, exist_ok=True)

    plot_metric_boxplot(
        metric_col_name='shortest_distance_um', data_df_source=comparison_df, 
        dataset_tag_str="3D-G1_vs_2D", plot_title_suffix="per Image", 
        all_stats_list_ref=all_stats_records, export_directory_path=export_dir_comparison, 
        valid_groups_list=comparison_valid_groups, 
        palette_map_orig_func=get_original_palette, palette_map_pastel_func=get_pastel_palette, 
        y_label_override=f"Avg {format_axis_label('shortest_distance_um')}", is_pre_aggregated=True
    )
else:
    print("Skipping 3D Group1 vs 2D comparison: Conditions not met.")


# --- Export All Statistics ---
if all_stats_records:
    stats_df = pd.DataFrame(all_stats_records)
    # 'Statistic' is recorded as a formatted string; coerce both numeric columns
    # so the CSV float_format applies to them.
    if 'P_Value' in stats_df.columns:
        stats_df['P_Value'] = pd.to_numeric(stats_df['P_Value'], errors='coerce')
    if 'Statistic' in stats_df.columns:
        stats_df['Statistic'] = pd.to_numeric(stats_df['Statistic'], errors='coerce')

    stats_csv_path = os.path.join(EXPORT_BASE_DIRECTORY, 'summary_statistics_combined.csv')
    stats_df.to_csv(stats_csv_path, index=False, float_format='%.4g') # Format floats in CSV
    print(f"\n--- Combined summary statistics exported to: {stats_csv_path} ---")
else:
    print("\n--- No statistical tests were performed or recorded. ---")

print("\n--- Main Analysis and Export Complete ---")

Created base export directory: /home/kirill/Desktop/For_Kirill/Takeshi_analysis_combined

--- Loading Dataset: 3D ---
Found 14 potential image directories for 3D.
Finished loading 3D: 2108 total cells from 14 images.
Value counts for Experimental_Group in 3D:
Experimental_Group
3D_Group1    1064
3D_Group2    1044
Name: count, dtype: int64
--- (3D) Analyzing Num Cells ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (3D) Analyzing Shortest Distance µm ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (3D) Analyzing Skan Num Branches ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (3D) Analyzing Skan Total Length µm ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (3D) Analyzing Skan Avg Branch Length µm ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (3D) Analyzing Skan Num Junctions ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (3D) Analyzing Skan Num Endpoints ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (3D) Analyzing Sphericity ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (3D) Analyzing Volume µm³ ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,



--- Loading Dataset: 2D ---
Found 9 potential image directories for 2D.
Finished loading 2D: 96 total cells from 9 images.
Value counts for Experimental_Group in 2D:
Experimental_Group
2D_Batch3    38
2D_Batch1    31
2D_Batch2    27
Name: count, dtype: int64
--- (2D) Analyzing Num Cells ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Shortest Distance µm ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Area µm² ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Perimeter µm ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Circularity ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Eccentricity ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Solidity ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Major Axis Length µm ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Minor Axis Length µm ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Skan Num Branches ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Skan Total Length µm ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Skan Avg Branch Length µm ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Skan Num Junctions ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,


--- (2D) Analyzing Skan Num Endpoints ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,



--- Comparing 3D Group1 vs ALL 2D (Shortest Distance) ---
--- (3D-G1_vs_2D) Analyzing Shortest Distance µm ---

--- Combined summary statistics exported to: /home/kirill/Desktop/For_Kirill/Takeshi_analysis_combined/summary_statistics_combined.csv ---

--- Main Analysis and Export Complete ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(data=agg_data, x='Experimental_Group', y=metric_col_name, order=box_plot_hue_order,
