# Analysis

## Imports

In [1]:
import sys
import os
import pandas as pd

# Absolute path of the directory the notebook kernel is running in
notebook_dir = os.path.abspath(os.getcwd())

# The importable modules live in ../ncp/src relative to that directory
ncp_src_path = os.path.abspath(
    os.path.join(notebook_dir, os.pardir, "ncp", "src")
)

# Register the source directory only once, so re-running this cell
# does not keep growing sys.path
if sys.path.count(ncp_src_path) == 0:
    sys.path.append(ncp_src_path)

# Now you can import the modules
from analysis import perform_and_save_analysis

from visualization import load_and_print_results


# Keep cell outputs uncluttered: every warning category is ignored from here on.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# the analysis code - lift the filter when debugging.
import warnings

warnings.filterwarnings(action="ignore", category=Warning)

## Functions

## Load and clean data

In [2]:
# Parquet file to analyse, relative to the notebook's working directory
data_path = "output/ncp_augmented.parquet"

# Read the table and keep only the rows whose Metadata_line_source is "human"
df = pd.read_parquet(data_path).query("Metadata_line_source == 'human'")

## Analyze

In [3]:
# A column counts as a feature when its name contains one of these
# compartment markers (the string is interpreted as a regular expression)
compartment_pattern = "Cells_|Cytoplasm_|Nuclei_"
is_feature_col = df.columns.str.contains(compartment_pattern)

# HACK to reduce the number of features: only the first 30 matches are kept
feature_cols = df.columns[is_feature_col].tolist()[:30]

## Control vs Deletion, per cell type

In [4]:
# Control vs deletion: each value of Metadata_cell_type is analysed as its own
# category, with Metadata_line_condition encoded as control -> 0, deletion -> 1.
control_vs_deletion_dir = "output/analysis_results/control_vs_deletion/"

# Create the output directory up front, the same way the pairwise cell-type
# analysis in this notebook does before calling the helper.
# NOTE(review): whether perform_and_save_analysis creates missing directories
# itself is not visible from this notebook - confirm.
os.makedirs(control_vs_deletion_dir, exist_ok=True)

perform_and_save_analysis(
    df=df,
    category_col="Metadata_cell_type",
    target_col="Metadata_line_condition",
    target_col_mapping_dict={"control": 0, "deletion": 1},
    feature_cols=feature_cols,
    output_dir=control_vs_deletion_dir,
)

Analyzing category: stem
Analyzing category: progen
Analyzing category: neuron
Analyzing category: astro
Analysis complete. Summary results saved to output/analysis_results/control_vs_deletion/summary_results.parquet


## Cell type A vs B, per condition

In [5]:
# Columns removed from the data whenever "stem" is one of the two cell types
# being compared. All three are Nuclei_ObjectSkeleton_*_CellImageSkel
# measurements, so the names are generated from the part that differs.
columns_to_drop_stem_cell = [
    f"Nuclei_ObjectSkeleton_{measurement}_CellImageSkel"
    for measurement in (
        "NumberNonTrunkBranches",
        "NumberBranchEnds",
        "TotalObjectSkeletonLength",
    )
]

In [6]:
from itertools import combinations

# Cell types to compare against each other
cell_types = ["stem", "neuron", "progen", "astro"]

# Every unordered pair of cell types (4 types -> 6 pairs)
cell_type_pairs = list(combinations(cell_types, 2))

# Run one analysis per pair; within each pair, every value of
# Metadata_line_condition is analysed as its own category.
for cell_type_0, cell_type_1 in cell_type_pairs:
    # Columns listed in `columns_to_drop_stem_cell` are excluded from any
    # comparison that involves "stem"; only names actually present are dropped.
    if "stem" in (cell_type_0, cell_type_1):
        cols_to_drop = [col for col in columns_to_drop_stem_cell if col in df.columns]
    else:
        cols_to_drop = []

    # `DataFrame.drop` returns a new frame, so the shared `df` is never
    # modified and no separate `.copy()` is needed beforehand.
    df_copy = df.drop(columns=cols_to_drop)

    # Keep the feature list in sync with the frame: a dropped column must not
    # be requested as a feature. With the current 30-feature HACK this is
    # usually a no-op, but it matters as soon as the full feature list is used.
    # NOTE(review): presumably the helper selects `feature_cols` from `df` -
    # confirm against perform_and_save_analysis.
    dropped = set(cols_to_drop)
    pair_feature_cols = [col for col in feature_cols if col not in dropped]

    # Encode the first cell type of the pair as 0 and the second as 1
    target_col_mapping_dict = {cell_type_0: 0, cell_type_1: 1}

    # Each pair writes its results into its own sub-directory
    output_dir = (
        f"output/analysis_results/cell_type_a_vs_b/{cell_type_0}_vs_{cell_type_1}/"
    )

    # Make sure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    perform_and_save_analysis(
        df=df_copy,
        category_col="Metadata_line_condition",
        target_col="Metadata_cell_type",
        target_col_mapping_dict=target_col_mapping_dict,
        feature_cols=pair_feature_cols,
        output_dir=output_dir,
    )

Analyzing category: control
Analyzing category: deletion
Analysis complete. Summary results saved to output/analysis_results/cell_type_a_vs_b/stem_vs_neuron/summary_results.parquet
Analyzing category: control
Analyzing category: deletion
Analysis complete. Summary results saved to output/analysis_results/cell_type_a_vs_b/stem_vs_progen/summary_results.parquet
Analyzing category: control
Analyzing category: deletion
Analysis complete. Summary results saved to output/analysis_results/cell_type_a_vs_b/stem_vs_astro/summary_results.parquet
Analyzing category: control
Analyzing category: deletion
Analysis complete. Summary results saved to output/analysis_results/cell_type_a_vs_b/neuron_vs_progen/summary_results.parquet
Analyzing category: control
Analyzing category: deletion
Analysis complete. Summary results saved to output/analysis_results/cell_type_a_vs_b/neuron_vs_astro/summary_results.parquet
Analyzing category: control
Analyzing category: deletion
Analysis complete. Summary results s