### Cell Segmentation

In [None]:
from deepcell.applications import Mesmer
# Create the Mesmer application object once; the tiling cell further down calls
# app.predict(...) on every tile.
# NOTE(review): construction presumably loads model weights — confirm runtime/network needs.
app = Mesmer()

In [None]:
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
# Folder holding the stitched TRITC fluorescence image for this sample.
fluorescence_dir = r"..\data\high_dose\0\fluorescence_tritc"
fluorescence_path = os.path.join(fluorescence_dir, "stitched.png")

# Read as a single-channel (grayscale) array.
fluorescence_cell_square = cv2.imread(fluorescence_path, cv2.IMREAD_GRAYSCALE)

# cv2.imread reports a missing or unreadable file by returning None rather than
# raising, which would otherwise surface much later as an obscure AttributeError.
if fluorescence_cell_square is None:
    raise FileNotFoundError(f"Could not read fluorescence image: {fluorescence_path}")

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

# === Updated Functions ===

def split_into_tiles(image, tile_size=1000):
    """Cut an image into a raster-ordered grid of tiles without padding.

    Tiles on the bottom/right edge are smaller when the image size is not a
    multiple of ``tile_size``.

    Parameters
    ----------
    image : array-like with at least two dimensions
        Sliced along its first two axes.
    tile_size : int
        Step and maximum edge length of each tile.

    Returns
    -------
    tiles : list
        Slices of ``image`` (row-major order).
    coords : list of (int, int)
        Top-left ``(row, col)`` of each tile, used for reconstruction.
    """
    n_rows, n_cols = image.shape[0], image.shape[1]
    coords = [
        (top, left)
        for top in range(0, n_rows, tile_size)
        for left in range(0, n_cols, tile_size)
    ]
    tiles = [image[top:top + tile_size, left:left + tile_size] for top, left in coords]
    return tiles, coords

def reconstruct_from_tiles(tiles, coords, original_size):
    """Paste variable-size 2-D tiles back onto a zero-filled float32 canvas.

    Parameters
    ----------
    tiles : sequence of 2-D arrays
    coords : sequence of (int, int)
        Top-left ``(row, col)`` of each tile, paired with ``tiles`` by position.
    original_size : tuple
        Shape of the output array.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``original_size``.
    """
    canvas = np.zeros(original_size, dtype=np.float32)
    for (top, left), patch in zip(coords, tiles):
        rows, cols = patch.shape
        canvas[top:top + rows, left:left + cols] = patch
    return canvas

# === Main Processing ===

# The image is loaded above under the name `fluorescence_cell_square`; bind it to
# the name this cell works with so the cell runs on a fresh kernel instead of
# raising NameError.
# NOTE(review): the original comment assumed shape (4600, 4600) — confirm for this image.
fluorescence_cell = fluorescence_cell_square
tile_size = 1000
original_size = fluorescence_cell.shape

fluorescence_cell_tiles, coords = split_into_tiles(fluorescence_cell, tile_size)

# NOTE(review): `data_dir` is not assigned by any cell above this one (it first
# appears in the "Create Expression Matrix" section), and the mask-stitching cell
# later reads tiles from os.path.join(results_dir, "masks") — confirm which
# directory is intended and define it before this cell.
mask_dir = os.path.join(data_dir, "masks")
os.makedirs(mask_dir, exist_ok=True)

masks = []
for i, tile in enumerate(fluorescence_cell_tiles):
    print(f"Processing tile {i} at position {coords[i]}...")

    h, w = tile.shape
    # Build a (1, h, w, 2) input: the fluorescence tile in channel 0 and an
    # all-zero plane in channel 1.
    dapi_img_model = tile.reshape(1, h, w, 1)
    cytosol_img_model = np.zeros((1, h, w, 1))
    model_input = np.concatenate((dapi_img_model, cytosol_img_model), axis=-1)

    # Keep the first batch item / first output channel, cropped to the tile size.
    mask = app.predict(model_input, image_mpp=1.0, compartment='nuclear')[0, :h, :w, 0]
    masks.append(mask)

    # Per-tile outputs: PNG for viewing, NPY keeps the raw label values.
    plt.imsave(os.path.join(mask_dir, f"mask_tile_{i}.png"), mask, cmap='gray')
    np.save(os.path.join(mask_dir, f"mask_tile_{i}.npy"), mask)

# Reconstruct final full-size mask (labels are per-tile here, not globally unique)
combined_mask = reconstruct_from_tiles(masks, coords, original_size)

plt.imsave(os.path.join(mask_dir, "stitched.png"), combined_mask, cmap='gray')
plt.imsave(os.path.join(mask_dir, "binary_stitched.png"), (combined_mask > 0).astype(np.uint8), cmap='gray')

### Create Expression Matrix

In [None]:
import numpy as np
import os

# Root folder of the high-dose sample; the three arrays below are read from it.
data_dir = r"..\data\high_dose"
maldi_ihc_square, lipids_square, mzs = (
    np.load(os.path.join(data_dir, fname))
    for fname in ("maldi_ihc.npy", "lipids_stitched.npy", "mzs.npy")
)

In [None]:
results_dir = r"..\results\high_dose"

# One stitched image is expected per channel index 0..22.
channels = range(23)
stitched_arrays = []
missing_channels = []

for channel in channels:
    stitched_path = os.path.join(results_dir, str(channel), "outputs", "stitched.png")

    # cv2.imread returns None (no exception) for an unreadable file, so treat
    # "absent" and "unreadable" the same way.
    img = cv2.imread(stitched_path, cv2.IMREAD_GRAYSCALE) if os.path.exists(stitched_path) else None
    if img is None:
        print(f"Missing stitched image for channel {channel}")
        missing_channels.append(channel)
        continue

    # Scale 8-bit intensities to [0, 1].
    stitched_arrays.append(img.astype(np.float32) / 255.0)

# Skipping a channel would shift every later channel by one position in the
# stacked array, so position c would no longer correspond to channel c when the
# stack is indexed by channel downstream. Stop instead of stacking misaligned data.
if missing_channels:
    raise FileNotFoundError(
        f"Stitched images missing or unreadable for channels: {missing_channels}"
    )

maldi_ihc_gsr = np.stack(stitched_arrays, axis=0)

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt
import re

# ----------------------------
# CONFIGURATION
# ----------------------------
# NOTE(review): tiles are read from results_dir/masks, whereas the segmentation
# cell writes them to os.path.join(data_dir, "masks") — confirm both resolve to
# the same folder.
mask_dir = os.path.join(results_dir, "masks")
# Tile coordinates are re-derived from tile_size and stitched_size below, so both
# must match the values used when the tiles were produced.
tile_size = 1000
stitched_size = (4600, 4600)  # Final output size with edge tiles
target_h, target_w = stitched_size

# ----------------------------
# LOAD MASK TILES (correct numeric order)
# ----------------------------
def extract_tile_index(filename):
    """Return the integer N from a name containing ``mask_tile_N.npy``, or -1 if absent."""
    found = re.search(r"mask_tile_(\d+)\.npy", filename)
    if found is None:
        return -1
    return int(found.group(1))

# Collect the per-tile .npy files, then order them by their numeric suffix
# (a plain string sort would put tile 10 before tile 2).
tile_files = [
    name
    for name in os.listdir(mask_dir)
    if name.startswith("mask_tile_") and name.endswith(".npy")
]
tile_files.sort(key=extract_tile_index)
mask_tiles = [np.load(os.path.join(mask_dir, name)) for name in tile_files]

# ----------------------------
# INFER COORDINATES IN RASTER ORDER
# ----------------------------
def infer_tile_coords(image_shape, tile_size):
    """List the top-left (y, x) of every tile of a ``tile_size`` grid, in raster order."""
    height, width = image_shape[0], image_shape[1]
    return [
        (top, left)
        for top in range(0, height, tile_size)
        for left in range(0, width, tile_size)
    ]

# Top-left (y, x) of each tile in raster order; paired by position with the
# numerically sorted mask_tiles (assumes tiles were numbered in raster order when saved).
coords = infer_tile_coords(stitched_size, tile_size)

# ----------------------------
# FUNCTION: STITCH MASKS WITH UNIQUE LABELS
# ----------------------------
def reconstruct_mask_with_unique_labels(mask_tiles, coords, stitched_size):
    """Stitch per-tile label masks into one mask with globally unique labels.

    Every positive label of every tile is mapped to a fresh consecutive id
    (1, 2, 3, ...) in tile order and, within a tile, in ascending order of the
    original label value. Values <= 0 are treated as background and become 0.
    Objects are relabelled per tile, so an object spanning a tile border keeps
    one id per tile.

    Parameters
    ----------
    mask_tiles : sequence of 2-D arrays
        Label masks, one per tile.
    coords : sequence of (int, int)
        Top-left ``(y, x)`` of each tile, paired with ``mask_tiles`` by position.
    stitched_size : tuple
        Shape of the output mask.

    Returns
    -------
    numpy.ndarray
        int32 label image of shape ``stitched_size``.
    """
    combined_mask = np.zeros(stitched_size, dtype=np.int32)
    label_offset = 0

    for tile, (y, x) in zip(mask_tiles, coords):
        tile = tile.astype(np.int32)
        foreground = tile > 0

        # One pass over the tile: `inverse` holds, for each foreground pixel,
        # the rank of its label among the tile's sorted unique labels. This
        # replaces one full-tile boolean mask per label.
        unique_labels, inverse = np.unique(tile[foreground], return_inverse=True)

        new_tile = np.zeros_like(tile)
        new_tile[foreground] = label_offset + 1 + inverse
        label_offset += unique_labels.size

        h, w = tile.shape
        combined_mask[y:y+h, x:x+w] = new_tile

    return combined_mask

# ----------------------------
# STITCH AND SAVE
# ----------------------------
# Tiles and coordinates are paired by position with zip(), which stops at the
# shorter sequence; a count mismatch (stale files in mask_dir, wrong stitched_size
# or tile_size) would silently produce an incomplete mask, so fail loudly instead.
if len(mask_tiles) != len(coords):
    raise ValueError(
        f"Found {len(mask_tiles)} mask tiles but expected {len(coords)} "
        f"for stitched_size={stitched_size} and tile_size={tile_size}"
    )

stitched_mask = reconstruct_mask_with_unique_labels(mask_tiles, coords, stitched_size)
plt.imsave(os.path.join(mask_dir, "stitched_mask.png"), stitched_mask, cmap='gray')

In [None]:
import numpy as np
import cv2
import os
import pandas as pd
from skimage.measure import regionprops
from tqdm import tqdm

# ----------------------------
# CONFIGURATION
# ----------------------------
# MALDI-IHC channel names; position c in this list is read from maldi_ihc_gsr[c].
maldi_ihc_labels = [
    "AKT", "AB42", "APP", "CTSD", "GFAP", "GLUT1/SLC2A1", "GSK3B", "Histone H2A.X", "IBA-1/AIF1",
    "LC3A/MAP1LC3A", "MBP", "NeuN/RBFOX3", "NEFL", "NCSTN", "pGSK3B_S9", "pTau_S404",
    "pTau_T205", "PVALB", "RAB7A", "TUBB3", "SYN1", "SNCA_B", "ADRA1A"
]

# Resize target (not used by the extraction below)
target_h, target_w = 4600, 4600

# ----------------------------
# EXTRACT REGIONPROPS
# ----------------------------
props = regionprops(stitched_mask)
print(f"Found {len(props)} labeled cell regions...")

# Pixel (row, col) index arrays of every region, sliced once and reused for
# every channel.
region_pixels = [(prop.coords[:, 0], prop.coords[:, 1]) for prop in props]

# Columns are collected in a dict and turned into a DataFrame in one step;
# inserting hundreds of columns one at a time fragments the frame and triggers
# pandas' PerformanceWarning.
expression_columns = {
    'y_centroid': [prop.centroid[0] for prop in props],
    'x_centroid': [prop.centroid[1] for prop in props],
}

# ----------------------------
# EXTRACT MALDI-IHC FEATURES
# ----------------------------
# Uses the stitched per-channel images (maldi_ihc_gsr), not maldi_ihc_square.
print("Extracting MALDI-IHC features...")
for c, marker in enumerate(tqdm(maldi_ihc_labels, desc="MALDI-IHC")):
    img = maldi_ihc_gsr[c, :, :]
    # Mean intensity over the pixels of each labeled region.
    expression_columns[marker] = [img[rows, cols].mean() for rows, cols in region_pixels]

# ----------------------------
# EXTRACT LIPID FEATURES
# ----------------------------
print("Extracting Lipid features...")
for c, mz in enumerate(tqdm(mzs, desc="Lipids")):
    img = lipids_square[c, :, :]
    expression_columns[f"mz_{mz:.4f}"] = [img[rows, cols].mean() for rows, cols in region_pixels]

df_expression = pd.DataFrame(expression_columns)

# ----------------------------
# SAVE EXPRESSION MATRIX
# ----------------------------
df_filename = os.path.join(results_dir, "maldi_ihc_lipid_df.csv")
df_expression.to_csv(df_filename, index=False)
print(f"Saved expression matrix to: {df_filename}")

In [None]:
import scanpy as sc
import anndata

# Centroid columns become per-cell metadata; every other column is a feature.
centroid_xy = df_expression[['x_centroid', 'y_centroid']]
feature_cols = list(filter(lambda name: not name.endswith('_centroid'), df_expression.columns))
expression_values = df_expression[feature_cols].values.astype(np.float32)

# Cells x features matrix with the centroids as obs
adata = anndata.AnnData(X=expression_values, obs=centroid_xy.copy())

# Name the features (channels) and the cells
adata.var_names = feature_cols
adata.obs_names = [f"cell_{i}" for i in range(adata.n_obs)]

# Spatial coordinates as (x, y)
adata.obsm['spatial'] = centroid_xy.values

# Separate copy of the MALDI-IHC columns only
adata.obsm["X_maldi_ihc"] = adata[:, maldi_ihc_labels].X
adata.write(os.path.join(results_dir, "maldi_ihc_lipid_adata.h5ad"))

### Cell Phenotyping

In [None]:
import os
import pandas as pd
import scanpy as sc
import numpy as np
from anndata import AnnData

# === Load data ===
# High Dose
base_dir = r"..\results\high_dose"
adata_high = sc.read_h5ad(os.path.join(base_dir, "maldi_ihc_lipid_adata.h5ad"))

# Low Dose
base_dir = r"..\results\low_dose"
adata_low = sc.read_h5ad(os.path.join(base_dir, "maldi_ihc_lipid_adata.h5ad"))

# No Dose
base_dir = r"..\results\no_dose"
adata_no = sc.read_h5ad(os.path.join(base_dir, "maldi_ihc_lipid_adata.h5ad"))

# === Find intersecting features ===
# Intersection of all feature names (markers and m/z columns) across the three samples
common_lipid  = sorted(set(adata_high.var_names)  & set(adata_low.var_names) & set(adata_no.var_names))

# === Subset AnnData objects to common features ===
# .copy() materialises the subset; writing to .obs of a view would otherwise
# trigger an implicit copy with a warning.
adata_high  = adata_high[:, common_lipid].copy()
adata_low  = adata_low[:, common_lipid].copy()
adata_no  = adata_no[:, common_lipid].copy()

# Add condition labels
adata_high.obs["condition"] = "High"
adata_low.obs["condition"] = "Low"
adata_no.obs["condition"] = "No"

# Concatenate rows (cells)
# NOTE(review): each input names its cells cell_0, cell_1, ...; with
# index_unique=None the combined obs_names therefore repeat across conditions.
# NOTE(review): AnnData.concatenate is deprecated in favour of anndata.concat — confirm
# against the installed anndata version.
adata_combined = adata_high.concatenate(
    [adata_low, adata_no],
    batch_key=None,
    index_unique=None
)

# === Done ===
print(adata_combined)
print("Features:", adata_combined.var_names[:10])
print("obs:", adata_combined.obs.columns)

# The closing quote was missing on this literal, which made the whole cell a SyntaxError.
output_path = r"..\results\high_low_no"
os.makedirs(output_path, exist_ok=True)
adata_combined.write(os.path.join(output_path, 'combined_adata.h5ad'))

In [None]:
import scanorama
import scanpy as sc
import anndata as ad

# Split the combined object back into one AnnData per condition for Scanorama.
adata_high_scan = adata_combined[adata_combined.obs["condition"] == "High"].copy()
adata_low_scan = adata_combined[adata_combined.obs["condition"] == "Low"].copy()
adata_no_scan = adata_combined[adata_combined.obs["condition"] == "No"].copy()

# Batch-correct the three conditions against each other.
scanorama_adata_list = scanorama.correct_scanpy(
    [adata_high_scan, adata_low_scan, adata_no_scan],
    return_dimred=True
)

# Stack the corrected objects back together (cells along axis 0).
scanorama_combined_adata = ad.concat(scanorama_adata_list, axis=0)

# Fixes: the path literal was missing its closing quote (SyntaxError), and the
# uncorrected `adata_combined` was being written under the scanorama filename.
output_path = r"..\results\high_low_no"
scanorama_combined_adata.write(os.path.join(output_path, 'scanorama_combined_adata.h5ad'))

In [None]:
import scanpy as sc

# Reload both saved objects from disk (presumably so the cells below can be run
# without repeating the integration step).

# Scanorama-corrected object
input_path = r"..\results\high_low_no\scanorama_combined_adata.h5ad"
scanorama_combined_adata = sc.read_h5ad(input_path)

# Concatenated object saved before Scanorama correction
input_path = r"..\results\high_low_no\combined_adata.h5ad"
adata_combined = sc.read_h5ad(input_path)

In [None]:
# Marker panel used for clustering, restricted to markers that exist in the
# integrated object (absent markers are dropped without a message).
shared_features = [
    f
    for f in ["GFAP", "GLUT1/SLC2A1", "IBA-1/AIF1", "NeuN/RBFOX3", "MBP", "PVALB", "TUBB3", "SYN1"]
    if f in scanorama_combined_adata.var_names
]

# Keep the full matrix reachable through .raw
scanorama_combined_adata.raw = scanorama_combined_adata

# Representation holding only the panel columns
scanorama_combined_adata.obsm['X_shared'] = scanorama_combined_adata[:, shared_features].X

# Neighbour graph on the panel representation, then UMAP and Leiden on that graph
sc.pp.neighbors(scanorama_combined_adata, n_neighbors=15, use_rep='X_shared')
sc.tl.umap(scanorama_combined_adata)
sc.tl.leiden(scanorama_combined_adata, resolution=0.5)

output_path = r"..\results\high_low_no\scanorama_combined_adata_leiden.h5ad"
scanorama_combined_adata.write(output_path)

In [None]:
import scanpy as sc

# Marker panel, redefined here as the full (unfiltered) list; the plotting cells
# below read this name.
shared_features = ["GFAP", "GLUT1/SLC2A1", "IBA-1/AIF1", "NeuN/RBFOX3", "MBP", "PVALB", "TUBB3", "SYN1"]

# Clustered, Scanorama-corrected object
input_path = r"..\results\high_low_no\scanorama_combined_adata_leiden.h5ad"
scanorama_combined_adata = sc.read_h5ad(input_path)

# Uncorrected combined object; receives the cluster labels computed on the corrected data
input_path = r"..\results\high_low_no\combined_adata.h5ad"
adata_combined = sc.read_h5ad(input_path)
# Assignment of a Series aligns on the obs index.
# NOTE(review): this relies on both objects having the same obs_names in the same order — confirm.
adata_combined.obs['leiden'] = scanorama_combined_adata.obs['leiden'].copy()

output_path = r"..\results\high_low_no\combined_adata_leiden.h5ad"
adata_combined.write(output_path)

In [None]:
# Folder that receives the figures saved by scanpy in this and the following cells.
output_dir = r"..\results\high_low_no"

# Set Scanpy figure output directory
sc.settings.figdir = output_dir

# Save UMAP with cluster coloring
# (the `save` value is a filename suffix that scanpy appends to the plot-type prefix)
sc.pl.umap(scanorama_combined_adata, color=["leiden"], save="_scanorama_leiden.png")
# Same embedding colored by dose condition
sc.pl.umap(scanorama_combined_adata, color=["condition"], save="_scanorama_condition.png")

In [None]:
# === Matrix plot grouped by Leiden clusters ===
# Mean value of each panel marker per Leiden cluster, taken from the uncorrected
# combined object.
sc.pl.matrixplot(
    adata_combined,
    var_names=shared_features,
    groupby='leiden',
    cmap='viridis',
    standard_scale='var',  # Rescale each marker to the 0-1 range across clusters (min-max scaling, not a z-score)
    swap_axes=True,
    save="scanorama_leiden_clusters.png"
)

In [None]:
import os
import matplotlib.pyplot as plt

# === Extract color map from scanorama_combined_adata ===
# Pair each Leiden category with the color scanpy stored for it, so the spatial
# maps use the same palette as the UMAP.
leiden_colors = {
    cat: color for cat, color in zip(
        scanorama_combined_adata.obs['leiden'].cat.categories,
        scanorama_combined_adata.uns['leiden_colors']
    )
}

# === Output folder ===
# Was r"results\high_low_no": every other cell writes to ..\results\high_low_no,
# so these maps ended up in a different folder from the rest of the outputs.
output_dir = r"..\results\high_low_no"
os.makedirs(output_dir, exist_ok=True)

# === Plot for each condition separately ===
for condition in ['High', 'Low', 'No']:
    subset = scanorama_combined_adata[scanorama_combined_adata.obs['condition'] == condition].copy()

    # One color per cell, looked up from its cluster label
    colors = subset.obs['leiden'].map(leiden_colors)

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.scatter(
        subset.obs['x_centroid'],
        subset.obs['y_centroid'],
        c=colors,
        s=1,
        linewidth=0,
        alpha=1.0
    )
    ax.set_aspect('equal')
    # Image convention: row 0 at the top
    ax.invert_yaxis()
    ax.axis('off')

    save_path = os.path.join(output_dir, f"{condition.lower()}_scanorama_leiden_clusters_spatial_map.png")
    fig.savefig(save_path, dpi=600, bbox_inches='tight', pad_inches=0.0)
    plt.show()
    # Release the figure so the large canvases do not accumulate across iterations
    plt.close(fig)

In [None]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import linkage, dendrogram
from sklearn.cluster import AgglomerativeClustering
import matplotlib.pyplot as plt
import scanpy as sc

output_dir = r"..\results\high_low_no"
os.makedirs(output_dir, exist_ok=True)

# --- Panel expression (dense) and the Leiden label of every cell ---
X = scanorama_combined_adata[:, shared_features].X
if hasattr(X, 'toarray'):
    X = X.toarray()
leiden_labels = scanorama_combined_adata.obs['leiden'].astype(str)

# --- One row per cell: marker values plus cluster label ---
df_expr = pd.DataFrame(X, columns=shared_features, index=scanorama_combined_adata.obs_names)
df_expr['leiden'] = leiden_labels.values

# --- Mean marker profile per cluster, with NaN/Inf replaced by 0 ---
cluster_means = (
    df_expr.groupby('leiden').mean()
    .fillna(0)
    .replace([np.inf, -np.inf], 0)
)

# Remove clusters whose profile is entirely zero
nonzero_mask = cluster_means.ne(0).any(axis=1)
cluster_means_filtered = cluster_means.loc[nonzero_mask]

# Sanity check before computing distances
assert np.isfinite(cluster_means_filtered.values).all(), "Error: non-finite values remain in cluster means"

# --- Cosine distances between cluster profiles, average-linkage tree ---
distances = pdist(cluster_means_filtered, metric='cosine')
linkage_matrix = linkage(distances, method='average')

# --- Draw and save the dendrogram ---
plt.figure(figsize=(6, 4))
dendrogram(
    linkage_matrix,
    labels=cluster_means_filtered.index.tolist(),
    leaf_rotation=90,
    leaf_font_size=10,
    color_threshold=0,
    above_threshold_color='black',
)
plt.title("Hierarchical Clustering of Leiden Clusters (MALDI-IHC)")
plt.ylabel("Cosine Distance")
plt.tight_layout()

save_path = os.path.join(output_dir, "scanorama_hierarchical_leiden_clusters.png")
plt.savefig(save_path, dpi=600, bbox_inches='tight', pad_inches=0.0)
plt.show()

In [None]:
import os
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# === Setup ===
# Work on a copy so the category conversion below leaves scanorama_combined_adata untouched.
# NOTE(review): no visible cell creates obs['leiden_merged'] or uns['leiden_merged_colors'];
# confirm the merge step that produces them runs before this cell.
adata = scanorama_combined_adata.copy()
output_dir = r"..\results\high_low_no"
os.makedirs(output_dir, exist_ok=True)

# Merged labels as a categorical column
adata.obs['leiden_merged'] = adata.obs['leiden_merged'].astype('category')

# Category name (as text) -> stored color
leiden_merged_colors = dict(
    zip(
        map(str, adata.obs['leiden_merged'].cat.categories),
        adata.uns['leiden_merged_colors'],
    )
)

# === Plot for each condition separately ===
for condition in ['High', 'Low', 'No']:
    # Cells of this condition only
    adata_temp = adata[adata.obs['condition'] == condition].copy()

    # Coordinates + merged label, without unlabeled cells
    plot_df = (
        adata_temp.obs[['x_centroid', 'y_centroid', 'leiden_merged']]
        .copy()
        .dropna(subset=['leiden_merged'])
    )
    plot_df['leiden_merged'] = plot_df['leiden_merged'].astype(str)
    plot_df['color'] = plot_df['leiden_merged'].map(leiden_merged_colors)
    # Cells whose label has no color entry are left out
    plot_df = plot_df.dropna(subset=['color'])

    # One point per cell
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.scatter(
        plot_df['x_centroid'],
        plot_df['y_centroid'],
        c=plot_df['color'].values,
        s=1,
        linewidth=0,
        alpha=1.0,
    )
    ax.set_aspect('equal')
    ax.axis('off')
    ax.invert_yaxis()

    plt.tight_layout()

    # Write the figure to disk
    save_path = os.path.join(output_dir, f"{condition.lower()}_scanorama_merged_leiden_clusters_spatial_map.png")
    plt.savefig(save_path, dpi=600, bbox_inches='tight', pad_inches=0.0)
    plt.show()

In [None]:
# Persist both objects under the "leiden_merged" filenames in output_dir.
# NOTE(review): no visible cell adds a 'leiden_merged' column to either object —
# confirm the merge step ran before saving.
scanorama_combined_adata.write(os.path.join(output_dir, "scanorama_combined_adata_leiden_merged.h5ad"))
adata_combined.write(os.path.join(output_dir, "combined_adata_leiden_merged.h5ad"))