In [4]:
import os
import glob
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Define markers for different CAF subsets with alternate gene names
myCAF_markers = ["ACTA2", "FAP", "PDGFRB"]
iCAF_markers = ["IL6", "CXCL12", "PDPN"]
apCAF_markers = ["HLA-DRA", "HLA-DRB1", "HLA-DQA1", "HLA-DQB1", "CD74"]  # Alternate names for HLA-DR and HLA-DQ
senCAF_markers = ["CDKN2A", "p21", "IL6", "MMP9"]  # CDKN2A as an alternate for p16INK4A

# Dictionary to store each CAF subset, its markers, and its color map
CAF_subsets = {
    'myCAFs': {"markers": myCAF_markers, "cmap": "coolwarm"},
    'iCAFs': {"markers": iCAF_markers, "cmap": "Spectral"},
    'apCAFs': {"markers": apCAF_markers, "cmap": "viridis"},
    'senCAFs': {"markers": senCAF_markers, "cmap": "plasma"}
}

# Define directory for .h5ad files
directory = '/Users/adiallo/Desktop/Thesis/Data_Documents/All_Data/Visium_samples/Visium_Data/samples'
file_paths = glob.glob(os.path.join(directory, '*.h5ad'))

# Loop over each file path
for file_path in file_paths:
    # Read the adata file
    adata = sc.read_h5ad(file_path)
    
    # Loop over each CAF subset
    for subset_name, subset_info in CAF_subsets.items():
        markers = subset_info["markers"]
        cmap = subset_info["cmap"]
        
        # Initialize a DataFrame to store the sum of expression values for each CAF subset per cell
        cells_marker_expression = pd.DataFrame(0, index=adata.obs.index, columns=[f'{subset_name} Score'])

        # Calculate the CAF score as the sum of expression values for each marker in the subset
        for gene in markers:
            if gene in adata.var_names:  # Check if the gene is present
                cells_marker_expression[f'{subset_name} Score'] += adata[:, gene].X.toarray().ravel()
            else:
                print(f"Gene {gene} not found in dataset. Skipping...")

        # Plotting
        plt.figure(figsize=(10, 8))
        
        # Map the CAF score to a diverging color gradient
        scatter = plt.scatter(
            adata.obsm['spatial'][:, 0], 
            adata.obsm['spatial'][:, 1], 
            c=cells_marker_expression[f'{subset_name} Score'],
            cmap=cmap,  # Diverging colormap for each subset
            alpha=0.6
        )
        
        plt.colorbar(scatter, label=f'{subset_name} Score')  # Add a color bar for reference
        plt.xlabel('Spatial coordinate X')
        plt.ylabel('Spatial coordinate Y')
        plt.title(f'Spatial distribution of {subset_name} markers for {os.path.basename(file_path)}')

        # Save the plot
        plt.savefig(f"{os.path.splitext(file_path)[0]}_{subset_name}_markers_intensity_plot.png", bbox_inches='tight')
        plt.close()

Gene HLA-DRA not found in dataset. Skipping...
Gene HLA-DRB1 not found in dataset. Skipping...
Gene HLA-DQA1 not found in dataset. Skipping...
Gene HLA-DQB1 not found in dataset. Skipping...
Gene p21 not found in dataset. Skipping...
Gene HLA-DRA not found in dataset. Skipping...
Gene HLA-DRB1 not found in dataset. Skipping...
Gene HLA-DQA1 not found in dataset. Skipping...
Gene HLA-DQB1 not found in dataset. Skipping...
Gene p21 not found in dataset. Skipping...
Gene HLA-DRA not found in dataset. Skipping...
Gene HLA-DRB1 not found in dataset. Skipping...
Gene HLA-DQA1 not found in dataset. Skipping...
Gene HLA-DQB1 not found in dataset. Skipping...
Gene p21 not found in dataset. Skipping...
Gene HLA-DRA not found in dataset. Skipping...
Gene HLA-DRB1 not found in dataset. Skipping...
Gene HLA-DQA1 not found in dataset. Skipping...
Gene HLA-DQB1 not found in dataset. Skipping...
Gene p21 not found in dataset. Skipping...
Gene HLA-DRA not found in dataset. Skipping...
Gene HLA-DRB1 not

  utils.warn_names_duplicates("var")


Gene HLA-DRA not found in dataset. Skipping...
Gene HLA-DRB1 not found in dataset. Skipping...
Gene HLA-DQA1 not found in dataset. Skipping...
Gene HLA-DQB1 not found in dataset. Skipping...
Gene p21 not found in dataset. Skipping...
Gene HLA-DRA not found in dataset. Skipping...
Gene HLA-DRB1 not found in dataset. Skipping...
Gene HLA-DQA1 not found in dataset. Skipping...
Gene HLA-DQB1 not found in dataset. Skipping...
Gene p21 not found in dataset. Skipping...
Gene HLA-DRA not found in dataset. Skipping...
Gene HLA-DRB1 not found in dataset. Skipping...
Gene HLA-DQA1 not found in dataset. Skipping...
Gene HLA-DQB1 not found in dataset. Skipping...
Gene p21 not found in dataset. Skipping...
Gene HLA-DRA not found in dataset. Skipping...
Gene HLA-DRB1 not found in dataset. Skipping...
Gene HLA-DQA1 not found in dataset. Skipping...
Gene HLA-DQB1 not found in dataset. Skipping...
Gene p21 not found in dataset. Skipping...
Gene HLA-DRA not found in dataset. Skipping...
Gene HLA-DRB1 not