In [None]:
import scimap as sm
import pandas as pd
import sys
import os
import scanpy as sc
import seaborn as sns; sns.set(color_codes=True)
import anndata
# Load the annotated dataset from an .h5ad file.
# NOTE(review): `path_to_adata` is not defined in the visible code; it must be
# assigned before this cell is run.
adata = anndata.read_h5ad(path_to_adata)
# Bare expression: as the last line of a notebook cell it displays the
# observation annotation table (`adata.obs`).
adata.obs

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import pairwise_distances

# Maximum distance between a TAE cell and a target-niche cell for the pair to
# be counted. Expressed in the same units as the 'X_centroid' / 'Y_centroid'
# columns (units are not visible here -- TODO confirm).
max_distance = 3000

# A sample is analysed only when it contains at least this many TAE cells.
min_tae_cells = 200

# Niches whose proximity to the TAE niche is measured, and the coordinate
# columns read for every cell.
target_niches = ('T-B-APC', 'Plasma', 'NeutCT')
coord_columns = ['X_centroid', 'Y_centroid']

# One dict per (sample, target niche), or a single placeholder row for a
# sample that is skipped because it has too few TAE cells.
results = []

# Only the observation table is read below, so work on it directly instead of
# subsetting the whole AnnData object for every sample.
obs = adata.obs

# Get unique imageIDs
image_ids = obs['imageid'].unique()

for sample_name in image_ids:
    sample_obs = obs.loc[obs['imageid'] == sample_name]
    sample_niche = sample_obs['niche_renamed']

    # Coordinates of the reference ('TAE') cells in this sample.
    tae_coords = sample_obs.loc[sample_niche == 'TAE', coord_columns].to_numpy()

    # Guard clause: too few TAE cells -> record one placeholder row and move on.
    if tae_coords.shape[0] < min_tae_cells:
        results.append({
            'Sample': sample_name,
            'Target Niche': 'N/A',
            'Average Distance': 'N/A',
            'Unique Cells Count': 'Not enough TAE cells'
        })
        continue

    for target_niche in target_niches:
        target_coords = sample_obs.loc[
            sample_niche == target_niche, coord_columns
        ].to_numpy()

        # The target niche is absent from this sample.
        if target_coords.size == 0:
            results.append({
                'Sample': sample_name,
                'Target Niche': target_niche,
                'Average Distance': 'N/A',
                'Unique Cells Count': 0
            })
            continue

        # Dense (n_TAE x n_target) distance matrix; memory grows with the
        # product of the two cell counts.
        distances = pairwise_distances(tae_coords, target_coords)
        within_range = distances <= max_distance

        # A target cell counts once if it lies within range of ANY TAE cell.
        num_unique_cells = int(within_range.any(axis=0).sum())

        # Mean over every (TAE, target) pair within range. Columns with no
        # in-range entry contribute nothing, so this equals averaging over the
        # "valid" target cells only.
        # None (rather than 'N/A') marks "niche present, but nothing in range".
        if num_unique_cells > 0:
            avg_distance = distances[within_range].mean()
        else:
            avg_distance = None

        results.append({
            'Sample': sample_name,
            'Target Niche': target_niche,
            'Average Distance': avg_distance,
            'Unique Cells Count': num_unique_cells
        })

# Convert results into a Pandas DataFrame. Naming the columns explicitly keeps
# the frame well-formed (and the column lookups below valid) even when
# `results` is empty.
result_columns = ['Sample', 'Target Niche', 'Average Distance', 'Unique Cells Count']
df_results = pd.DataFrame(results, columns=result_columns)

# Samples whose ID starts with 'P' are labelled Periodontitis, all others
# Health. str() keeps this from raising on a non-string sample ID.
df_results['Sample Type'] = df_results['Sample'].map(
    lambda sample: 'Periodontitis' if str(sample).startswith('P') else 'Health'
)


# Optionally, save the raw results to a CSV file
#df_results.to_csv('combined_distances_and_cells.csv', index=False)


# Coerce each metric column once; non-numeric placeholders ('N/A', None,
# 'Not enough TAE cells') become NaN.
avg_distance_numeric = pd.to_numeric(df_results['Average Distance'], errors='coerce')
cell_count_numeric = pd.to_numeric(df_results['Unique Cells Count'], errors='coerce')

# Keep only rows where both values are numeric
is_numeric_row = avg_distance_numeric.notna() & cell_count_numeric.notna()
df_results_filtered = df_results[is_numeric_row].copy()

# Store the already-coerced values as floats
df_results_filtered['Average Distance'] = avg_distance_numeric[is_numeric_row].astype(float)
df_results_filtered['Unique Cells Count'] = cell_count_numeric[is_numeric_row].astype(float)

# Save the filtered results to a CSV file
#df_results_filtered.to_csv('combined_distances_and_cell_filtereds.csv', index=False)

# Display the results
print(df_results_filtered)
