We are excited to share that Panoramics-a-Vision proudly took part in the recent hackathon! Panoramics is a pan-Canadian catalyst for spatial biology and single-cell innovation, bringing together science, technology, and visionary impact.

Our hackathon team combined diverse expertise and strong collaboration:


*   Shamini Ayyadhury – CEO of Panoramics and our team leader, providing vision and direction.
*   Suluxan Mohanraj – Software Developer and co-team leader, driving our technical development.
*   Arzu Kirici – Computational Scientist, contributing analytical depth and problem-solving.
*   Alyona Ivanova – Scientist and Creative Content Maestro.
*   Kasra Rahimian – Web Developer, ensuring seamless design and implementation on the web side.

Together, we worked across disciplines to turn ideas into solutions, demonstrating the collaborative spirit that defines Panoramics.

Our project is SpatioScript, an AI-powered tool that lets researchers query spatial biology data using natural language. By combining single-cell RNA sequencing with spatial transcriptomics, SpatioScript helps uncover how cells are organized and interact in the tumour microenvironment. Users can simply ask questions like “Are CD8+ T cells enriched in the tumour core?” and get biologically meaningful answers. The platform lowers the barrier to exploring complex datasets and bridges image-based pathology with high-dimensional molecular data, making spatial biology more accessible to scientists and clinicians.

In [None]:
import pandas as pd      # Data manipulation and analysis (tables, spreadsheets)
import numpy as np       # Numerical computations and working with arrays
import matplotlib.pyplot as plt   # Data visualization with plots and figures
import seaborn as sns    # Statistical data visualization with cleaner styles
import scanpy as sc      # Single-cell and spatial omics analysis toolkit
from PIL import Image    # Image processing and opening/saving image files
#from scipy.sparse import csr_array   # (Optional) Efficient handling of sparse matrices
import os                # Interacting with the operating system (files, paths)
import math              # Mathematical functions and calculations
import warnings          # Manage and filter warning messages
warnings.filterwarnings('ignore')  # Suppress unnecessary warnings for cleaner output

In [None]:
# Root locations: where the raw inputs live and where every result is written.
data_dir = '/home/mystique27m/ext_gpu_hd/hackathon/data/'
out = '/home/mystique27m/ext_gpu_hd/hackathon/out/'

# Output subfolders for this script: processed data files and figures.
save_dir = out + 'script01a_files/'
figures_path = out+'script01a_figures/'

# Create the output root and both subfolders; existing folders are left untouched.
for output_folder in (out, save_dir, figures_path):
    os.makedirs(output_folder, exist_ok=True)


In [None]:
# Two-colour palette for the assignment categories
# (e.g., background vs highlighted groups).
colors_assignment = [
    '#EEEDEB',
    '#2F3645',
]

# Two-colour palette for the probe-membership categories
# (e.g., members vs non-members).
colors_probe_membership = [
    '#5C848E',
    '#1D2323',
]

In [None]:
# Placeholder list for folder paths; nothing is added to it here.
folder_paths = list()

# File names of the inputs, by format:
#   - HDF5 cell feature matrix
#   - Parquet table of cell-level data
#   - Parquet table of transcript-level data
features_filepath = 'cell_feature_matrix.h5'
cells_filename = 'cells.parquet'
transcripts_filename = 'transcripts.parquet'

In [None]:
# Sanity check: list the names of all entries (files and folders) inside 'data_dir'.
# As the last expression of the cell, the resulting list is presumably echoed by the notebook.
os.listdir(data_dir)

In [None]:
# Location of the transcript-level Parquet file inside the data directory.
transcripts_path = os.path.join(data_dir, transcripts_filename)

# Read the whole transcript table into a pandas DataFrame.
df_transcript = pd.read_parquet(transcripts_path)

# Tag every row with the dataset/sample name in a new 'sample' column.
df_transcript['sample'] = '10x_Xenium_V1_Human_Brain_GBM_FFPE_outs'

In [None]:
import gc        # Standard-library interface to Python's garbage collector

# Force a full collection pass now, so memory held by unreachable objects is
# released before the preprocessing cells that follow.
gc.collect()

Data wrangling / Preprocessing

In [None]:
from concurrent.futures import ProcessPoolExecutor
# For parallel processing (not yet used in this snippet, but useful for large data)

### code to process one large transcript file

# Feature-name prefixes that identify negative control probes rather than genes.
control_prefixes = ('BLANK', 'Neg', 'antisense')

### flag negative control probes
# Boolean mask aligned row-for-row with df_transcript. na=False treats a missing
# feature name as "not a control probe"; without it a NaN entry would make the
# mask unusable for boolean indexing.
is_control = df_transcript['feature_name'].str.startswith(control_prefixes, na=False)

### label every transcript while keeping the original row order and index
# Labelling a copy in place (instead of concatenating the two subsets and then
# overwriting the index) guarantees that each row keeps its own index label:
# concatenation puts the control rows first, so re-applying the original index
# positionally would attach every label to the wrong transcript.
df = df_transcript.copy()
df['group'] = np.where(is_control, 'neg_probes', 'gene_probes')

### per-group subsets (negative control probes / gene probes)
# The mask complement is used for the gene probes, so the two subsets always
# partition the table even if transcript_id values are not unique.
df_neg = df[is_control].copy()
df_genes = df[~is_control].copy()

### assign binary labels to assigned and unassigned cells
# Rows whose cell_id is exactly 'UNASSIGNED' are 'unassigned'; every other row
# is 'assigned'.
df['binary'] = np.where(df['cell_id'] == 'UNASSIGNED', 'unassigned', 'assigned')

Control plots and evaluation

In [None]:
df  # Display the final combined DataFrame, now carrying the 'group' (probe type) and 'binary' (cell assignment) labels

In [None]:
import matplotlib.pyplot as plt    # Plotting library for creating figures
import seaborn as sns              # Statistical data visualization with nicer styles
import numpy as np                 # Numerical computing
import pandas as pd                # Data manipulation and analysis

def _percent_table(frame, by, sample):
    """Return the percentage of rows in *frame* per value of *by*.

    *by* is a column name or a list of column names (for joint proportions).
    The result has one column per entry of *by*, a 'proportion' column holding
    percentages (0-100), and a 'sample' column filled with *sample*.
    """
    shares = frame[by].value_counts(normalize=True)
    # Name the index level(s) and the values explicitly: older pandas releases
    # do not call the normalized counts 'proportion', so relying on the
    # default names would fail there with a KeyError.
    table = shares.rename_axis(by).rename('proportion').reset_index()
    table['proportion'] = table['proportion'] * 100
    table['sample'] = sample
    return table


fig, axes = plt.subplots(1, 3, figsize=(30, 9))
# One row of three wide panels, sized large for readability

sample_name = df['sample'].values[0]
# Sample name taken from the first row (the 'sample' column holds one value)

### binary - percentage of assigned and unassigned - proportions by sample
df_binary = _percent_table(df, 'binary', sample_name)

### group - percentage of negative control probes and gene probes
# Fix the category order (neg_probes first, gene_probes second). Building the
# categorical with explicit categories also works when one of the two groups
# is absent from the data, where reordering existing categories would raise.
df['group'] = pd.Categorical(df['group'],
                             categories=['neg_probes', 'gene_probes'],
                             ordered=True)
df_group = _percent_table(df, 'group', sample_name)

### group x binary - joint percentages of probe group and assignment status
df3 = _percent_table(df, ['group', 'binary'], sample_name)

# Marker styling shared by all three strip plots
marker_style = dict(marker='D', alpha=0.75, size=12)

# --- Plot 1: Assigned vs unassigned ---
sns.stripplot(data=df_binary,
              x='binary',
              y='proportion',
              hue='sample',
              ax=axes[0],
              **marker_style)

# --- Plot 2: Negative probes vs gene probes ---
sns.stripplot(data=df_group,
              x='group',
              y='proportion',
              hue='sample',
              ax=axes[1],
              **marker_style)

# --- Plot 3: Combined group × binary ---
sns.stripplot(data=df3,
              x='group',
              y='proportion',
              hue='binary',
              ax=axes[2],
              linewidth=0.3,
              **marker_style)

# NOTE: the finishing steps below are currently disabled, so the figure is
# neither restyled nor written to disk. Uncomment to tilt the x tick labels,
# move each legend to the right-hand side, remove the top/right spines,
# tighten the layout and save a high-resolution PNG.
#for ax in axes:
#    ax.tick_params(axis='x', rotation=45)
#    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), frameon=False, markerscale=0.9)

#sns.despine()

#plt.tight_layout()

#plt.savefig(figures_path+'fig01_binary_proportion.png', dpi=300, bbox_inches='tight')

In [None]:
# Destination folders for the cleaned transcript tables: one for gene probes,
# one for control (negative) probes. Existing folders are left untouched.
gene_probes_dir = out+'script01a_cleaned_tf_annot_gene_probes/'
control_probes_dir = out+'script01a_cleaned_tf_annot_control_probes/'
for cleaned_dir in (gene_probes_dir, control_probes_dir):
    os.makedirs(cleaned_dir, exist_ok=True)

# Split the labelled transcripts by probe group.
tf_gene = df[df['group']=='gene_probes']
tf_control = df[df['group']=='neg_probes']

# Sample ID used as the file-name prefix (first unique value of 'sample').
sample_id = df['sample'].unique()[0]

# Write each subset to its own CSV, named with the sample ID.
tf_gene.to_csv(gene_probes_dir+sample_id+'_gene_probes_tf_cleaned.csv')
tf_control.to_csv(control_probes_dir+sample_id+'_control_probes_tf_cleaned.csv')