In [30]:
import os
import scanpy as sc
import pandas as pd
import json
from PIL import Image
import numpy as np

def _load_tissue_positions(spatial_dir):
    """Read the spot position table stored in *spatial_dir*.

    ``tissue_positions.csv`` is tried first, then
    ``tissue_positions_list.csv``.

    Returns:
        A DataFrame indexed by barcode with the columns ``in_tissue``,
        ``array_row``, ``array_col``, ``pxl_row_in_fullres`` and
        ``pxl_col_in_fullres``.

    Raises:
        FileNotFoundError: If neither position file exists.
    """
    # (filename, header row). Per the Space Ranger documentation the newer
    # ``tissue_positions.csv`` starts with a header line while the legacy
    # ``tissue_positions_list.csv`` does not; reading the former with
    # header=None would turn the header into a bogus spot and make every
    # column a string column.
    candidates = (
        ("tissue_positions.csv", 0),
        ("tissue_positions_list.csv", None),
    )
    for filename, header in candidates:
        positions_path = os.path.join(spatial_dir, filename)
        if os.path.exists(positions_path):
            break
    else:
        raise FileNotFoundError("No tissue position file found.")

    positions = pd.read_csv(positions_path, header=header)
    # Normalise the column names so both file flavours look the same.
    positions.columns = [
        "barcode", "in_tissue", "array_row", "array_col", "pxl_row_in_fullres", "pxl_col_in_fullres"
    ]
    positions = positions.dropna(subset=["barcode"])  # drop incomplete rows
    return positions.set_index("barcode")


def process_sample(sample_path):
    """Convert one Space Ranger sample directory into an ``.h5ad`` file.

    Reads ``outs/filtered_feature_bc_matrix`` and ``outs/spatial`` below
    *sample_path*, attaches spot positions, scale factors and (when present)
    the hires tissue image to the AnnData object, and writes
    ``<sample_name>.h5ad`` into the module-level ``output_dir``.

    Any exception is caught and reported with ``print`` so that one broken
    sample does not abort a batch run; the function always returns ``None``.

    Args:
        sample_path: Path of the sample directory (the one containing ``outs``).
    """
    try:
        # normpath strips a trailing separator, which would otherwise make
        # basename() return an empty sample name.
        sample_name = os.path.basename(os.path.normpath(sample_path))
        matrix_dir = os.path.join(sample_path, "outs", "filtered_feature_bc_matrix")
        spatial_dir = os.path.join(sample_path, "outs", "spatial")

        # Read expression matrix
        adata = sc.read_10x_mtx(matrix_dir, var_names="gene_symbols", make_unique=True)

        # Attach per-spot metadata; spots without a position row get NaN.
        positions = _load_tissue_positions(spatial_dir)
        adata.obs = adata.obs.join(positions, how="left")

        # scanpy/squidpy treat obsm["spatial"] as (x, y) in full-resolution
        # pixels, i.e. (column, row). Storing (row, col) would transpose the
        # spots relative to the tissue image.
        adata.obsm["spatial"] = adata.obs[["pxl_col_in_fullres", "pxl_row_in_fullres"]].to_numpy()

        # Load scale factors
        with open(os.path.join(spatial_dir, "scalefactors_json.json"), encoding="utf-8") as f:
            scalefactors = json.load(f)

        # Load the hires image if available
        images = {}
        image_path = os.path.join(spatial_dir, "tissue_hires_image.png")
        if os.path.exists(image_path):
            # The context manager closes the file handle once the pixel data
            # has been copied into the array.
            with Image.open(image_path) as img:
                images["hires"] = np.array(img)
        else:
            print(f"⚠️ No image found for {sample_name}")

        adata.uns["spatial"] = {
            sample_name: {
                "images": images,
                "scalefactors": scalefactors,
                "metadata": {"chemistry_description": "Unknown"},
            }
        }

        # Save to h5ad
        out_file = os.path.join(output_dir, f"{sample_name}.h5ad")
        adata.write(out_file)
        print(f"✅ Saved {out_file}")

    except Exception as e:
        # Deliberate best-effort boundary: report and continue with the next sample.
        print(f"❌ Failed to process {sample_path}: {e}")

In [31]:
# Visit every entry of every input directory and convert those that look
# like a Space Ranger run (a directory holding outs/filtered_feature_bc_matrix).
for base_dir in input_dirs:
    for sample_name in os.listdir(base_dir):
        sample_path = os.path.join(base_dir, sample_name)
        # Plain files cannot be sample folders.
        if not os.path.isdir(sample_path):
            continue
        # Skip directories that carry no filtered count matrix.
        if not os.path.exists(
            os.path.join(sample_path, "outs", "filtered_feature_bc_matrix")
        ):
            continue
        process_sample(sample_path)

✅ Saved /Users/christoffer/work/karolinska/development/ST_BRICHOS/results/P24215_301.h5ad
✅ Saved /Users/christoffer/work/karolinska/development/ST_BRICHOS/results/P24215_201.h5ad
✅ Saved /Users/christoffer/work/karolinska/development/ST_BRICHOS/results/P24215_101.h5ad
✅ Saved /Users/christoffer/work/karolinska/development/ST_BRICHOS/results/P28052_201.h5ad
✅ Saved /Users/christoffer/work/karolinska/development/ST_BRICHOS/results/P28052_202.h5ad
✅ Saved /Users/christoffer/work/karolinska/development/ST_BRICHOS/results/P28052_203.h5ad
✅ Saved /Users/christoffer/work/karolinska/development/ST_BRICHOS/results/P28052_401.h5ad
✅ Saved /Users/christoffer/work/karolinska/development/ST_BRICHOS/results/P28052_102.h5ad
✅ Saved /Users/christoffer/work/karolinska/development/ST_BRICHOS/results/P28052_301.h5ad
✅ Saved /Users/christoffer/work/karolinska/development/ST_BRICHOS/results/P28052_103.h5ad
