# Run Baysor on per-section CSVs

This notebook runs Baysor on each per-section CSV exported from `00-data-wrangling.ipynb`.
Update the configuration cell for your local Baysor environment and desired parameters.


In [1]:
# Load ProgressMeter and switch its IJulia display behavior to `:append`.
# NOTE(review): presumably this makes progress updates get appended to the cell output
# instead of replacing it — confirm against the ProgressMeter documentation.
using ProgressMeter
ProgressMeter.ijulia_behavior(:append)


IJuliaAppend::IJuliaBehavior = 2

In [2]:
import Pkg

# Update this to your Baysor environment path
baysor_env = "/Users/christoffer/Baysor"
# Activate that project before the `using` lines below run.
# NOTE(review): presumably Baysor, DataFrames and CSV are all expected to be installed
# in this project — confirm for your local setup.
Pkg.activate(baysor_env)

using Baysor
using DataFrames
using CSV

# Print the number of threads reported by Threads.nthreads() for this session.
println("Threads: ", Threads.nthreads())


[32m[1m  Activating[22m[39m project at `~/Baysor`


Threads: 16


## Configuration


In [None]:
# Root folder holding the per-section CSVs; the discovery cell walks it recursively.
input_dir = "/Volumes/processing2/nature-dev-mouse-reanalysis/data"
# NOTE(review): `output_root` is not referenced by the cells shown in this notebook;
# `run_baysor` writes into a subfolder next to each input CSV instead. Confirm whether
# this is still needed.
output_root = "/Volumes/processing2/nature-dev-mouse-reanalysis/data/baysor_output"

# Settings splatted into `run_baysor` as keyword arguments (adjust as needed).
baysor_params = (;
    # Column names
    x_column = :x,
    y_column = :y,
    # There is deliberately no `z_column` entry here: `run_baysor` only forwards
    # `z_column` when this tuple contains it. Add one to pass a z column through.
    gene_column = :gene,
    # Segmentation settings; `min_molecules_per_cell` and `scale` also determine the
    # name of the output subfolder.
    min_molecules_per_cell = 20,
    n_clusters = 3,
    scale = 31.0,
    iters = 500,
)


(x_column = :x, y_column = :y, z_column = nothing, gene_column = :gene, min_molecules_per_cell = 20, n_clusters = 3, scale = 31.0, iters = 500)

## Discover input files


In [23]:
# Collect the per-section input CSVs found anywhere under input_dir.
#
# Skipped on purpose:
#   * macOS AppleDouble sidecar files ("._*").
#   * CSVs whose name contains "segmentation". `run_baysor` writes its results with the
#     output prefix "segmentation" into a subfolder next to each input, i.e. inside the
#     tree walked here. Without this filter, re-running this cell after a Baysor run
#     would queue Baysor's own output tables as new inputs. The name test mirrors the one
#     used by `segmentation_exists`.
csv_files = String[]
for (root, _, files) in walkdir(input_dir)
    for f in files
        startswith(f, "._") && continue
        lf = lowercase(f)
        endswith(lf, ".csv") || continue
        occursin("segmentation", lf) && continue
        push!(csv_files, joinpath(root, f))
    end
end

println("Found ", length(csv_files), " CSV files")
csv_files


Found 27 CSV files


27-element Vector{String}:
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02A/spots.csv"
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02B/spots.csv"
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02C/spots.csv"
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02D/spots.csv"
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02E/spots.csv"
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02F/spots.csv"
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_06A/spots.csv"
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_06B/spots.csv"
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_06C/spots.csv"
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_06D/spots.csv"
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_06E/spots.csv"
 "/Volumes/processing2/nature-dev-mouse-reanalysis/data/section_06F/spots.csv"
 "/Volumes/processing2/na

## Run Baysor


In [None]:
"""
    segmentation_exists(output_dir::String)::Bool

Return `true` when `output_dir` is an existing directory that contains at least one entry
whose lowercased name includes `"segmentation"` and ends with `".csv"`. Return `false`
otherwise, including when `output_dir` is not a directory.
"""
function segmentation_exists(output_dir::String)::Bool
    isdir(output_dir) || return false
    return any(readdir(output_dir)) do entry
        name = lowercase(entry)
        endswith(name, ".csv") && occursin("segmentation", name)
    end
end

"""
    run_baysor(input_path::String; params...)

Run Baysor on the CSV at `input_path`, unless a segmentation CSV is already present in
its output folder.

Results are written with the output prefix `segmentation` into the folder
`m<min_molecules_per_cell>_s<scale>` next to `input_path` (for example `m20_s31`, or
`m20_s31.5` for a fractional scale). The folder is created if it does not exist.

# Keywords
- `x_column`, `y_column`, `gene_column`, `min_molecules_per_cell`, `n_clusters`, `scale`:
  required; forwarded to `Baysor.CommandLine.run`. A missing one raises a `KeyError`.
- `iters`: optional, defaults to `500`; stored in the run options' `segmentation.iters`.
- `z_column`: optional; only forwarded when it is given.

# Returns
A named tuple `(status, output)`, where `status` is `"skipped"` or `"success"` and
`output` is the output folder.
"""
function run_baysor(input_path::String; params...)
    m = Int(params[:min_molecules_per_cell])
    s = params[:scale]
    # `Int(s)` throws an InexactError for a fractional scale such as 31.5, so test with
    # `isinteger` first and only drop the trailing ".0" when the value is whole.
    s_str = isinteger(s) ? string(Int(s)) : string(s)
    output_dir = joinpath(dirname(input_path), "m$(m)_s$(s_str)")
    mkpath(output_dir)
    output_prefix = joinpath(output_dir, "segmentation")

    if segmentation_exists(output_dir)
        println("Skipping (already exists): ", input_path)
        return (status="skipped", output=output_dir)
    end

    cfg = Baysor.Utils.RunOptions()
    cfg.segmentation.iters = Int(get(params, :iters, 500))

    # Build kwargs without z_column unless provided
    kwargs = Dict{Symbol,Any}(
        :x_column => params[:x_column],
        :y_column => params[:y_column],
        :gene_column => params[:gene_column],
        :min_molecules_per_cell => params[:min_molecules_per_cell],
        :n_clusters => params[:n_clusters],
        :scale => params[:scale],
        :output => output_prefix,
        :config => cfg,
    )

    if haskey(params, :z_column)
        kwargs[:z_column] = params[:z_column]
    end

    println("Running Baysor on: ", input_path)
    Baysor.CommandLine.run(input_path; kwargs...)

    return (status="success", output=output_dir)
end

# Run Baysor over every discovered CSV and record one named tuple per file:
#   (file, status, output) when `run_baysor` returns,
#   (file, status="error", error) when it throws.
# A failure on one file is printed and recorded, then the loop moves on to the next file.
results = NamedTuple[]

for (i, file) in enumerate(csv_files)
    println("[", i, "/", length(csv_files), "]")
    try
        res = run_baysor(file; baysor_params...)
        push!(results, (file=file, status=res.status, output=res.output))
    catch e
        # An interrupt (e.g. stopping the kernel) must abort the whole batch instead of
        # being recorded as a per-file error and followed by the next run.
        e isa InterruptException && rethrow()
        println("ERROR: ", e)
        push!(results, (file=file, status="error", error=string(e)))
    end
end

results


[1/27]
Running Baysor on: /Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02A/spots.csv
[13:29:17] Info: Run R7172c4560
[13:29:17] Info: (2026-02-02) Run Baysor v0.7.1
[13:29:17] Info: Using local Baysor build
[13:29:17] Info: Loading data...
[13:29:17] Info: Loaded 58705 transcripts, 119 genes.
[13:29:18] Info: Estimating noise level
[13:29:18] Info: Done
[13:29:18] Info: Clustering molecules...


[32mProgress:   0%|▏                                        |  ETA: 0:02:15[39m
[34m                   Iteration: 23[39m
[34m             Max. difference: 0.241[39m
[A4m   Fraction of probs changed: 0.939[39m


[32mProgress:   0%|▏                                        |  ETA: 0:02:48[39m
[34m                   Iteration: 44[39m
[34m             Max. difference: 0.17[39m
[A4m   Fraction of probs changed: 0.903[39m


[32mProgress:   1%|▎                                        |  ETA: 0:02:57[39m
[34m                   Iteration: 59[39m
[34m             Max. difference: 0.0725[39m
[A4m   Fraction of probs changed: 0.858[39m


[32mProgress:   1%|▎                                        |  ETA: 0:03:03[39m
[34m                   Iteration: 74[39m
[34m             Max. difference: 0.0349[39m
[A4m   Fraction of probs changed: 0.814[39m


[32mProgress:   1%|▍                                        |  ETA: 0:03:07[39m
[34m                   Iteration: 89[39

[13:29:22] Info: Algorithm stopped after 198 iterations. Max. probability difference: 0.000405. Converged: true.
[13:29:22] Info: Done
[13:29:22] Info: Initializing algorithm. Scale: 31.0, scale std: 7.75, initial #components: 5870, #molecules: 58705.
[13:29:22] Info: Using the following additional information about molecules: [:confidence, :cluster]
[13:29:22] Info: Using 2D coordinates


[32mProgress:   3%|█▏                                       |  ETA: 0:00:04[39m
[34m         Iteration: 13[39m
[34m    Noise level, %: 6.6[39m
[A4m   Num. components: 873[39m


[32mProgress:   5%|██                                       |  ETA: 0:00:04[39m
[34m         Iteration: 25[39m
[34m    Noise level, %: 6.5[39m
[A4m   Num. components: 979[39m


[32mProgress:   7%|███                                      |  ETA: 0:00:04[39m
[34m         Iteration: 36[39m
[34m    Noise level, %: 11.31[39m
[A4m   Num. components: 960[39m


[32mProgress:  10%|████                                     |  ETA: 0:00:04[39m
[34m         Iteration: 49[39m
[34m    Noise level, %: 6.43[39m
[A4m   Num. components: 1110[39m


[32mProgress:  13%|█████▍                                   |  ETA: 0:00:03[39m
[34m         Iteration: 66[39m
[34m    Noise level, %: 10.21[39m
[A4m   Num. components: 1062[39m


[32mProgress:  16%|██████▊                                  |  ETA:

[13:29:25] Info: Processing complete.
[13:29:25] Info: Estimating boundary polygons
[13:29:25] Info: Saving results to /Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02A/m20_s31/segmentation
[13:29:25] Info: All done!
[2/27]
Running Baysor on: /Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02B/spots.csv
[13:29:25] Info: Run R0b213d94e
[13:29:25] Info: (2026-02-02) Run Baysor v0.7.1
[13:29:25] Info: Using local Baysor build
[13:29:25] Info: Loading data...
[13:29:25] Info: Loaded 301410 transcripts, 119 genes.
[13:29:25] Info: Estimating noise level
[13:29:26] Info: Done
[13:29:27] Info: Clustering molecules...


[32mProgress:   0%|                                         |  ETA: 0:14:04[39m
[34m                   Iteration: 4[39m
[34m             Max. difference: 0.616[39m
[A4m   Fraction of probs changed: 0.977[39m


[32mProgress:   0%|                                         |  ETA: 0:17:03[39m
[34m                   Iteration: 7[39m
[34m             Max. difference: 0.543[39m
[A4m   Fraction of probs changed: 0.976[39m


[32mProgress:   0%|                                         |  ETA: 0:18:20[39m
[34m                   Iteration: 10[39m
[34m             Max. difference: 0.475[39m
[A4m   Fraction of probs changed: 0.975[39m


[32mProgress:   0%|                                         |  ETA: 0:18:51[39m
[34m                   Iteration: 13[39m
[34m             Max. difference: 0.401[39m
[A4m   Fraction of probs changed: 0.974[39m


[32mProgress:   0%|▏                                        |  ETA: 0:19:21[39m
[34m                   Iteration: 16[39m


[13:29:54] Info: Algorithm stopped after 219 iterations. Max. probability difference: 0.0087. Converged: true.
[13:29:54] Info: Done
[13:29:54] Info: Initializing algorithm. Scale: 31.0, scale std: 7.75, initial #components: 30140, #molecules: 301410.
[13:29:55] Info: Using the following additional information about molecules: [:confidence, :cluster]
[13:29:55] Info: Using 2D coordinates


[32mProgress:   1%|▎                                        |  ETA: 0:00:25[39m
[34m         Iteration: 3[39m
[34m    Noise level, %: 21.89[39m
[A4m   Num. components: 1840[39m


[32mProgress:   1%|▌                                        |  ETA: 0:00:23[39m
[34m         Iteration: 6[39m
[34m    Noise level, %: 18.62[39m
[A4m   Num. components: 2912[39m


[32mProgress:   2%|▊                                        |  ETA: 0:00:27[39m
[34m         Iteration: 9[39m
[34m    Noise level, %: 16.21[39m
[A4m   Num. components: 3627[39m


[32mProgress:   3%|█▏                                       |  ETA: 0:00:25[39m
[34m         Iteration: 13[39m
[34m    Noise level, %: 3.25[39m
[A4m   Num. components: 5070[39m


[32mProgress:   3%|█▎                                       |  ETA: 0:00:24[39m
[34m         Iteration: 16[39m
[34m    Noise level, %: 3.21[39m
[A4m   Num. components: 5271[39m


[32mProgress:   4%|█▌                                       |  E

[13:30:11] Info: Processing complete.
[13:30:12] Info: Estimating boundary polygons
[13:30:12] Info: Saving results to /Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02B/m20_s31/segmentation


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


[13:30:13] Info: All done!
[3/27]
Running Baysor on: /Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02C/spots.csv
[13:30:13] Info: Run Reb8be0706
[13:30:13] Info: (2026-02-02) Run Baysor v0.7.1
[13:30:13] Info: Using local Baysor build
[13:30:13] Info: Loading data...
[13:30:13] Info: Loaded 439928 transcripts, 119 genes.
[13:30:13] Info: Estimating noise level
[13:30:14] Info: Done
[13:30:15] Info: Clustering molecules...


[32mProgress:   0%|                                         |  ETA: 0:23:07[39m
[34m                   Iteration: 3[39m
[34m             Max. difference: 0.638[39m
[A4m   Fraction of probs changed: 0.979[39m


[32mProgress:   0%|                                         |  ETA: 0:26:32[39m
[34m                   Iteration: 5[39m
[34m             Max. difference: 0.56[39m
[A4m   Fraction of probs changed: 0.978[39m


[32mProgress:   0%|                                         |  ETA: 0:27:28[39m
[34m                   Iteration: 7[39m
[34m             Max. difference: 0.467[39m
[A4m   Fraction of probs changed: 0.977[39m


[32mProgress:   0%|                                         |  ETA: 0:29:22[39m
[34m                   Iteration: 9[39m
[34m             Max. difference: 0.579[39m
[A4m   Fraction of probs changed: 0.977[39m


[32mProgress:   0%|                                         |  ETA: 0:29:30[39m
[34m                   Iteration: 11[39m
[34

[13:31:18] Info: Algorithm stopped after 337 iterations. Max. probability difference: 0.00248. Converged: true.
[13:31:18] Info: Done
[13:31:18] Info: Initializing algorithm. Scale: 31.0, scale std: 7.75, initial #components: 43992, #molecules: 439928.
[13:31:19] Info: Using the following additional information about molecules: [:confidence, :cluster]
[13:31:19] Info: Using 2D coordinates


[32mProgress:   0%|▏                                        |  ETA: 0:00:35[39m
[34m         Iteration: 2[39m
[34m    Noise level, %: 3.36[39m
[A4m   Num. components: 3591[39m


[32mProgress:   1%|▍                                        |  ETA: 0:00:35[39m
[34m         Iteration: 4[39m
[34m    Noise level, %: 4.19[39m
[A4m   Num. components: 5728[39m


[32mProgress:   1%|▍                                        |  ETA: 0:00:40[39m
[34m         Iteration: 5[39m
[34m    Noise level, %: 3.34[39m
[A4m   Num. components: 6094[39m


[32mProgress:   1%|▋                                        |  ETA: 0:00:37[39m
[34m         Iteration: 7[39m
[34m    Noise level, %: 3.77[39m
[A4m   Num. components: 6917[39m


[32mProgress:   2%|▊                                        |  ETA: 0:00:41[39m
[34m         Iteration: 9[39m
[34m    Noise level, %: 17.22[39m
[A4m   Num. components: 5918[39m


[32mProgress:   2%|█                                        |  ETA: 

[13:31:41] Info: Processing complete.
[13:31:42] Info: Estimating boundary polygons
[13:31:42] Info: Saving results to /Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02C/m20_s31/segmentation


[32mProgress: 100%|█████████████████████████████████████████| Time: 0:00:00[39m


[13:31:43] Info: All done!
[4/27]
Running Baysor on: /Volumes/processing2/nature-dev-mouse-reanalysis/data/section_02D/spots.csv
[13:31:43] Info: Run R2114e8aad
[13:31:43] Info: (2026-02-02) Run Baysor v0.7.1
[13:31:43] Info: Using local Baysor build
[13:31:43] Info: Loading data...
[13:31:43] Info: Loaded 512651 transcripts, 119 genes.
[13:31:43] Info: Estimating noise level
[13:31:45] Info: Done
[13:31:45] Info: Clustering molecules...


[32mProgress:   0%|                                         |  ETA: 0:25:27[39m
[34m                   Iteration: 2[39m
[34m             Max. difference: 0.685[39m
[A4m   Fraction of probs changed: 0.979[39m


[32mProgress:   0%|                                         |  ETA: 0:31:22[39m
[34m                   Iteration: 4[39m
[34m             Max. difference: 0.61[39m
[A4m   Fraction of probs changed: 0.978[39m


[32mProgress:   0%|                                         |  ETA: 0:33:03[39m
[34m                   Iteration: 6[39m
[34m             Max. difference: 0.641[39m
[A4m   Fraction of probs changed: 0.977[39m


[32mProgress:   0%|                                         |  ETA: 0:35:15[39m
[34m                   Iteration: 8[39m
[34m             Max. difference: 0.608[39m
[A4m   Fraction of probs changed: 0.976[39m


[32mProgress:   0%|                                         |  ETA: 0:36:08[39m
[34m                   Iteration: 10[39m
[34