# FlashS Quickstart

This notebook demonstrates how to use FlashS for spatially variable gene (SVG) detection.

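## Installation

From the root of a local clone of the FlashS repository, install the package in editable mode together with its `full` extras:

```bash
pip install -e ".[full]"
```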

## 1. Basic Usage with NumPy/SciPy

FlashS works directly with NumPy arrays (spatial coordinates, one row per cell) and SciPy sparse matrices (expression, cells × genes).

In [None]:
import numpy as np
from scipy import sparse
from flashs import FlashS

### Create simulated spatial data

We simulate 4,900 cells arranged on a 70 × 70 2D grid, with expression values for 200 genes.
The first 20 genes follow a spatial gradient along a randomly chosen direction; the remaining 180 are random (non-spatial) with 5% non-zero entries.

In [None]:
# Seeded generator that drives the simulated spatial genes below.
rng = np.random.default_rng(42)
n_genes = 200
n_svg = 20  # number of true spatially variable genes

# Cell positions on a regular side x side lattice. Row i of `coords` holds the
# (x, y) position of cell i, with y varying fastest along the rows.
side = 70
n_cells = side * side  # 4900 cells
axis = np.arange(side)
grid_x, grid_y = np.meshgrid(axis, axis, indexing="ij")
x = grid_x.ravel()
y = grid_y.ravel()
coords = np.column_stack([x, y]).astype(float)

# Background (non-spatial) genes: a random sparse block, cells x genes,
# with 5% of the entries non-zero.
n_background = n_genes - n_svg
X_random = sparse.random(
    n_cells,
    n_background,
    density=0.05,
    random_state=42,
    format="csc",
)

# Spatial genes: each gene is switched on with a probability that follows a
# logistic ramp along a randomly oriented direction across the grid, and the
# cells that are switched on receive Poisson(3) values.
x_pos, y_pos = coords[:, 0], coords[:, 1]
svg_columns = []
for _ in range(n_svg):
    theta = rng.uniform(0, 2 * np.pi)
    gradient = np.cos(theta) * x_pos + np.sin(theta) * y_pos
    centered = gradient - gradient.mean()
    prob = 1 / (1 + np.exp(-0.15 * centered))
    # Draw order matters for reproducibility: on/off mask first, then counts.
    is_expressed = rng.binomial(1, prob)
    counts = rng.poisson(3, n_cells)
    svg_columns.append(is_expressed * counts)
spatial_signal = np.column_stack(svg_columns).astype(float)

# Final matrix: the spatial genes occupy the first n_svg columns.
X_spatial = sparse.csc_matrix(spatial_signal)
X = sparse.hstack([X_spatial, X_random], format="csc")

print(f"Expression matrix: {X.shape[0]} cells x {X.shape[1]} genes")
print(f"Sparsity: {1 - X.nnz / (X.shape[0] * X.shape[1]):.1%}")
print(f"True SVGs: genes 0-{n_svg - 1}")

### Run FlashS

In [None]:
# Fit on the cell coordinates, then test the expression matrix against
# whatever the fit call returns.
model = FlashS(random_state=42)
fitted = model.fit(coords)
result = fitted.test(X)

# Report the shape of each statistic exposed on the result.
print(f"P-values: {result.pvalues.shape}")
print(f"Effect sizes: {result.effect_size.shape}")
print(f"Q-values: {result.qvalues.shape}")

### Examine results

In [None]:
import pandas as pd

# One row per gene, ordered from smallest to largest p-value. Genes
# 0..n_svg-1 are the simulated ground-truth SVGs.
gene_labels = [f"gene_{i}" for i in range(n_genes)]
df = pd.DataFrame(
    {
        "gene": gene_labels,
        "pvalue": result.pvalues,
        "qvalue": result.qvalues,
        "effect_size": result.effect_size,
        "is_true_svg": np.arange(n_genes) < n_svg,
    }
).sort_values("pvalue")

# Significant genes at FDR < 0.05
sig = df.loc[df["qvalue"] < 0.05]
print(f"Significant genes (q < 0.05): {len(sig)}")
print(f"True positives: {sig['is_true_svg'].sum()} / {n_svg}")
print(f"False positives: {(~sig['is_true_svg']).sum()}")
print()

# Plain-text table of the ten smallest p-values, without the index column.
display_cols = ["gene", "pvalue", "qvalue", "effect_size", "is_true_svg"]
print("Top 10 genes:")
print(df.loc[:, display_cols].head(10).to_string(index=False))

## 2. AnnData Integration

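FlashS integrates with the Scanpy/AnnData ecosystem via `run_flashs()`: the coordinates are looked up in `adata.obsm` under the given `spatial_key`, and the per-gene results are added as columns of `adata.var`.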

In [None]:
import anndata as ad
from flashs.io import run_flashs

# Wrap the simulated data in an AnnData object: the expression matrix as X,
# one obs entry per cell, one var entry per gene, and the grid coordinates
# stored under obsm["spatial"].
cell_names = [f"cell_{i}" for i in range(n_cells)]
gene_names = [f"gene_{i}" for i in range(n_genes)]
adata = ad.AnnData(
    X=X,
    obs=pd.DataFrame(index=cell_names),
    var=pd.DataFrame(index=gene_names),
    obsm={"spatial": coords},
)

print(f"AnnData: {adata}")

In [None]:
# Run FlashS on the AnnData object, pointing it at the "spatial" key.
result = run_flashs(adata, spatial_key="spatial", random_state=42)

# Results are stored in adata.var; list every column whose name mentions flashs.
flashs_columns = [name for name in adata.var.columns if "flashs" in name]
print("Columns added to adata.var:")
print(flashs_columns)
print()

# Ten genes with the smallest FlashS p-values and their statistics.
report_columns = ["flashs_pvalue", "flashs_qvalue", "flashs_effect_size"]
top_svgs = adata.var.sort_values("flashs_pvalue").head(10)[report_columns]
print("Top 10 SVGs:")
print(top_svgs)