# QC Filter A

**Pinned Environment:** [`envs/sc-spatial.yaml`](../../envs/sc-spatial.yaml)

In [None]:
import os
from pathlib import Path
import scanpy as sc
import sys
import warnings

In [None]:
# Ignore FutureWarning-category warnings from here on; all other warning
# categories are still shown.
warnings.filterwarnings("ignore", category=FutureWarning)

### Set paths, import data

In [None]:
# Put the directory two levels above the current working directory on the
# import path so that the `config` package below can be imported.
# NOTE(review): this depends on where the notebook is launched from —
# presumably that directory is the repository root; confirm.
sys.path.append(str(Path.cwd().resolve().parents[1]))

from config.paths import BASE_DIR

# Stage directories for this export: `01_raw` is the directory the .h5ad files
# are read from, `02_pre-filtered` is where the filtered files are written.
base_dir = BASE_DIR / "data/h5ad/export_01"
input_dir = base_dir / "01_raw"
output_dir = base_dir / "02_pre-filtered"

# Create the output directory (and any missing parents); no error if it
# already exists.
output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Make a list of the entries in the input directory. os.listdir() returns
# names in arbitrary order, so sort them to keep the displayed listing
# reproducible between runs and machines.
sample_list = sorted(os.listdir(input_dir))
sample_list

In [None]:
# Collect the .h5ad files in the input directory. os.listdir() returns entries
# in arbitrary order, so sort the paths: their order fixes both the positions
# in `adata_list` and the sample numbering printed below.
sample_files = sorted(
    os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.endswith(".h5ad")
)
adata_list = [sc.read_h5ad(f) for f in sample_files]

# Summary
for i, adata in enumerate(adata_list, start=1):
    # Label the object with the sample_id of its first observation (same
    # lookup as the filtering step uses).
    sample_id = adata.obs["sample_id"].iloc[0]
    print(f"Sample {i}: {sample_id}")
    print(f"  n_obs: {adata.n_obs}")
    print(f"  n_vars: {adata.n_vars}")
    print("-" * 40)

## Filter

In [None]:
# Plot the three QC keys as violins, one plotting call per loaded sample.
qc_keys = ["total_counts", "n_genes_by_counts", "cell_area"]
for adata in adata_list:
    sc.pl.violin(adata, keys=qc_keys)

In [None]:
# Filter cells by total counts: keep only cells whose `total_counts` value is
# strictly greater than the floor, replacing each list entry with the subset.
min_total_counts = 20
for i, adata in enumerate(adata_list):
    sample_id = adata.obs["sample_id"].iloc[0]
    print(f"{sample_id} shape before total_counts floor threshold: {adata.shape}")

    # Boolean mask over observations; .copy() detaches the subset from the
    # original object instead of keeping a view of it.
    keep_cells = adata.obs["total_counts"] > min_total_counts
    filtered = adata[keep_cells, :].copy()
    adata_list[i] = filtered

    print(f"{sample_id} shape after total_counts floor threshold: {filtered.shape}")

## Export

In [None]:
# Write each AnnData object in `adata_list` to `output_dir` as
# "<sample_name>.h5ad".
for i, adata in enumerate(adata_list, start=1):
    # Take the first sample_id by position: .iloc[0] always yields a scalar,
    # whereas a label lookup with .loc returns a Series when the first obs
    # name is duplicated, which would corrupt the file name.
    # Fall back to a positional name when the column is missing or the object
    # has no observations (indexing the first row would raise IndexError).
    if "sample_id" in adata.obs.columns and adata.n_obs > 0:
        sample_name = adata.obs["sample_id"].iloc[0]
    else:
        sample_name = f"sample_{i}"

    output_file = os.path.join(output_dir, f"{sample_name}.h5ad")
    adata.write(output_file)
    print(f"Saved: {output_file}")