# scRNA-seq — Python (Scanpy)
- PBMC 3k analysis pipeline

# Install packages


In [None]:
import scanpy as sc
# Load the PBMC 3k example dataset shipped with scanpy.
# In the returned object, cells are the observations (`adata.obs`) and genes
# are the variables (`adata.var`).
adata = sc.datasets.pbmc3k()
# Bare expression: shows the object summary as the notebook cell output.
adata

In [None]:
import scanpy as sc
import numpy as np

# --- Quality control ---
# Flag mitochondrial genes (symbol starts with "MT-", case-insensitive) so that
# calculate_qc_metrics adds the per-cell 'pct_counts_mt' column used below.
adata.var['mt'] = adata.var_names.str.upper().str.startswith('MT-')
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, inplace=True)

# --- Filtering ---
# Cells: keep total counts between 500 and 40,000 and < 10 % mitochondrial counts.
sc.pp.filter_cells(adata, min_counts=500)
sc.pp.filter_cells(adata, max_counts=40000)
adata = adata[adata.obs['pct_counts_mt'] < 10].copy()
# Genes: keep those detected in at least 3 cells.
sc.pp.filter_genes(adata, min_cells=3)

# --- Normalization ---
# Scale every cell to 10,000 total counts, then log(1 + x) transform.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Freeze the log-normalized expression of ALL genes before the matrix is
# subset to highly variable genes and scaled. Plotting and differential
# expression functions read from `adata.raw` by default when it is set, so
# they work on log-normalized values instead of scaled/clipped ones and can
# still see genes that are not highly variable.
adata.raw = adata.copy()

# --- Highly variable genes ---
# The 'seurat' flavor expects log-transformed data, hence it runs after log1p.
# subset=True keeps only the 2000 selected genes in `adata` (not in `adata.raw`).
sc.pp.highly_variable_genes(adata, n_top_genes=2000, subset=True, flavor='seurat')

# --- PCA ---
# Scale each gene and clip values at 10 before the PCA.
sc.pp.scale(adata, max_value=10)
sc.tl.pca(adata, svd_solver='arpack')

# --- Neighborhood graph, UMAP embedding, Leiden clustering ---
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=40)
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution=1.0)

# --- Visualization ---
# UMAP colored by cluster and by mitochondrial fraction (a quick QC check).
sc.pl.umap(adata, color=['leiden', 'pct_counts_mt'], wspace=0.4)

In [None]:
# Initial annotation: plot canonical marker genes on the UMAP, one figure per
# candidate cell type.
markers = {
    'Naive T': ['CCR7', 'IL7R'],
    'Memory T': ['S100A4'],
    'B': ['MS4A1'],
    'NK': ['NKG7', 'GNLY'],
    'Monocytes': ['LYZ'],
    'Dendritic': ['FCER1A', 'CST3'],
}

# sc.pl.umap takes gene expression from `adata.raw` when it is set, so markers
# must be looked up there; otherwise fall back to the main matrix.
available_genes = adata.raw.var_names if adata.raw is not None else adata.var_names

for celltype, genes in markers.items():
    present = [g for g in genes if g in available_genes]
    missing = [g for g in genes if g not in available_genes]
    if missing:
        # Report skipped markers instead of dropping them silently.
        print(f"{celltype}: marker(s) not found, skipped: {', '.join(missing)}")
    if present:
        # One title per panel, naming the gene shown in that panel.
        sc.pl.umap(
            adata,
            color=present,
            title=[f"{celltype}: {g}" for g in present],
            wspace=0.4,
        )

In [None]:
# Differential expression between clusters.
import pandas as pd

# Rank genes for each Leiden cluster using the Wilcoxon method.
sc.tl.rank_genes_groups(adata, groupby='leiden', method='wilcoxon')

# Plot the 20 top-ranked genes per cluster; panels do not share a y-axis.
sc.pl.rank_genes_groups(
    adata,
    n_genes=20,
    sharey=False,
)

# Retrieve the ranking results as a table (no restriction to a single
# cluster: group=None) and preview the first rows.
res = sc.get.rank_genes_groups_df(adata, group=None)
res.head()