# Lecture 11: Deep Learning Part 2 - SOLUTION

**Date:** January 9, 2026

---

In [None]:
import scanpy as sc
import scvi

# Set the logging verbosity used by scvi-tools for the rest of the notebook.
# NOTE(review): scvi-tools appears to interpret this value as a Python logging
# level (e.g. 20 = INFO, 10 = DEBUG) rather than scanpy's 0-3 scale — confirm
# that 3 is really the intended level.
scvi.settings.verbosity = 3

## Task 1: Semi-Supervised Annotation with scANVI (35 points)

In [None]:
# Task 1 solution: semi-supervised cell-type annotation with scANVI.
#
# Workflow: load raw counts -> hide most of the known labels -> pre-train scVI
# -> fine-tune scANVI on the partially labelled data -> predict and visualize.
import numpy as np

# Load raw counts. scanpy has no `pbmc10k` loader, so the PBMC 3k dataset is
# used; X is left un-normalized because scVI/scANVI are trained on counts.
adata = sc.datasets.pbmc3k()
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

# Published annotations for the same cells (stored in obs['louvain'] of the
# processed PBMC 3k object). Only cells present in both objects can be labelled.
reference = sc.datasets.pbmc3k_processed()
annotated_cells = adata.obs_names.intersection(reference.obs_names)

# Simulate partial labels: reveal the true annotation for ~30% of the cells and
# mark every other cell as 'Unknown' so scANVI has to infer its type.
np.random.seed(42)
n_labeled = min(int(0.3 * adata.n_obs), len(annotated_cells))
labeled_cells = np.random.choice(annotated_cells, n_labeled, replace=False)
adata.obs['cell_type'] = 'Unknown'
adata.obs.loc[labeled_cells, 'cell_type'] = (
    reference.obs.loc[labeled_cells, 'louvain'].astype(str).to_numpy()
)

# Train scVI first (unsupervised); scANVI is initialized from its weights.
scvi.model.SCVI.setup_anndata(adata)
vae = scvi.model.SCVI(adata)
vae.train(max_epochs=100)

# Initialize scANVI from scVI, telling it which category means "no label".
scanvi_model = scvi.model.SCANVI.from_scvi_model(
    vae, labels_key='cell_type', unlabeled_category='Unknown'
)
scanvi_model.train(max_epochs=100)

# Predict a cell type for every cell (labelled and unlabelled alike).
adata.obs['predicted_cell_type'] = scanvi_model.predict()

# Visualize the given labels next to the predictions on the scANVI latent space.
adata.obsm['X_scANVI'] = scanvi_model.get_latent_representation()
sc.pp.neighbors(adata, use_rep='X_scANVI')
sc.tl.umap(adata)
sc.pl.umap(adata, color=['cell_type', 'predicted_cell_type'], ncols=2)

print("scANVI annotation complete!")

## Task 2: Batch Integration (35 points)

In [None]:
# Task 2 solution: batch integration with scVI.
#
# Workflow: load data -> assign batches -> visualize the uncorrected embedding
# -> train scVI with the batch covariate -> visualize the corrected embedding.
import numpy as np  # repeated here so the cell also runs on its own

# Load multi-batch dataset
adata = sc.datasets.pbmc3k()
# Simulate batch membership. Cells are assigned at random, so this only adds a
# batch column; it does not inject a systematic batch effect into the counts.
adata.obs['batch'] = np.random.choice(['Batch1', 'Batch2'], adata.n_obs)

# Keep the raw counts: the normalization below overwrites adata.X, but scVI
# must be trained on counts, not on log-normalized values.
adata.layers['counts'] = adata.X.copy()

# Visualize batch effect before integration
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.pl.umap(adata, color='batch', title='Before Integration')

# Train scVI with batch correction, reading counts from the saved layer
scvi.model.SCVI.setup_anndata(adata, layer='counts', batch_key='batch')
model = scvi.model.SCVI(adata)
model.train()

# Extract batch-corrected latent space
adata.obsm['X_scvi'] = model.get_latent_representation()

# Compute UMAP on corrected data (this replaces the PCA-based neighbor graph)
sc.pp.neighbors(adata, use_rep='X_scvi')
sc.tl.umap(adata)
sc.pl.umap(adata, color='batch', title='After scVI Integration')

print("Batch integration complete!")

## Task 3: Integration Quality Metrics (30 points)

In [None]:
# Task 3 solution: score the scVI integration with scib, then compare with
# Harmony. Expects `adata` from Task 2 (obs['batch'], obsm['X_pca'],
# obsm['X_scvi'] and a neighbor graph built on 'X_scvi').
import scib
import scanpy.external as sce

# scib needs a label column, but nothing upstream creates 'leiden'. Derive the
# clusters from the uncorrected PCA space, and store that graph under its own
# key so the scVI-based default graph (read by graph connectivity) is kept.
if 'leiden' not in adata.obs:
    sc.pp.neighbors(adata, use_rep='X_pca', key_added='pca_neighbors')
    sc.tl.leiden(adata, neighbors_key='pca_neighbors', key_added='leiden')

# Calculate integration metrics on the scVI embedding. Only silhouette, graph
# connectivity and PC regression are switched on; the rest are disabled.
metrics = scib.metrics.metrics(
    adata, adata_int=adata, batch_key='batch', label_key='leiden',
    embed='X_scvi', isolated_labels_asw_=False, silhouette_=True,
    hvg_score_=False, graph_conn_=True, pcr_=True, kBET_=False,
    ilisi_=False, clisi_=False
)

print("Integration quality metrics:")
print(metrics)

# Compare with Harmony: correct the PCA embedding, then rebuild the default
# neighbor graph and UMAP on it (this overwrites the scVI-based graph/UMAP).
sce.pp.harmony_integrate(adata, 'batch', basis='X_pca', adjusted_basis='X_harmony')
sc.pp.neighbors(adata, use_rep='X_harmony')
sc.tl.umap(adata)
sc.pl.umap(adata, color='batch', title='After Harmony Integration')

print("\nIntegration comparison complete!")
print("scVI pros: Probabilistic, handles dropout, scalable")
print("Harmony pros: Fast, deterministic, linear")

---

## Summary

- ✓ scANVI semi-supervised annotation
- ✓ Batch integration with scVI
- ✓ Integration quality assessment
- ✓ Method comparison