# Lecture 10: Deep Learning Part 1 - SOLUTION

**Date:** January 3, 2026

---

In [None]:
import scanpy as sc
import scvi
import matplotlib.pyplot as plt

# Set the scvi-tools logging verbosity for the rest of the notebook.
# NOTE(review): 3 looks like scanpy's 0-3 verbosity scale; scvi-tools appears to
# forward this value to Python logging levels (where INFO is 20) - confirm that 3 is intended.
scvi.settings.verbosity = 3

## Task 1: Preparing Data for scVI (20 points)

In [None]:
# Load PBMC 10k
# scanpy has no `pbmc10k` loader, so fetch the 10x Genomics "pbmc_10k_v3"
# dataset through scvi-tools (downloaded on first use).
adata = scvi.data.dataset_10x(dataset_name="pbmc_10k_v3")
# Gene symbols can repeat in 10x outputs; make them unique so lookups by name are unambiguous.
adata.var_names_make_unique()
print(f"Loaded: {adata.n_obs} cells × {adata.n_vars} genes")

# Basic QC: drop near-empty cells and genes detected in almost no cells.
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)

# Flag mitochondrial genes by their 'MT-' prefix and compute per-cell QC metrics
# (adds 'pct_counts_mt' and 'total_counts' to adata.obs, both used later).
adata.var['mt'] = adata.var_names.str.startswith('MT-')
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], inplace=True)

# Keep cells with < 20% mitochondrial counts. Boolean indexing returns a *view*;
# scvi-tools refuses to register a view, so materialise it with .copy().
adata = adata[adata.obs['pct_counts_mt'] < 20, :].copy()

# Register for scVI (layer=None -> the model reads counts from adata.X,
# which has not been normalised in this cell).
scvi.model.SCVI.setup_anndata(adata, layer=None)
print(f"\nPrepared: {adata.n_obs} cells × {adata.n_vars} genes")

## Task 2: Training scVI Model (30 points)

In [None]:
# Build the scVI model on the registered AnnData:
# a 30-dimensional latent space and 2 hidden layers.
model = scvi.model.SCVI(adata, n_latent=30, n_layers=2)

# Fit for at most 400 epochs; early stopping may end training sooner.
model.train(max_epochs=400, early_stopping=True)

# Training-ELBO history with the first 10 recorded entries dropped
# so that the start of training does not dominate the y-axis.
elbo_history = model.history['elbo_train']
train_elbo = elbo_history[10:]

# Draw the training curve on the current axes.
ax = plt.gca()
ax.plot(train_elbo, label='Training ELBO')
ax.set_xlabel('Epoch')
ax.set_ylabel('ELBO Loss')
ax.set_title('scVI Training Curve')
ax.legend()
ax.grid(alpha=0.3)
plt.show()

print("scVI model trained successfully!")

## Task 3: Latent Representation Analysis (30 points)

In [None]:
# Extract latent representation (one row per cell) and store it next to the data.
adata.obsm['X_scvi'] = model.get_latent_representation()

# Neighbour graph, UMAP and Leiden clusters computed on the scVI latent space.
sc.pp.neighbors(adata, use_rep='X_scvi')
sc.tl.umap(adata)
sc.tl.leiden(adata, resolution=0.5)

# Keep the scVI embedding under its own key: the PCA comparison below
# recomputes a UMAP, which writes to obsm['X_umap'].
adata.obsm['X_umap_scvi'] = adata.obsm['X_umap'].copy()

# Visualize
sc.pl.umap(adata, color=['leiden', 'total_counts'], ncols=2)

# Compare with PCA
# No normalisation has been applied to adata.X in this notebook, and PCA on
# un-normalised counts is dominated by sequencing depth. Normalise and
# log-transform a temporary copy so adata.X itself stays untouched for the model.
adata_pca = adata.copy()
sc.pp.normalize_total(adata_pca, target_sum=1e4)
sc.pp.log1p(adata_pca)
sc.pp.pca(adata_pca)
adata.obsm['X_pca'] = adata_pca.obsm['X_pca']
del adata_pca  # free the temporary copy

# Store the PCA graph under its own key so the scVI graph in
# adata.uns['neighbors'] is not overwritten.
sc.pp.neighbors(adata, use_rep='X_pca', key_added='pca')
sc.tl.umap(adata, neighbors_key='pca')
adata.obsm['X_umap_pca'] = adata.obsm['X_umap'].copy()

# Restore the scVI UMAP as the default embedding for later sc.pl.umap calls.
adata.obsm['X_umap'] = adata.obsm['X_umap_scvi']

# Same Leiden labels (from the scVI graph) shown on the PCA-based embedding.
sc.pl.embedding(adata, basis='X_umap_pca', color='leiden', title='PCA-based UMAP')

print("Latent space analyzed!")

## Task 4: Denoised Expression (20 points)

In [None]:
# Get denoised expression.
# return_numpy=True requests a plain array (the default return is a DataFrame),
# which is what an AnnData layer is meant to hold.
adata.layers['scvi_normalized'] = model.get_normalized_expression(return_numpy=True)

# Compare raw vs denoised for marker genes
markers = ['CD3D', 'CD79A', 'CD14']

# One column per marker: top row = values in adata.X, bottom row = scVI output.
fig, axes = plt.subplots(2, len(markers), figsize=(15, 10))

for i, gene in enumerate(markers):
    # Raw: adata.raw is never set in this notebook, so use_raw=True would fail;
    # read the values directly from adata.X instead.
    sc.pl.umap(adata, color=gene, use_raw=False, ax=axes[0, i],
               title=f'{gene} (Raw)', show=False)
    # Denoised: colour by the scVI-normalised layer stored above.
    sc.pl.umap(adata, color=gene, layer='scvi_normalized', ax=axes[1, i],
               title=f'{gene} (Denoised)', show=False)

plt.tight_layout()
plt.show()

print("Denoising complete!")
print("Advantages: Reduced dropout, smoother expression, better marker detection")

---

## Summary

- ✓ scVI model training
- ✓ Latent space analysis
- ✓ Denoised expression
- ✓ Comparison with PCA