# Lecture 7: Trajectory Inference and Fate Probability - SOLUTION

**Date:** December 26, 2025

---

In [None]:
import scanpy as sc
import scvelo as scv
import cellrank as cr

# Apply scVelo's bundled 'scvelo' figure style to the plots in this notebook.
scv.settings.set_figure_params('scvelo')
# Logging level 3; scVelo documents this level as also printing hints
# (0 = errors, 1 = warnings, 2 = info, 3 = hints).
scv.settings.verbosity = 3

## Task 1: RNA Velocity Analysis (30 points)

In [None]:
# Load the pancreas dataset shipped with scVelo
adata = scv.datasets.pancreas()
print(f"Loaded: {adata.n_obs} cells × {adata.n_vars} genes")

# Preprocess: filter genes, normalize, keep the 2000 top genes, then compute
# the neighbor-averaged moments that the velocity estimation works on
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)

# Fit the splicing kinetics of each gene. mode='dynamical' below reads the
# fitted parameters, so this step has to run before scv.tl.velocity.
scv.tl.recover_dynamics(adata)

# Estimate RNA velocity
scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)

# Latent time from the dynamical model; stored in adata.obs['latent_time'],
# which the gene-trend plot in Task 4 uses as its time_key
scv.tl.latent_time(adata)

# Visualize
scv.pl.velocity_embedding_stream(adata, basis='umap', color='clusters')
scv.pl.velocity_embedding(adata, basis='umap', arrow_length=3, arrow_size=2, color='clusters')

print("RNA velocity computed!")

## Task 2: Identifying Driver Genes (20 points)

In [None]:
# Rank genes by cluster-specific velocity
scv.tl.rank_velocity_genes(adata, groupby='clusters', min_corr=.3)

# Ranked gene names: one column per cluster, best-ranked gene in the first row
df = scv.DataFrame(adata.uns['rank_velocity_genes']['names'])

# Plot top genes: the best-ranked gene of each cluster (duplicates removed),
# limited to five panels. Slicing adata.var['velocity_genes'] would only give
# the first five genes in var order, because that column is a per-gene flag
# rather than a ranking.
top_genes = list(df.iloc[0].dropna().unique()[:5])
scv.pl.scatter(adata, basis=top_genes, ncols=5, frameon=False)

# Velocity genes per cluster
print("\nTop velocity genes per cluster:")
print(df.head(10))

## Task 3: Trajectory Analysis with CellRank (30 points)

In [None]:
# Velocity kernel: transition matrix derived from the RNA velocity estimates
vk = cr.kernels.VelocityKernel(adata)
vk.compute_transition_matrix()
# Connectivity kernel: transition matrix derived from cell-cell connectivities
ck = cr.kernels.ConnectivityKernel(adata).compute_transition_matrix()

# Combine kernels (80% velocity, 20% connectivities)
combined_kernel = 0.8 * vk + 0.2 * ck

# Estimator
g = cr.estimators.GPCCA(combined_kernel)

# Identify macrostates (named after the 'clusters' annotation)
g.compute_schur(n_components=10)
g.compute_macrostates(n_states=3, cluster_key='clusters')

# Terminal states must be chosen among the macrostates that were actually
# found. With only three macrostates, some of the requested populations may
# not be resolved, so check before calling set_terminal_states.
# NOTE(review): if a requested state is reported missing, consider raising
# n_states (and n_components) above - confirm against the macrostate plot.
requested_terminal = ['Alpha', 'Beta', 'Delta']
found_macrostates = list(g.macrostates.cat.categories)
terminal_states = [s for s in requested_terminal if s in found_macrostates]
missing_states = [s for s in requested_terminal if s not in found_macrostates]
if not terminal_states:
    raise ValueError(
        f"None of the requested terminal states {requested_terminal} are "
        f"among the computed macrostates {found_macrostates}."
    )
if missing_states:
    print(f"Warning: macrostates {missing_states} were not found "
          f"(available: {found_macrostates}); continuing with {terminal_states}.")
g.set_terminal_states(terminal_states)

# Compute fate probabilities towards the terminal states
g.compute_fate_probabilities()

# Visualize: one panel per terminal state, then all in a single panel
g.plot_fate_probabilities(same_plot=False)
g.plot_fate_probabilities(same_plot=True)

print("CellRank analysis complete!")

## Task 4: Lineage Drivers (20 points)

In [None]:
# Identify lineage drivers for the Alpha and Beta lineages
g.compute_lineage_drivers(lineages=['Alpha', 'Beta'], cluster_key='clusters')

# Plot drivers
g.plot_lineage_drivers('Alpha', n_genes=5)
g.plot_lineage_drivers('Beta', n_genes=5)

# Gene expression trends
# gene_trends reads its pseudotime from adata.obs['latent_time']; compute it
# here if no earlier step has stored it.
# NOTE(review): scv.tl.latent_time relies on the dynamical model having been
# fitted (scv.tl.recover_dynamics) - confirm the velocity step ran it.
if 'latent_time' not in adata.obs:
    scv.tl.latent_time(adata)

model = cr.models.GAM(adata)
cr.pl.gene_trends(adata, model, data_key='X', genes=['Ins1', 'Gcg', 'Sst'],
                  ncols=3, time_key='latent_time')

print("Lineage drivers identified!")

---

## Summary

- ✓ RNA velocity with scVelo
- ✓ Velocity driver genes
- ✓ Cell fate probabilities with CellRank
- ✓ Lineage-specific drivers