In [2]:
import scanpy as sc
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import umap

# --- 1. Load Data ---
# We compare against the TRAINING latents (because that's what the flow learned).
# Update these paths to where your training data and generated latents are stored.
TRAIN_DATA_PATH = "/dtu/blackhole/06/213542/paperdata/pbmc3k_train_with_latent.h5ad"
GEN_LATENT_PATH = "/dtu/blackhole/06/213542/paperdata/simple_generated_latent.pt" # Note: Load the latent, not counts!

print("Loading data...")
adata_train = sc.read_h5ad(TRAIN_DATA_PATH)
real_latents = adata_train.obsm["X_latent"]

# Load Generated Latents
# (We need the output of the Flow model, before decoding.)
# Preference order:
#   1. A 'generated_full' tensor already present in this session (e.g. the
#      training notebook was run in the same kernel).
#   2. The tensor saved to GEN_LATENT_PATH by the training script.
# If neither is available, gen_latents is left as None so the code that
# follows can skip the comparison instead of crashing.
if 'generated_full' in locals():
    # detach() so tensors that still track gradients can be converted to NumPy.
    gen_latents = generated_full.detach().cpu().numpy()
else:
    gen_latents = None
    try:
        # NOTE(review): depending on the installed torch version, torch.load may
        # unpickle arbitrary objects -- only point GEN_LATENT_PATH at trusted files.
        gen_latents = torch.load(GEN_LATENT_PATH, map_location='cpu').detach().numpy()
    except FileNotFoundError:
        # The file genuinely does not exist: tell the user which path was tried.
        print(f"Could not find {GEN_LATENT_PATH}. Please ensure you save the 'generated_full' tensor in the training script.")
    except Exception as err:
        # Any other failure (corrupt file, non-tensor object, permissions, ...)
        # is reported with its real cause rather than disguised as "not found".
        print(f"Failed to load generated latents from {GEN_LATENT_PATH}: {type(err).__name__}: {err}")

if gen_latents is not None:
    # Report the shapes of both latent sets before they are combined.
    print(f"Real Latents: {real_latents.shape}")
    print(f"Gen Latents:  {gen_latents.shape}")

    # --- 2. Run UMAP on Latents ---
    print("Running UMAP on Latent Space...")

    # Stack real rows first and generated rows second so that a single UMAP
    # fit embeds both sets into the same coordinate system.
    combined = np.vstack([real_latents, gen_latents])
    labels = np.array(
        ["Real"] * len(real_latents) + ["Generated"] * len(gen_latents)
    )

    reducer = umap.UMAP()
    embedding = reducer.fit_transform(combined)

    # --- 3. Plot ---
    # Boolean row selectors for each group of points in the joint embedding.
    mask_real = labels == "Real"
    mask_gen = labels == "Generated"

    plt.figure(figsize=(8, 6))

    # (row mask, colour, opacity, legend entry) per group; drawn in this order,
    # so generated points are painted on top of the real ones.
    scatter_groups = (
        (mask_real, 'tab:blue', 0.3, 'Real (Train)'),
        (mask_gen, 'tab:orange', 0.5, 'Generated'),
    )
    for group_mask, colour, opacity, legend_name in scatter_groups:
        points = embedding[group_mask]
        plt.scatter(points[:, 0], points[:, 1],
                    c=colour, s=10, alpha=opacity, label=legend_name)

    plt.legend()
    plt.title("Latent Space Comparison (Flow Output)")
    plt.xlabel("UMAP 1")
    plt.ylabel("UMAP 2")
    plt.show()

Loading data...
Could not find generated_latent.pt. Please ensure you save the 'generated_full' tensor in the training script.
