In [1]:
import scanpy as sc
import pandas as pd
import os
import numpy as np

# CONFIGURATION
# Where the reference atlas is cached on disk (directory + file name).
SAVE_DIR = "../data/external/single_cell_atlases"
FILENAME = "tabula_muris_senis_kidney_droplet.h5ad"

# Direct Figshare download link for the Tabula Muris Senis (Kidney - Droplet) object.
# A single-tissue file is fetched directly instead of going through a dataset helper.
ATLAS_URL = "https://figshare.com/ndownloader/files/24187847"

def get_and_validate_atlas():
    """
    Load the kidney droplet atlas (downloading it on first use) and sanity-check it.

    The atlas is read from ``SAVE_DIR/FILENAME``. When that file does not exist,
    scanpy fetches it from ``ATLAS_URL`` into that path before reading it.

    Checks performed on the loaded object:
      A. ``obs['tissue']`` contains 'Kidney'                      (error if not)
      B. ``obs['age']`` contains a 24m or 30m entry               (warning if not)
      C. ``obs['cell_ontology_class']`` has a label containing
         'distal convoluted' (case-insensitive)                   (error if not)
      D. 'Slc12a3' is present in ``var_names``                    (warning if not)

    Returns:
        The object returned by ``sc.read`` for the atlas file.

    Raises:
        KeyError: if a required ``obs`` column is missing.
        ValueError: if check A or check C fails.
    """
    # 1. Setup Directory
    os.makedirs(SAVE_DIR, exist_ok=True)
    save_path = os.path.join(SAVE_DIR, FILENAME)

    # 2. Load from disk, downloading first if the file is not there yet
    if os.path.exists(save_path):
        print(f"✅ Atlas found locally at: {save_path}")
    else:
        print("⬇️ Downloading Atlas from official source (approx 150MB)...")
        print(f"   URL: {ATLAS_URL}")
        print(f"💾 Saving to {save_path}...")

    # sc.read expects the LOCAL file path as its first argument; backup_url is
    # only fetched (and stored at that path) when the file is missing. Passing
    # the URL as the filename, as before, does not point at a readable file.
    adata = sc.read(save_path, backup_url=ATLAS_URL)

    # 3. VERIFICATION (The "Did we get it right?" Step)
    print("\n🔎 VERIFYING ATLAS CONTENTS...")

    # Fail early, with a readable message, if the metadata columns are absent
    required_columns = ('tissue', 'age', 'cell_ontology_class')
    missing_columns = [col for col in required_columns if col not in adata.obs.columns]
    if missing_columns:
        raise KeyError(f"Atlas obs is missing required column(s): {missing_columns}")

    # Check A: Is it Kidney?
    tissues = adata.obs['tissue'].unique()
    print(f"   • Tissue detected: {tissues}")
    if 'Kidney' not in tissues:
        raise ValueError("❌ Wrong tissue! This is not the Kidney atlas.")

    # Check B: Is it the 'Senis' (Aging) atlas?
    # Stringify before sorting so missing or mixed-type values cannot break sorted()
    ages = sorted(str(age) for age in adata.obs['age'].unique())
    print(f"   • Ages detected: {ages}")
    if not any('24m' in age or '30m' in age for age in ages):
        print("   ⚠️ WARNING: No old mice (24m/30m) found. Is this the standard Tabula Muris?")
    else:
        print("   ✅ confirmed: Contains Aging (Senis) data.")

    # Check C: Does it have your target (DCT) cells?
    # Substring match so any label mentioning 'distal convoluted' counts
    cell_types = adata.obs['cell_ontology_class'].unique()
    dct_match = [ct for ct in cell_types if 'distal convoluted' in str(ct).lower()]

    if dct_match:
        print(f"   ✅ TARGET FOUND: {len(dct_match)} DCT cell type(s) identified:")
        for m in dct_match:
            print(f"      - {m}")
    else:
        raise ValueError("❌ CRITICAL: No Distal Convoluted Tubule cells found in this atlas!")

    # Check D: Marker Gene Check (The Fingerprint)
    # Slc12a3 is the definitive marker for DCT
    marker = 'Slc12a3'
    if marker in adata.var_names:
        print(f"   ✅ Marker Gene '{marker}' is present in the dataset.")
    else:
        print(f"   ⚠️ WARNING: Marker '{marker}' not found (check gene symbol capitalization).")

    print("\n🎉 SUCCESS: Atlas is valid and ready for deconvolution.")
    return adata

# --- EXECUTE ---
# Run the load + validation only when this code is the entry point
# (i.e. __name__ is "__main__"); the validated atlas is bound to `adata_ref`.
if __name__ == "__main__":
    adata_ref = get_and_validate_atlas()

ModuleNotFoundError: No module named 'scanpy'

In [None]:
# The previous call here (`sc.datasets.tabula_muris_senis(tissue=)`) could not run:
# `tissue=` without a value is a syntax error.
# NOTE(review): that loader also does not appear in scanpy's documented datasets API - confirm.
# Inspect the tissue annotation of the atlas loaded by get_and_validate_atlas() instead.
adata_ref.obs['tissue'].value_counts()

In [None]:
def validate_atlas_markers(adata, marker='Slc12a3',
                           dct_label='kidney distal convoluted tubule epithelial cell',
                           min_fold=2):
    """
    Checks if the Atlas actually contains the biology we care about.

    Prints a short report covering:
      1. whether any cell in ``adata.obs['cell_ontology_class']`` carries ``dct_label``;
      2. whether ``marker`` is in ``adata.var_names`` and, if both DCT and non-DCT
         cells exist, whether its mean value in ``.X`` for DCT cells exceeds
         ``min_fold`` times the mean for all other cells.

    Args:
        adata: annotated data object exposing ``obs``, ``var_names``, ``.X`` and
            row/column subsetting via ``adata[mask]`` / ``adata[:, gene]``.
        marker: gene symbol to test. Defaults to 'Slc12a3'
            (NCC, thiazide-sensitive symporter - the gold standard for DCT).
        dct_label: exact annotation string identifying DCT cells.
        min_fold: required ratio of DCT mean to non-DCT mean for the marker to
            be reported as specific.

    Returns:
        None. Results are reported via ``print`` only.
    """
    print("\n--- ATLAS VALIDATION ---")

    # 1. Check for DCT Cells
    annotations = adata.obs['cell_ontology_class']
    is_dct = annotations == dct_label  # boolean mask, computed once and reused below
    n_dct = int(is_dct.sum())
    n_other = int(len(is_dct)) - n_dct

    if n_dct > 0:
        print("✅ Atlas contains DCT cells.")
    else:
        print("❌ WARNING: No specific DCT annotation found!")
        print(f"Available types: {annotations.unique()}")

    # 2. Check for Marker Gene Expression
    if marker not in adata.var_names:
        print(f"❌ Marker '{marker}' not found in Atlas genes. Check gene symbols (e.g., capitalization).")
        return

    print(f"✅ Marker '{marker}' found in gene list.")

    # A mean over an empty group is undefined (NaN or a division error), and
    # would otherwise be misreported as "low specificity" - so skip the comparison.
    if n_dct == 0 or n_other == 0:
        print("⚠️ WARNING: Cannot assess marker specificity (need both DCT and non-DCT cells).")
        return

    # Check expression in DCT cells vs others
    dct_expr = adata[is_dct][:, marker].X.mean()
    other_expr = adata[~is_dct][:, marker].X.mean()

    print(f"Expression of {marker}:")
    print(f"  - In DCT cells: {dct_expr:.4f}")
    print(f"  - In Other cells: {other_expr:.4f}")

    if dct_expr > (other_expr * min_fold):
        print("✅ Marker is specific to DCT cells. Atlas is valid.")
    else:
        print("⚠️ WARNING: Marker specificity is low.")