# Group 2 Claim 1: V-ATPase Subunit Analysis (Colab Version)
## Testing: "V-ATPase subunits show differential expression patterns"

**Colab-optimized**: All 24 V-ATPase subunit gene symbols are embedded directly, so no external gene-list files are needed; only the dataset (`pool_processed_v2.h5ad`) has to be uploaded.

This notebook analyzes V-ATPase (vacuolar ATPase) subunit expression between tau-positive and tau-negative neurons.

In [None]:
# Google Colab Setup
import os
IN_COLAB = 'COLAB_GPU' in os.environ

if IN_COLAB:
    print("Running in Google Colab")
    # Install required packages
    !pip install -q pertpy pydeseq2 scanpy
    
    # Upload file prompt
    from google.colab import files
    print("\nPlease upload pool_processed_v2.h5ad when prompted:")
    uploaded = files.upload()
    data_path = 'pool_processed_v2.h5ad'
else:
    print("Running locally")
    data_path = '../../data/pool_processed_v2.h5ad'

In [None]:
# Import required packages
import pertpy as pt
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings

# Silence only upgrade-noise categories. A blanket filterwarnings('ignore')
# would also hide numerical RuntimeWarnings (e.g. invalid casts, divide by
# zero) that signal real problems in the analysis below.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

# 'seaborn-v0_8-whitegrid' exists from matplotlib 3.6 on; older releases ship
# the same style as 'seaborn-whitegrid'. Use whichever is available.
for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
sns.set_palette('husl')
print("Packages loaded successfully")

## 1. Load and Prepare Data

In [None]:
# Load the proteomics data
adata = sc.read_h5ad(data_path)
print(f"Data shape: {adata.shape}")

# Standardize column names: copy known alternative obs column names to the
# snake_case names used throughout the notebook (existing columns are kept).
column_mapping = {
    'TauStatus': 'tau_status',
    'MC1': 'mc1_score', 
    'Pseudotime': 'pseudotime'
}

for old_name, new_name in column_mapping.items():
    if old_name in adata.obs.columns and new_name not in adata.obs.columns:
        adata.obs[new_name] = adata.obs[old_name]

# The mapping above already handles 'TauStatus', so a missing 'tau_status'
# here means the dataset has no usable tau annotation at all.
if 'tau_status' not in adata.obs.columns:
    raise ValueError("Cannot find tau status column")

# Ensure tau_status is categorical
adata.obs['tau_status'] = pd.Categorical(adata.obs['tau_status'])
adata.obs['tau_positive'] = (adata.obs['tau_status'] == 'positive').astype(int)

# Later cells hard-code the 'positive' / 'negative' labels; flag a mismatch early.
tau_levels = set(adata.obs['tau_status'].cat.categories)
if not {'positive', 'negative'} <= tau_levels:
    print(f"⚠ tau_status levels are {sorted(map(str, tau_levels))}; "
          "expected 'positive' and 'negative'")

print(f"\nTau-positive samples: {adata.obs['tau_positive'].sum()}")
print(f"Tau-negative samples: {(adata.obs['tau_positive'] == 0).sum()}")

# Prepare protein names
if 'GeneName' in adata.var.columns:
    adata.var['protein_name'] = adata.var['GeneName']
elif 'gene_name' in adata.var.columns:
    adata.var['protein_name'] = adata.var['gene_name']
else:
    adata.var['protein_name'] = adata.var.index

# Ensure dense matrix. This must happen before the counts layer is derived,
# because np.power is not an element-wise operation on scipy sparse matrices.
if hasattr(adata.X, 'toarray'):
    adata.X = adata.X.toarray()

# Create counts layer if needed
if 'counts' not in adata.layers:
    print("Creating pseudo-counts from log2 data...")
    pseudo_counts = np.power(2.0, np.asarray(adata.X, dtype=float)) * 1000
    # Casting NaN/inf to int yields arbitrary values, so missing measurements
    # are reported and stored as 0 counts instead.
    n_non_finite = int((~np.isfinite(pseudo_counts)).sum())
    if n_non_finite:
        print(f"  Note: {n_non_finite} missing/non-finite values set to 0 in the counts layer")
        pseudo_counts = np.nan_to_num(pseudo_counts, nan=0.0, posinf=0.0, neginf=0.0)
    adata.layers['counts'] = np.round(pseudo_counts).astype(int)

print("\nData preparation complete!")

## 2. Define V-ATPase Subunits (Embedded)

V-ATPase is a multi-subunit complex responsible for lysosomal acidification, critical for autophagy and protein degradation.

In [None]:
# COMPLETE V-ATPase SUBUNIT DEFINITIONS
# All 24 V-ATPase subunits embedded directly for Colab compatibility
# (11 entries listed under V0, including the two accessory proteins, and 13 under V1).

vatpase_subunits = {
    # V0 DOMAIN (Membrane-embedded, proton translocation)
    'V0_domain': [
        'ATP6V0A1',  # a1 isoform - neuronal specific
        'ATP6V0A2',  # a2 isoform - ubiquitous
        'ATP6V0A4',  # a4 isoform - kidney/epididymis
        'ATP6V0B',   # b subunit
        'ATP6V0C',   # c subunit - forms proton channel
        'ATP6V0D1',  # d1 isoform - ubiquitous
        'ATP6V0D2',  # d2 isoform - tissue-specific
        'ATP6V0E1',  # e1 isoform
        'ATP6V0E2',  # e2 isoform
        'ATP6AP1',   # Accessory protein 1 (Ac45)
        'ATP6AP2'    # Accessory protein 2 (PRR)
    ],
    
    # V1 DOMAIN (Cytoplasmic, ATP hydrolysis)
    'V1_domain': [
        'ATP6V1A',   # A subunit - catalytic
        'ATP6V1B1',  # B1 isoform - kidney
        'ATP6V1B2',  # B2 isoform - brain/ubiquitous
        'ATP6V1C1',  # C1 isoform - ubiquitous
        'ATP6V1C2',  # C2 isoform - lung/kidney
        'ATP6V1D',   # D subunit - central rotor
        'ATP6V1E1',  # E1 isoform - ubiquitous
        'ATP6V1E2',  # E2 isoform - testis
        'ATP6V1F',   # F subunit - central rotor
        'ATP6V1G1',  # G1 isoform - ubiquitous
        'ATP6V1G2',  # G2 isoform - brain enriched
        'ATP6V1G3',  # G3 isoform - kidney
        'ATP6V1H'    # H subunit - regulatory
    ]
}

# Flatten for analysis
all_vatpase = vatpase_subunits['V0_domain'] + vatpase_subunits['V1_domain']

# Additional V-ATPase related proteins
# (each symbol appears once so the printed count is not inflated)
vatpase_related = {
    'Assembly_factors': [
        'VMA21',     # V-ATPase assembly factor
        'CCDC115',   # V-ATPase assembly factor
        'ATP6AP1L',  # Accessory protein 1-like
        'TMEM199'    # V-ATPase assembly
    ],
    
    'Regulatory': [
        'RILP',      # Rab-interacting lysosomal protein
        'LAMTOR1',   # Late endosomal/lysosomal adaptor
        'LAMTOR2',   # Regulator of V-ATPase
        'LAMTOR3',   # V-ATPase-Ragulator complex
        'LAMTOR4',
        'LAMTOR5'
    ]
}

print("V-ATPase Subunit Summary:")
print("="*50)
print(f"V0 domain (membrane): {len(vatpase_subunits['V0_domain'])} subunits")
print(f"V1 domain (cytoplasmic): {len(vatpase_subunits['V1_domain'])} subunits")
print(f"Total core V-ATPase: {len(all_vatpase)} subunits")
print(f"\nAdditional related proteins: {sum(len(v) for v in vatpase_related.values())}")
print("\nBiological Functions:")
print("- Lysosomal acidification (pH 4.5-5.0)")
print("- Essential for autophagy and protein degradation")
print("- Neurotransmitter loading in synaptic vesicles")
print("- Bone resorption and renal acid secretion")

## 3. Find V-ATPase Proteins in Dataset

In [None]:
# Search for V-ATPase proteins in the dataset
import re

protein_names = adata.var['protein_name'] if 'protein_name' in adata.var else adata.var.index
protein_list = protein_names.tolist()

# Separators used to split grouped annotations such as "ATP6V1A;ATP6V1B2"
_NAME_SEPARATORS = re.compile(r'[;,|/\s]+')


def _find_subunit(symbol, names):
    """Return the first entry of `names` that refers to gene `symbol`, or None.

    An exact match is tried first. Otherwise each entry is split on
    ';', ',', '|', '/' or whitespace and compared token by token,
    case-insensitively. Whole-token matching avoids the false positives of
    substring search (e.g. 'ATP6AP1' matching 'ATP6AP1L', or a stripped
    'AP2' matching 'MAP2').
    """
    if symbol in names:
        return symbol
    target = symbol.upper()
    for name in names:
        if not isinstance(name, str):
            continue  # skip missing (NaN) annotations
        if target in _NAME_SEPARATORS.split(name.upper()):
            return name
    return None


def _search_domain(domain_label, symbols, names, missing):
    """Look up every symbol of one domain.

    Returns the matched dataset names (each listed once, in symbol order) and
    appends a (domain_label, symbol) tuple to `missing` for every symbol
    without a match.
    """
    found = []
    for symbol in symbols:
        hit = _find_subunit(symbol, names)
        if hit is None:
            missing.append((domain_label, symbol))
        elif hit not in found:
            found.append(hit)
    return found


# Track found proteins by domain
found_related = []  # kept for later use; vatpase_related is not searched in this cell
missing_vatpase = []

print("Searching for V-ATPase proteins...\n")

found_v0 = _search_domain('V0', vatpase_subunits['V0_domain'], protein_list, missing_vatpase)
found_v1 = _search_domain('V1', vatpase_subunits['V1_domain'], protein_list, missing_vatpase)

# Combine all found V-ATPase proteins
found_vatpase = found_v0 + found_v1

# Print search results
print("V-ATPase Protein Search Results:")
print("="*50)
print(f"V0 domain: {len(found_v0)}/{len(vatpase_subunits['V0_domain'])} found")
print(f"V1 domain: {len(found_v1)}/{len(vatpase_subunits['V1_domain'])} found")
print(f"Total found: {len(found_vatpase)}/{len(all_vatpase)} ({len(found_vatpase)/len(all_vatpase)*100:.1f}%)")

if found_vatpase:
    print(f"\n✓ Found V-ATPase proteins for analysis")
    if len(found_v0) > 0:
        print(f"  V0 proteins: {found_v0[:3]}..." if len(found_v0) > 3 else f"  V0 proteins: {found_v0}")
    if len(found_v1) > 0:
        print(f"  V1 proteins: {found_v1[:3]}..." if len(found_v1) > 3 else f"  V1 proteins: {found_v1}")

if missing_vatpase:
    print(f"\n⚠ Missing {len(missing_vatpase)} proteins:")
    for domain, protein in missing_vatpase[:5]:
        print(f"  {domain}: {protein}")
    if len(missing_vatpase) > 5:
        print(f"  ... and {len(missing_vatpase)-5} more")

## 4. Subset Data and Run PyDESeq2

In [None]:
# Create V-ATPase subset for analysis
# Produces `results_vatpase` with columns protein, log2FoldChange, pvalue, padj
# (plus method-specific extras), whichever analysis path succeeds.
if found_vatpase:
    # Get indices of V-ATPase proteins
    vatpase_indices = [i for i, p in enumerate(protein_list) if p in found_vatpase]
    adata_vatpase = adata[:, vatpase_indices].copy()
    
    # Map each subset variable to the name the search step matched on, so the
    # 'protein' column lines up with found_v0 / found_v1 even when var.index
    # holds different identifiers (e.g. accessions).
    subset_names = [protein_list[i] for i in vatpase_indices]
    name_by_var = dict(zip(adata_vatpase.var.index, subset_names))
    
    print(f"V-ATPase subset shape: {adata_vatpase.shape}")
    print(f"  Samples: {adata_vatpase.n_obs}")
    print(f"  V-ATPase proteins: {adata_vatpase.n_vars}")
    
    # Run PyDESeq2
    print("\nRunning differential expression analysis...")
    
    try:
        # Use counts if available (the log2 matrix is stashed so the fallback can restore it)
        if 'counts' in adata_vatpase.layers:
            adata_vatpase.layers['log2'] = adata_vatpase.X.copy()
            adata_vatpase.X = adata_vatpase.layers['counts'].copy()
        
        # Initialize PyDESeq2
        pds2 = pt.tl.PyDESeq2(
            adata=adata_vatpase,
            design="~tau_status",
            refit_cooks=True
        )
        
        pds2.fit()
        
        # Test contrast
        results_vatpase = pds2.test_contrasts(
            pds2.contrast(
                column="tau_status",
                baseline="negative",
                group_to_compare="positive"
            )
        )
        
        # pertpy reports variable / log_fc / p_value / adj_p_value, while the
        # rest of the notebook reads the DESeq2-style names. Renaming is a
        # no-op for columns that already use the expected names.
        results_vatpase = results_vatpase.rename(columns={
            'variable': 'protein',
            'log_fc': 'log2FoldChange',
            'p_value': 'pvalue',
            'adj_p_value': 'padj'
        })
        if 'protein' not in results_vatpase.columns:
            results_vatpase = results_vatpase.rename_axis('protein').reset_index()
        results_vatpase['protein'] = [name_by_var.get(v, v) for v in results_vatpase['protein']]
        
        # Treat an unexpected result layout as a failure so the fallback runs
        # instead of later cells breaking on a missing column.
        missing_columns = {'protein', 'log2FoldChange', 'pvalue', 'padj'} - set(results_vatpase.columns)
        if missing_columns:
            raise KeyError(f"PyDESeq2 result is missing columns: {sorted(missing_columns)}")
        
        print("✓ PyDESeq2 analysis completed")
        
    except Exception as e:
        print(f"PyDESeq2 failed: {e}")
        print("\nUsing fallback traditional analysis...")
        
        # Restore log2 data if needed
        if 'log2' in adata_vatpase.layers:
            adata_vatpase.X = adata_vatpase.layers['log2'].copy()
        
        # Traditional differential expression (per-protein two-sample t-test on log2 values)
        expr_matrix = adata_vatpase.X
        if hasattr(expr_matrix, 'toarray'):
            expr_matrix = expr_matrix.toarray()
        expr_matrix = np.asarray(expr_matrix, dtype=float)
        
        results_list = []
        tau_pos = (adata_vatpase.obs['tau_status'] == 'positive').to_numpy()
        tau_neg = (adata_vatpase.obs['tau_status'] == 'negative').to_numpy()
        
        for i in range(adata_vatpase.n_vars):
            expr_pos = expr_matrix[tau_pos, i]
            expr_neg = expr_matrix[tau_neg, i]
            
            # Remove NaN values
            expr_pos = expr_pos[~np.isnan(expr_pos)]
            expr_neg = expr_neg[~np.isnan(expr_neg)]
            
            if len(expr_pos) > 3 and len(expr_neg) > 3:
                # Data are log2, so the difference of means is the log2 fold change
                log2fc = np.mean(expr_pos) - np.mean(expr_neg)
                tstat, pval = stats.ttest_ind(expr_pos, expr_neg)
                
                # Cohen's d effect size: the pooled SD weights the *sample*
                # variances (ddof=1) by their degrees of freedom.
                pooled_std = np.sqrt(((len(expr_pos)-1)*np.var(expr_pos, ddof=1) + 
                                     (len(expr_neg)-1)*np.var(expr_neg, ddof=1)) / 
                                    (len(expr_pos) + len(expr_neg) - 2))
                cohen_d = log2fc / pooled_std if pooled_std > 0 else 0
            else:
                # Too few observations: report a neutral, non-significant result
                log2fc = 0
                pval = 1
                tstat = 0
                cohen_d = 0
            
            results_list.append({
                'protein': subset_names[i],
                'log2FoldChange': log2fc,
                'pvalue': pval,
                'stat': tstat,
                'cohen_d': cohen_d
            })
        
        results_vatpase = pd.DataFrame(results_list)
        
        # Add FDR correction
        from statsmodels.stats.multitest import multipletests
        results_vatpase['padj'] = multipletests(results_vatpase['pvalue'], method='fdr_bh')[1]
        
        print("✓ Traditional analysis completed")
else:
    print("No V-ATPase proteins found for analysis")
    results_vatpase = pd.DataFrame()

## 5. Analyze Results by Domain

In [None]:
if len(results_vatpase) > 0:
    # Tag every result row with the complex domain it belongs to
    def _assign_domain(name):
        """Return 'V0', 'V1' or 'Unknown' by substring match against the found subunit names."""
        if any(v0 in name for v0 in found_v0):
            return 'V0'
        if any(v1 in name for v1 in found_v1):
            return 'V1'
        return 'Unknown'

    results_vatpase['domain'] = results_vatpase['protein'].apply(_assign_domain)

    # Overall statistics
    print("\nV-ATPase Differential Expression Summary:")
    print("="*60)

    fdr_hits = results_vatpase['padj'] < 0.05
    n_sig = fdr_hits.sum()
    n_total = len(results_vatpase)

    print(f"Total V-ATPase subunits analyzed: {n_total}")
    print(f"Significant (FDR < 0.05): {n_sig} ({n_sig/n_total*100:.1f}%)")

    # Direction of changes among the significant subunits
    if n_sig > 0:
        sig_df = results_vatpase[fdr_hits]
        n_up = (sig_df['log2FoldChange'] > 0).sum()
        n_down = (sig_df['log2FoldChange'] < 0).sum()
        print(f"  Upregulated: {n_up}")
        print(f"  Downregulated: {n_down}")

    # Domain-specific analysis
    print("\nAnalysis by Domain:")
    print("-"*60)

    for domain in ('V0', 'V1'):
        domain_df = results_vatpase[results_vatpase['domain'] == domain]
        if domain_df.empty:
            continue

        sig_domain = domain_df[domain_df['padj'] < 0.05]
        n_sig_domain = len(sig_domain)
        sig_pct = n_sig_domain/len(domain_df)*100
        mean_fc = domain_df['log2FoldChange'].mean()

        print(f"{domain} domain:")
        print(f"  Proteins: {len(domain_df)}")
        print(f"  Significant: {n_sig_domain} ({sig_pct:.1f}%)")
        print(f"  Mean log2FC: {mean_fc:.3f}")

        if n_sig_domain > 0:
            print(f"  Direction: {(sig_domain['log2FoldChange'] > 0).sum()} up, "
                  f"{(sig_domain['log2FoldChange'] < 0).sum()} down")

    # Top differentially expressed (up to five rows with the smallest FDR)
    rule = "-"*60
    print("\nTop Differentially Expressed V-ATPase Subunits:")
    print(rule)
    print(f"{'Protein':15} {'Domain':8} {'Log2FC':>8} {'P-value':>10} {'FDR':>10}")
    print(rule)

    top_vatpase = results_vatpase.nsmallest(min(5, n_total), 'padj')
    for name, dom, lfc, p_raw, p_adj in zip(top_vatpase['protein'],
                                            top_vatpase['domain'],
                                            top_vatpase['log2FoldChange'],
                                            top_vatpase['pvalue'],
                                            top_vatpase['padj']):
        direction = "↑" if lfc > 0 else "↓"
        print(f"{name:15} {dom:8} {direction}{abs(lfc):7.2f} "
              f"{p_raw:10.2e} {p_adj:10.2e}")

## 6. Create Volcano Plot

In [None]:
if len(results_vatpase) > 0:
    from matplotlib.patches import Patch

    fig, ax = plt.subplots(figsize=(10, 8))

    # -log10(p) for the y axis; the tiny offset keeps p == 0 finite
    results_vatpase['neg_log10_pval'] = -np.log10(results_vatpase['pvalue'] + 1e-300)

    # Marker colour/size: significant points are coloured by domain and drawn
    # larger, everything else is a small gray dot.
    domain_palette = {'V0': 'red', 'V1': 'blue'}
    is_significant = [padj < 0.05 for padj in results_vatpase['padj']]
    colors = [
        domain_palette.get(dom, 'green') if sig else 'gray'
        for dom, sig in zip(results_vatpase['domain'], is_significant)
    ]
    sizes = [150 if sig else 50 for sig in is_significant]

    # Create scatter plot
    scatter = ax.scatter(
        results_vatpase['log2FoldChange'],
        results_vatpase['neg_log10_pval'],
        c=colors, alpha=0.7, s=sizes, edgecolors='black', linewidth=0.5,
    )

    # Threshold lines: nominal p = 0.05, |log2FC| = 0.5, and the zero axis
    ax.axhline(y=-np.log10(0.05), color='black', linestyle='--', alpha=0.3, label='p = 0.05')
    for x_cut in (0.5, -0.5):
        ax.axvline(x=x_cut, color='black', linestyle='--', alpha=0.3)
    ax.axvline(x=0, color='black', linestyle='-', alpha=0.3, linewidth=0.5)

    # Label significant proteins, text placed away from the zero axis
    labelled = results_vatpase[results_vatpase['padj'] < 0.05]
    for label, x_val, y_val in zip(labelled['protein'],
                                   labelled['log2FoldChange'],
                                   labelled['neg_log10_pval']):
        side = 'left' if x_val > 0 else 'right'
        ax.annotate(label, (x_val, y_val),
                    fontsize=8, alpha=0.8,
                    xytext=(5, 5), textcoords='offset points',
                    ha=side)

    ax.set_xlabel('Log2 Fold Change (Tau+ vs Tau-)', fontsize=12)
    ax.set_ylabel('-Log10(p-value)', fontsize=12)
    ax.set_title('V-ATPase Subunits Differential Expression\nLysosomal Acidification Machinery',
                 fontsize=14, fontweight='bold')

    # Add grid
    ax.grid(True, alpha=0.3)

    # Legend built from proxy patches (the scatter itself carries no labels)
    legend_specs = [
        ('red', f'V0 domain (membrane, n={len(found_v0)})'),
        ('blue', f'V1 domain (cytoplasmic, n={len(found_v1)})'),
        ('gray', 'Not significant'),
    ]
    legend_elements = [Patch(facecolor=face, alpha=0.7, label=text) for face, text in legend_specs]
    ax.legend(handles=legend_elements, loc='upper left')

    # Summary box in the lower-right corner (n_sig / n_total come from the domain summary cell)
    stats_text = f"Significant: {n_sig}/{n_total} ({n_sig/n_total*100:.1f}%)"
    ax.text(0.95, 0.05, stats_text, transform=ax.transAxes,
            ha='right', va='bottom', fontsize=10,
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

    plt.tight_layout()
    plt.show()

    print("\nVolcano plot created!")

## 7. Domain-Specific Heatmap

In [None]:
if len(results_vatpase) > 0 and len(found_vatpase) > 3:
    # Create expression matrix for heatmap: one column per found V-ATPase
    # protein, one row per sample, values taken from adata.X.
    expr_columns = {}
    for protein in found_vatpase:
        if protein in protein_list:
            column = adata.X[:, protein_list.index(protein)]
            if hasattr(column, 'toarray'):
                column = column.toarray()
            expr_columns[protein] = np.asarray(column, dtype=float).ravel()
    vatpase_expr = pd.DataFrame(expr_columns)
    
    if len(vatpase_expr.columns) > 0:
        # Calculate z-scores for better visualization
        from scipy.stats import zscore
        vatpase_zscore = vatpase_expr.apply(zscore, nan_policy='omit')
        
        # Sort samples by tau status; a stable sort keeps the original sample
        # order within each group.
        sort_idx = np.argsort(adata.obs['tau_positive'].values, kind='stable')
        vatpase_zscore_sorted = vatpase_zscore.iloc[sort_idx]
        
        # Sort proteins by domain and fold change. Only proteins with
        # expression data are kept, each listed once.
        protein_order = []
        v0_count = 0
        for domain in ['V0', 'V1']:
            domain_proteins = results_vatpase[results_vatpase['domain'] == domain].sort_values('log2FoldChange')
            for name in domain_proteins['protein'].tolist():
                if name in vatpase_zscore_sorted.columns and name not in protein_order:
                    protein_order.append(name)
            if domain == 'V0':
                # Count V0 rows from the domain annotation rather than from the
                # name: ATP6AP1 / ATP6AP2 are V0 entries without 'V0' in their symbol.
                v0_count = len(protein_order)
        
        if protein_order:
            n_rows = len(protein_order)
            vatpase_zscore_sorted = vatpase_zscore_sorted[protein_order]
            
            # Create figure
            fig, ax = plt.subplots(figsize=(12, 8))
            
            # Create heatmap (rows = proteins, columns = samples)
            im = ax.imshow(vatpase_zscore_sorted.T, cmap='RdBu_r', aspect='auto',
                          vmin=-2, vmax=2, interpolation='nearest')
            
            # Add colorbar
            cbar = plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
            cbar.set_label('Z-score', rotation=270, labelpad=15)
            
            # Add tau status bar at bottom
            tau_colors = ['blue' if x == 0 else 'red' for x in adata.obs['tau_positive'].iloc[sort_idx]]
            for i, color in enumerate(tau_colors):
                ax.add_patch(plt.Rectangle((i-0.5, n_rows-0.5), 1, 0.5, 
                                          color=color, alpha=0.7, transform=ax.transData))
            # The bar sits just below the last heatmap row, outside the limits
            # imshow sets, so extend the (inverted) y-axis to keep it visible.
            ax.set_ylim(n_rows, -0.5)
            
            # Set labels
            ax.set_yticks(range(n_rows))
            ax.set_yticklabels(protein_order, fontsize=9)
            ax.set_xticks([0, len(tau_colors)//2, len(tau_colors)-1])
            ax.set_xticklabels(['Tau-', 'Samples', 'Tau+'])
            
            # Add domain separator between the V0 and V1 row groups
            if 0 < v0_count < n_rows:
                ax.axhline(y=v0_count-0.5, color='black', linestyle='-', linewidth=2)
            
            # Add domain labels, centred on each non-empty row group
            if v0_count > 0:
                ax.text(-1, (v0_count-1)/2, 'V0', rotation=90, va='center', ha='right', fontweight='bold')
            if v0_count < n_rows:
                ax.text(-1, v0_count + (n_rows-v0_count-1)/2, 'V1', 
                       rotation=90, va='center', ha='right', fontweight='bold')
            
            ax.set_title('V-ATPase Expression Heatmap\n(Samples sorted by tau status: Blue=Tau-, Red=Tau+)',
                        fontsize=14, fontweight='bold')
            
            plt.tight_layout()
            plt.show()
            
            print("\nHeatmap created!")
    else:
        print("Insufficient data for heatmap")

## 8. Evaluate Claim

In [None]:
# Evaluate the claim from the differential-expression table: the verdict is
# driven by the number and share of subunits with FDR < 0.05.
print("\n" + "="*70)
print("CLAIM EVALUATION")
print("="*70)
print("Claim: V-ATPase subunits show differential expression patterns")
print()

if len(results_vatpase) > 0:
    # Calculate evaluation metrics
    n_sig = (results_vatpase['padj'] < 0.05).sum()
    n_total = len(results_vatpase)
    percent_sig = n_sig / n_total * 100
    
    # Check for domain-specific patterns
    v0_df = results_vatpase[results_vatpase['domain'] == 'V0']
    v1_df = results_vatpase[results_vatpase['domain'] == 'V1']
    
    v0_sig = (v0_df['padj'] < 0.05).sum()
    v1_sig = (v1_df['padj'] < 0.05).sum()
    
    # Check for differential patterns (different directions)
    has_bidirectional = False
    if n_sig > 0:
        sig_df = results_vatpase[results_vatpase['padj'] < 0.05]
        n_up = (sig_df['log2FoldChange'] > 0).sum()
        n_down = (sig_df['log2FoldChange'] < 0).sum()
        has_bidirectional = n_up > 0 and n_down > 0
    
    print("Analysis Results:")
    print("-"*40)
    print(f"V-ATPase subunits tested: {n_total}/{len(all_vatpase)} available")
    print(f"Significantly changed: {n_sig} ({percent_sig:.1f}%)")
    print(f"V0 domain significant: {v0_sig}/{len(v0_df)}")
    print(f"V1 domain significant: {v1_sig}/{len(v1_df)}")
    print(f"Bidirectional changes: {'Yes' if has_bidirectional else 'No'}")
    print()
    
    # Determine verdict based on evidence strength
    # (tiers are checked from strongest to weakest; the first match wins)
    if n_sig >= 4 and percent_sig > 25:
        verdict = "STRONGLY SUPPORTED"
        explanation = f"Multiple V-ATPase subunits show clear differential expression ({n_sig}/{n_total})"
    elif n_sig >= 3 and percent_sig > 20:
        verdict = "SUPPORTED"
        explanation = f"V-ATPase subunits show differential expression patterns ({n_sig}/{n_total})"
    elif n_sig >= 2:
        verdict = "PARTIALLY SUPPORTED"
        explanation = f"Some V-ATPase subunits differentially expressed ({n_sig}/{n_total})"
    elif n_sig == 1:
        verdict = "WEAKLY SUPPORTED"
        explanation = "Only one V-ATPase subunit significantly changed"
    else:
        verdict = "REFUTED"
        explanation = "No significant differential expression in V-ATPase subunits"
else:
    # Nothing was analysed. n_sig must still be defined, because the
    # interpretation below reads it.
    n_sig = 0
    verdict = "UNSURE"
    explanation = "V-ATPase proteins not found in dataset for analysis"

print("="*70)
print(f"VERDICT: {verdict}")
print("="*70)
print(f"Explanation: {explanation}")

# Additional biological interpretation
if n_sig > 0:
    print("\nKey V-ATPase Changes:")
    print("-"*40)
    for _, row in results_vatpase.nsmallest(min(3, n_sig), 'padj').iterrows():
        if row['padj'] < 0.05:
            direction = "upregulated" if row['log2FoldChange'] > 0 else "downregulated"
            print(f"{row['protein']} ({row['domain']}):")
            print(f"  {direction} (log2FC={row['log2FoldChange']:.2f}, FDR={row['padj']:.3e})")
            
            # Add functional annotation
            if 'V0A1' in row['protein']:
                print("  → Neuronal-specific a1 isoform, critical for synaptic vesicle acidification")
            elif 'V0C' in row['protein']:
                print("  → Forms proton channel, essential for H+ translocation")
            elif 'V1A' in row['protein'] or 'V1B' in row['protein']:
                print("  → ATP hydrolysis catalytic subunit")

print("\n" + "="*70)
print("BIOLOGICAL INTERPRETATION:")
print("="*70)

if verdict in ["STRONGLY SUPPORTED", "SUPPORTED", "PARTIALLY SUPPORTED"]:
    print("• V-ATPase dysfunction indicates lysosomal acidification impairment")
    print("• Compromised lysosomal pH affects:")
    print("  - Autophagy efficiency (reduced protein degradation)")
    print("  - Cathepsin activation (require acidic pH)")
    print("  - Autophagosome-lysosome fusion")
    print("• Links to SQSTM1/p62 accumulation (impaired autophagic flux)")
    print("• Potential therapeutic target for restoring proteostasis")
else:
    print("• V-ATPase function appears relatively preserved")
    print("• Lysosomal acidification may be maintained")
    print("• Other proteostasis mechanisms may be more affected")

## 9. Save Results Summary

In [None]:
# Compile results summary
# Collects the headline numbers from the claim evaluation into one dict,
# prints them, and writes CSV files when running outside Colab.
if len(results_vatpase) > 0:
    # Row with the smallest adjusted p-value, reported only when something is significant
    top_hit = results_vatpase.nsmallest(1, 'padj').iloc[0] if n_sig > 0 else None
    
    summary = {
        'claim': 'V-ATPase differential expression',
        'verdict': verdict,
        'n_proteins_tested': n_total,
        'n_proteins_total': len(all_vatpase),
        'coverage': f"{n_total/len(all_vatpase)*100:.1f}%",
        'n_significant': n_sig,
        'percent_significant': f"{percent_sig:.1f}%",
        'v0_significant': v0_sig,
        'v1_significant': v1_sig,
        'bidirectional_changes': has_bidirectional,
        'top_protein': top_hit['protein'] if top_hit is not None else None,
        'top_log2fc': top_hit['log2FoldChange'] if top_hit is not None else None,
        'top_padj': top_hit['padj'] if top_hit is not None else None
    }
    
    print("\n" + "="*70)
    print("ANALYSIS SUMMARY:")
    print("="*70)
    for key, value in summary.items():
        if value is not None:
            if isinstance(value, float):
                # Scientific notation only for small magnitudes. Comparing the
                # absolute value keeps negative fold changes in fixed-point form.
                print(f"{key:25} {value:.3e}" if 0 < abs(value) < 0.01 else f"{key:25} {value:.3f}")
            else:
                print(f"{key:25} {value}")
    
    # Save if not in Colab
    if not IN_COLAB:
        results_vatpase.to_csv('group2_claim1_vatpase.csv', index=False)
        print("\nResults saved to: group2_claim1_vatpase.csv")
        
        summary_df = pd.DataFrame([summary])
        summary_df.to_csv('group2_claim1_summary.csv', index=False)
        print("Summary saved to: group2_claim1_summary.csv")
    else:
        print("\nRunning in Colab - results kept in memory")
        print("To download results:")
        print("  results_vatpase.to_csv('vatpase_results.csv')")
        print("  files.download('vatpase_results.csv')")
else:
    print("\nNo V-ATPase analysis results to save")

## Summary

This **Colab-optimized** V-ATPase analysis:

1. **Tests the V-ATPase subunits detected in the dataset** (out of the 24 defined) for differential expression
2. **Analyzes V0 (membrane) and V1 (cytoplasmic) domains** separately
3. **Evaluates the claim** of differential expression patterns
4. **Links to lysosomal dysfunction** and autophagy impairment
5. **Provides comprehensive visualization** with volcano plot and heatmap
6. **Delivers objective verdict** based on statistical evidence

### Key Features:
- **Self-contained**: All 24 V-ATPase subunits embedded directly
- **Works in Google Colab**: Just upload pool_processed_v2.h5ad
- **Domain-specific analysis**: V0 vs V1 functional domains
- **Biological context**: Links to proteostasis and neurodegeneration
- **Clear interpretation**: Therapeutic implications highlighted

V-ATPase dysfunction is critical in neurodegeneration as it impairs lysosomal acidification, compromising the cell's ability to degrade proteins and clear aggregates through autophagy.