# Claim 1: UPS Protein Analysis with PertPy (Colab Version)
## Testing: "No significant UPS protein alterations across tau-positive versus tau-negative neurons"

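**Colab-optimized**: All 132 UPS proteins are embedded directly in the notebook, so no external protein-list file is needed. The only file you must supply is the dataset, `pool_processed_v2.h5ad`.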

This notebook uses PertPy's PyDESeq2 implementation to rigorously test whether UPS proteins show differential expression between tau-positive and tau-negative neurons.

In [None]:
# Google Colab Setup
import os
IN_COLAB = 'COLAB_GPU' in os.environ

if IN_COLAB:
    print("Running in Google Colab")
    # Install required packages
    !pip install -q pertpy pydeseq2 scanpy
    
    # Upload file prompt
    from google.colab import files
    print("\nPlease upload pool_processed_v2.h5ad when prompted:")
    uploaded = files.upload()
    data_path = 'pool_processed_v2.h5ad'
else:
    print("Running locally")
    data_path = '../../data/pool_processed_v2.h5ad'

In [None]:
# Import required packages
import pertpy as pt
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
# Silence library warnings so cell output stays readable.
# NOTE(review): this also hides numerical warnings raised later in the analysis.
warnings.filterwarnings('ignore')

# Set plotting style.
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*';
# fall back to the legacy name so local runs on older Matplotlib still work.
_preferred_style = 'seaborn-v0_8-whitegrid'
_legacy_style = 'seaborn-whitegrid'
plt.style.use(
    _preferred_style if _preferred_style in plt.style.available else _legacy_style
)
sns.set_palette('husl')
print("Packages loaded successfully")

## 1. Load and Prepare Data

In [None]:
# Load the proteomics data and prepare the obs/var columns and the integer
# 'counts' layer that the later cells rely on.
adata = sc.read_h5ad(data_path)

print(f"Loaded data: {adata.shape}")

# Standardize column names: copy legacy obs columns to the snake_case names
# used downstream, never overwriting a column that already exists.
column_mapping = {
    'TauStatus': 'tau_status',
    'MC1': 'mc1_score',
    'Pseudotime': 'pseudotime'
}

for old_name, new_name in column_mapping.items():
    if old_name in adata.obs.columns and new_name not in adata.obs.columns:
        adata.obs[new_name] = adata.obs[old_name]

# After the mapping above, 'tau_status' exists whenever the file carried
# either 'tau_status' or 'TauStatus'; anything else is unrecoverable.
if 'tau_status' not in adata.obs.columns:
    raise ValueError("Cannot find tau status column")

# Ensure tau_status is categorical
adata.obs['tau_status'] = pd.Categorical(adata.obs['tau_status'])

# The contrast and the fallback test both compare the literal labels
# 'positive' and 'negative'; stop here if either group is absent instead of
# silently producing empty groups.
missing_labels = {'positive', 'negative'} - set(adata.obs['tau_status'].cat.categories)
if missing_labels:
    raise ValueError(
        f"tau_status is missing expected label(s) {sorted(missing_labels)}; "
        f"found {list(adata.obs['tau_status'].cat.categories)}"
    )

adata.obs['tau_positive'] = (adata.obs['tau_status'] == 'positive').astype(int)

print(f"Tau-positive samples: {adata.obs['tau_positive'].sum()}")
print(f"Tau-negative samples: {(adata.obs['tau_positive'] == 0).sum()}")

# Prepare protein names (first matching var column wins, else the var index)
if 'GeneName' in adata.var.columns:
    adata.var['protein_name'] = adata.var['GeneName']
elif 'gene_name' in adata.var.columns:
    adata.var['protein_name'] = adata.var['gene_name']
else:
    adata.var['protein_name'] = adata.var.index

# Ensure dense matrix. This must happen before the pseudo-count step below,
# which applies element-wise NumPy maths to adata.X.
if hasattr(adata.X, 'toarray'):
    adata.X = adata.X.toarray()

# Create counts layer for DESeq2
if 'counts' not in adata.layers:
    print("Creating pseudo-counts from log2 data...")
    # assumes adata.X holds log2-scale intensities — TODO confirm against the data source
    n_missing = int(np.isnan(np.asarray(adata.X, dtype=float)).sum())
    if n_missing:
        # Warnings are globally silenced in this notebook, so say this explicitly:
        # NaN has no integer representation and the cast below yields
        # meaningless values for those entries.
        print(f"Warning: {n_missing} missing values in X; "
              "pseudo-counts for those entries are not meaningful")
    adata.layers['counts'] = np.power(2, adata.X) * 1000
    adata.layers['counts'] = np.round(adata.layers['counts']).astype(int)

## 2. Define Comprehensive UPS Protein List (132 proteins)

All 132 UPS proteins are embedded directly below, so no external protein-list file is required.

In [None]:
# COMPLETE 132 UPS PROTEINS LIST
# This comprehensive list avoids cherry-picking bias.
# Proteins are grouped by category; the flat list used by the rest of the
# notebook is the concatenation of the groups in the order written here.

_ups_by_category = {
    # 20S core (alpha, beta incl. immunoproteasome), 19S regulatory
    # (ATPases, non-ATPases), alternative caps and assembly factors
    'Proteasome subunits': [
        'PSMA1', 'PSMA2', 'PSMA3', 'PSMA4', 'PSMA5', 'PSMA6', 'PSMA7',
        'PSMB1', 'PSMB2', 'PSMB3', 'PSMB4', 'PSMB5', 'PSMB6', 'PSMB7',
        'PSMB8', 'PSMB9', 'PSMB10',
        'PSMC1', 'PSMC2', 'PSMC3', 'PSMC4', 'PSMC5', 'PSMC6',
        'PSMD1', 'PSMD2', 'PSMD3', 'PSMD4', 'PSMD5', 'PSMD6', 'PSMD7',
        'PSMD8', 'PSMD9', 'PSMD10', 'PSMD11', 'PSMD12', 'PSMD13', 'PSMD14',
        'PSME1', 'PSME2', 'PSME3', 'PSMF1', 'PSMG1', 'PSMG3',
    ],
    # Canonical, SUMO, NEDD8, UFM1 and FAT10 E1s, plus ubiquitin B / C
    'E1 enzymes': [
        'UBA1', 'UBA2', 'UBA3', 'UBA5', 'UBA6', 'UBB', 'UBC',
    ],
    'E2 enzymes': [
        'UBE2D1', 'UBE2D3', 'UBE2D4',
        'UBE2E2', 'UBE2G1', 'UBE2H', 'UBE2I', 'UBE2K',
        'UBE2L3', 'UBE2L6', 'UBE2M', 'UBE2N', 'UBE2O',
        'UBE2Q1', 'UBE2R2', 'UBE2V1', 'UBE2V2', 'UBE2Z',
    ],
    # CBL, F-box, HECT-domain, HERC, RING/TRIM and UBE3 ligases
    'E3 ligases': [
        'CBL',
        'FBXO2', 'FBXO6',
        'HECTD1', 'HECTD3', 'HECTD4',
        'HERC1', 'HERC2',
        'HUWE1',
        'ITCH',
        'NEDD4L',
        'PARK7',
        'RNF31',
        'SMURF1',
        'TRIM25', 'TRIM32',
        'UBE3A', 'UBE3B', 'UBE3C',
    ],
    # Miscellaneous DUBs, then the UCH family, then the USP family
    'Deubiquitinases': [
        'ATXN3', 'BRCC3', 'COPS5', 'COPS6', 'CYLD',
        'OTUB1', 'OTUD6B', 'STAMBP',
        'UCHL1', 'UCHL3', 'UCHL5',
        'USP4', 'USP5', 'USP7', 'USP8', 'USP9X', 'USP10',
        'USP11', 'USP14', 'USP15', 'USP19', 'USP24', 'USP25',
        'USP30', 'USP32', 'USP46', 'USP47', 'USP48',
    ],
    'UPS regulators': [
        'BAG6', 'NBR1', 'OPTN', 'SQSTM1', 'TAX1BP1',
        'UBQLN1', 'UBQLN2', 'UBQLN4',
        'VCP',
    ],
    # Ubiquitin-like modifiers
    'Alternative modifiers': [
        'ATG12', 'ISG15', 'NEDD8',
        'SUMO2', 'SUMO3', 'SUMO4',
        'UFM1', 'URM1',
    ],
}

# Flatten in category order to obtain the list consumed downstream.
ups_proteins_comprehensive = [
    symbol
    for members in _ups_by_category.values()
    for symbol in members
]

print(f"Total UPS proteins to analyze: {len(ups_proteins_comprehensive)}")
print("\nProtein categories:")
for _label, _members in _ups_by_category.items():
    print(f"  - {_label}: {len(_members)}")
print(f"  TOTAL: {len(ups_proteins_comprehensive)} proteins")

## 3. Check Protein Availability and Create Subset

In [None]:
# Check which of the predefined UPS proteins are present in the dataset and
# build `adata_ups`, a copy of `adata` restricted to those columns.
protein_names = adata.var['protein_name'] if 'protein_name' in adata.var else adata.var.index
protein_list = protein_names.tolist()
measured_names = set(protein_list)  # set lookup instead of repeated list scans

available_ups = [p for p in ups_proteins_comprehensive if p in measured_names]
missing_ups = set(ups_proteins_comprehensive) - set(available_ups)

print(f"Available UPS proteins: {len(available_ups)}/{len(ups_proteins_comprehensive)}")
print(f"Coverage: {len(available_ups)/len(ups_proteins_comprehensive)*100:.1f}%")

if missing_ups:
    print(f"\nMissing proteins: {len(missing_ups)}")
    if len(missing_ups) <= 20:
        print(f"Missing: {sorted(missing_ups)}")
    else:
        # Sort first, then slice, so the examples are the same on every run.
        print(f"Examples of missing: {sorted(missing_ups)[:10]}...")

# Every later cell divides by the number of tested proteins, so stop with a
# clear message when nothing matched (e.g. var names are not gene symbols).
if not available_ups:
    raise ValueError(
        "None of the UPS proteins were found in the dataset; "
        "check that protein names are gene symbols"
    )

# Create subset with only UPS proteins
available_lookup = set(available_ups)
ups_indices = [i for i, p in enumerate(protein_list) if p in available_lookup]
adata_ups = adata[:, ups_indices].copy()

print(f"\nUPS subset shape: {adata_ups.shape}")
print(f"Samples: {adata_ups.n_obs}")
print(f"UPS proteins: {adata_ups.n_vars}")

## 4. Run PyDESeq2 Analysis

In [None]:
# Differential expression of tau-positive vs tau-negative samples.
# Primary path: PyDESeq2 (via pertpy) on the integer 'counts' layer.
# Fallback path: per-protein t-test on the log2 matrix with BH correction.
# Either way `results_df` ends up with the columns the later cells read:
# 'protein', 'log2FoldChange', 'pvalue', 'padj'.
print("Running PyDESeq2 analysis...")

# Use the counts layer for DESeq2
if 'counts' in adata_ups.layers:
    # Stash the original matrix for the fallback only once: when this cell is
    # re-run, X already holds counts and must not overwrite the saved copy.
    # assumes any existing 'log2' layer was written by this cell — TODO confirm
    if 'log2' not in adata_ups.layers:
        adata_ups.layers['log2'] = adata_ups.X.copy()
    adata_ups.X = adata_ups.layers['counts'].copy()

# Run PyDESeq2
try:
    # Initialize with tau status as the main factor
    pds2 = pt.tl.PyDESeq2(
        adata=adata_ups,
        design="~tau_status",
        refit_cooks=True
    )

    # Fit the model
    pds2.fit()

    # Test contrast between tau positive and negative
    results_df = pds2.test_contrasts(
        pds2.contrast(
            column="tau_status",
            baseline="negative",
            group_to_compare="positive"
        )
    )

    print("✓ PyDESeq2 analysis completed successfully")

except Exception as e:
    # Deliberate best-effort: any PyDESeq2 failure switches to the t-test path.
    print(f"PyDESeq2 failed: {e}")
    print("Falling back to traditional t-test approach...")

    from statsmodels.stats.multitest import multipletests

    # Restore log2 data for t-test
    if 'log2' in adata_ups.layers:
        adata_ups.X = adata_ups.layers['log2'].copy()

    tau_pos = (adata_ups.obs['tau_status'] == 'positive').to_numpy()
    tau_neg = (adata_ups.obs['tau_status'] == 'negative').to_numpy()

    results_list = []
    for i in range(adata_ups.n_vars):
        expr_pos = adata_ups.X[tau_pos, i]
        expr_neg = adata_ups.X[tau_neg, i]

        # NaN-aware means, consistent with nan_policy='omit' in the test below.
        log2fc = np.nanmean(expr_pos) - np.nanmean(expr_neg)
        tstat, pval = stats.ttest_ind(expr_pos, expr_neg, nan_policy='omit')

        results_list.append({
            'protein': adata_ups.var.index[i],
            'log2FoldChange': log2fc,
            'pvalue': pval,
            'stat': tstat
        })

    results_df = pd.DataFrame(
        results_list, columns=['protein', 'log2FoldChange', 'pvalue', 'stat']
    )

    # Add adjusted p-values, correcting only over proteins that produced a
    # p-value so undefined tests do not contaminate the adjustment.
    results_df['padj'] = np.nan
    testable = results_df['pvalue'].notna()
    if testable.any():
        results_df.loc[testable, 'padj'] = multipletests(
            results_df.loc[testable, 'pvalue'].astype(float), method='fdr_bh'
        )[1]

    print("✓ Traditional analysis completed")

# Harmonise column names. pertpy is expected to report 'variable', 'log_fc',
# 'p_value' and 'adj_p_value' (TODO confirm against the installed version);
# the rest of the notebook reads the DESeq2-style names. Renaming is a no-op
# when the DESeq2-style names are already present.
_column_aliases = {
    'log_fc': 'log2FoldChange',
    'p_value': 'pvalue',
    'adj_p_value': 'padj',
}
results_df = results_df.rename(columns={
    src: dst for src, dst in _column_aliases.items()
    if src in results_df.columns and dst not in results_df.columns
})

# Ensure we have protein column
if 'protein' not in results_df.columns:
    if 'variable' in results_df.columns:
        results_df['protein'] = results_df['variable']
    else:
        results_df['protein'] = results_df.index

# The subset was selected by `protein_name`, but results are keyed by the var
# index; translate so the name-based lookups in later cells match.
# Identifiers without a mapping are left unchanged.
if 'protein_name' in adata_ups.var.columns:
    _id_to_name = adata_ups.var['protein_name'].to_dict()
    results_df['protein'] = [
        _id_to_name.get(identifier, identifier) for identifier in results_df['protein']
    ]

## 5. Analyze Results

In [None]:
# Summary statistics for the differential-expression table.
# Defines n_sig_nominal, n_sig_adjusted, large_effect and results_df_sorted,
# and adds the 'abs_log2FC' column to results_df.
print("Differential Expression Results Summary:")
print("=" * 50)

# Count proteins below the significance cutoff (raw and adjusted)
sig_threshold = 0.05
n_sig_nominal = results_df['pvalue'].lt(sig_threshold).sum()
n_sig_adjusted = results_df['padj'].lt(sig_threshold).sum()

for _line in (
    f"Total UPS proteins tested: {len(results_df)}",
    f"Significant (p < 0.05): {n_sig_nominal}",
    f"Significant (FDR < 0.05): {n_sig_adjusted}",
    f"Percentage significant (FDR): {n_sig_adjusted/len(results_df)*100:.1f}%",
):
    print(_line)

# Effect sizes: magnitude of the fold change, whatever its direction
results_df['abs_log2FC'] = results_df['log2FoldChange'].abs()
large_effect = results_df['abs_log2FC'].gt(0.5).sum()
print(f"\nProteins with large effect (|log2FC| > 0.5): {large_effect}")

# Rank by adjusted p-value and show the ten strongest hits
results_df_sorted = results_df.sort_values('padj')
_divider = "-" * 60
print("\nTop 10 Differentially Expressed UPS Proteins:")
print(_divider)
print(f"{'Protein':15} {'Log2FC':>10} {'P-value':>12} {'FDR':>12}")
print(_divider)
_top_hits = results_df_sorted.head(10)
for _name, _lfc, _p, _fdr in zip(
    _top_hits['protein'], _top_hits['log2FoldChange'],
    _top_hits['pvalue'], _top_hits['padj'],
):
    print(f"{_name:15} {_lfc:>10.3f} {_p:>12.3e} {_fdr:>12.3e}")

## 6. Create Volcano Plot

In [None]:
# Volcano plot: log2 fold change (x) against -log10 raw p-value (y), with
# points coloured by FDR significance and effect size.
fig, ax = plt.subplots(figsize=(10, 8))

# Calculate -log10(p-value); the tiny offset keeps p == 0 finite
results_df['neg_log10_pval'] = -np.log10(results_df['pvalue'] + 1e-300)

# Define significance thresholds
pval_threshold = -np.log10(0.05)
fc_threshold = 0.5

# Colour points based on significance (vectorised; NaN compares as False,
# so proteins without an adjusted p-value are drawn as not significant)
is_significant = (results_df['padj'] < 0.05).to_numpy()
is_large_effect = (results_df['log2FoldChange'].abs() > fc_threshold).to_numpy()
colors = np.select(
    [is_significant & is_large_effect, is_significant, is_large_effect],
    ['red', 'orange', 'blue'],   # both / significant only / large effect only
    default='gray',              # neither
).tolist()

# Create scatter plot
scatter = ax.scatter(results_df['log2FoldChange'],
                     results_df['neg_log10_pval'],
                     c=colors, alpha=0.6, s=50)

# Add threshold lines; keep the handle of the p-value line so its label can
# be shown in the explicit legend below
pval_line = ax.axhline(y=pval_threshold, color='black', linestyle='--', alpha=0.3, label='p = 0.05')
ax.axvline(x=fc_threshold, color='black', linestyle='--', alpha=0.3)
ax.axvline(x=-fc_threshold, color='black', linestyle='--', alpha=0.3)

# Label top proteins (only those that pass the FDR cutoff)
top_proteins = results_df.nsmallest(10, 'padj')
for _, row in top_proteins.iterrows():
    if row['padj'] < 0.05:
        ax.annotate(row['protein'],
                    (row['log2FoldChange'], row['neg_log10_pval']),
                    fontsize=8, alpha=0.7)

ax.set_xlabel('Log2 Fold Change (Tau+ vs Tau-)', fontsize=12)
ax.set_ylabel('-Log10(p-value)', fontsize=12)
ax.set_title(f'Volcano Plot: {len(available_ups)} UPS Proteins Differential Expression\n(Comprehensive Analysis - No Cherry-Picking)',
             fontsize=14, fontweight='bold')

# Add legend. Handles are passed explicitly, so the threshold line has to be
# included here for its label to appear.
from matplotlib.patches import Patch
legend_elements = [
    Patch(facecolor='red', alpha=0.6, label='Significant & Large Effect'),
    Patch(facecolor='orange', alpha=0.6, label='Significant'),
    Patch(facecolor='blue', alpha=0.6, label='Large Effect Only'),
    Patch(facecolor='gray', alpha=0.6, label='Not Significant')
]
ax.legend(handles=legend_elements + [pval_line], loc='upper left')

plt.tight_layout()
plt.show()

## 7. Category-Specific Analysis

In [None]:
# Break the results down by UPS component type.
# Builds `ups_categories_detailed` (category -> protein ids found in
# results_df) and `category_results` (per-category summary numbers).
ups_categories_detailed = {
    'Proteasome_Core': [],
    'Proteasome_Reg': [],
    'E1_Enzymes': [],
    'E2_Enzymes': [],
    'E3_Ligases': [],
    'DUBs': [],
    'Regulators': [],
    'Modifiers': []
}

# Categorize each protein. Rules are checked in order; the first match wins.
uncategorized = []
for protein in results_df['protein']:
    symbol = str(protein)  # tolerate non-string identifiers
    if symbol.startswith(('PSMA', 'PSMB')):
        category = 'Proteasome_Core'
    elif symbol.startswith(('PSMC', 'PSMD', 'PSME', 'PSMF', 'PSMG')):
        # PSMF*/PSMG* (alternative caps and assembly factors in the protein
        # list) belong to the proteasome group; without these prefixes they
        # match no rule and vanish from the table.
        category = 'Proteasome_Reg'
    elif symbol in ('UBA1', 'UBA2', 'UBA3', 'UBA5', 'UBA6', 'UBB', 'UBC'):
        category = 'E1_Enzymes'
    elif symbol.startswith('UBE2'):
        category = 'E2_Enzymes'
    elif symbol.startswith(('CBL', 'FBXO', 'HECTD', 'HERC', 'HUWE', 'ITCH',
                            'NEDD4', 'PARK', 'RNF', 'SMURF', 'TRIM', 'UBE3')):
        category = 'E3_Ligases'
    elif symbol.startswith(('ATXN', 'BRCC', 'COPS', 'CYLD', 'OTUB', 'OTUD',
                            'STAMBP', 'UCHL', 'USP')):
        category = 'DUBs'
    elif symbol in ('BAG6', 'NBR1', 'OPTN', 'SQSTM1', 'TAX1BP1',
                    'UBQLN1', 'UBQLN2', 'UBQLN4', 'VCP'):
        category = 'Regulators'
    elif symbol in ('ATG12', 'ISG15', 'NEDD8', 'SUMO2', 'SUMO3', 'SUMO4',
                    'UFM1', 'URM1'):
        category = 'Modifiers'
    else:
        uncategorized.append(protein)
        continue
    ups_categories_detailed[category].append(protein)

# Make silent omissions visible instead of dropping proteins from the table.
if uncategorized:
    print(f"\nNote: {len(uncategorized)} protein(s) matched no category: {uncategorized[:10]}")

# Analyze by category
print("\nCategory-Specific Analysis:")
print("="*70)
print(f"{'Category':20} {'Total':>8} {'Sig (FDR)':>10} {'% Sig':>8} {'Mean Log2FC':>12}")
print("-"*70)

category_results = {}
for cat_name, cat_proteins in ups_categories_detailed.items():
    if cat_proteins:
        cat_df = results_df[results_df['protein'].isin(cat_proteins)]
        n_sig = (cat_df['padj'] < 0.05).sum()
        pct_sig = n_sig / len(cat_df) * 100 if len(cat_df) > 0 else 0
        mean_fc = cat_df['log2FoldChange'].mean()

        category_results[cat_name] = {
            'total': len(cat_df),
            'significant': n_sig,
            'percent_sig': pct_sig,
            'mean_log2FC': mean_fc
        }

        print(f"{cat_name:20} {len(cat_df):>8} {n_sig:>10} {pct_sig:>7.1f}% {mean_fc:>12.3f}")

print("="*70)

## 8. Test Claim Validity

In [None]:
# Evaluate the claim against the differential-expression results.
# Reads n_sig_adjusted and the 'abs_log2FC' column produced by the results
# summary cell; defines verdict, explanation, percent_sig and n_large_effect.
_rule = "=" * 60
print("\n" + _rule)
print("CLAIM EVALUATION")
print(_rule)
print("Claim: 'No significant UPS protein alterations across")
print("       tau-positive versus tau-negative neurons'")
print()

# Decision criteria
fdr_threshold = 0.05
effect_threshold = 0.5
percent_threshold = 5  # If >5% proteins significant, claim is refuted

# Calculate metrics
percent_sig = n_sig_adjusted / len(results_df) * 100
_passes_fdr = results_df['padj'] < fdr_threshold
_is_large = results_df['abs_log2FC'] > effect_threshold
n_large_effect = (_passes_fdr & _is_large).sum()

print("Analysis Results:")
print(f"- Proteins tested: {len(results_df)}/{len(ups_proteins_comprehensive)} available")
print(f"- Significant (FDR < {fdr_threshold}): {n_sig_adjusted} ({percent_sig:.1f}%)")
print(f"- With large effect size (|log2FC| > {effect_threshold}): {n_large_effect}")
print()

# Make decision: start from the strongest refutation and relax it as the
# share of significant proteins falls under each tier.
verdict = "REFUTED"
explanation = f"{percent_sig:.1f}% of UPS proteins show significant alterations"
if percent_sig < percent_threshold:
    if n_sig_adjusted == 0:
        verdict = "STRONGLY SUPPORTED"
        explanation = "No UPS proteins show significant differential expression"
    else:
        verdict = "SUPPORTED"
        explanation = f"Only {percent_sig:.1f}% of UPS proteins show significant changes"
elif percent_sig < 10:
    verdict = "PARTIALLY SUPPORTED"
    explanation = f"{percent_sig:.1f}% of proteins changed, borderline significant"
elif percent_sig < 25:
    verdict = "PARTIALLY REFUTED"
    explanation = f"{percent_sig:.1f}% of UPS proteins significantly altered"

# Display verdict with visual emphasis
print(_rule)
print(f"VERDICT: {verdict}")
print(_rule)
print(f"Explanation: {explanation}")
print()

# Compare with cherry-picked analysis
cherry_picked_proteins = ['UCHL1', 'USP14', 'PSMA1', 'PSMB5', 'PSMC4', 'PSMD1', 'UBB', 'UBC']
_tested_names = results_df['protein'].values
cherry_available = [p for p in cherry_picked_proteins if p in _tested_names]
cherry_df = results_df[results_df['protein'].isin(cherry_available)]
cherry_sig = (cherry_df['padj'] < 0.05).sum()
if len(cherry_df) > 0:
    cherry_pct = cherry_sig / len(cherry_df) * 100
else:
    cherry_pct = 0

print("Comparison with Cherry-Picked Analysis:")
print(f"- Cherry-picked proteins tested: {len(cherry_df)}/{len(cherry_picked_proteins)}")
print(f"- Cherry-picked significant: {cherry_sig} ({cherry_pct:.1f}%)")
print(f"- Comprehensive significant: {n_sig_adjusted} ({percent_sig:.1f}%)")
print(f"- Difference: {abs(percent_sig - cherry_pct):.1f}% points")

if cherry_pct < 5 and percent_sig > 20:
    print("\n⚠️ Cherry-picking would have missed the UPS dysfunction!")

# List most changed proteins if any
if n_sig_adjusted > 0:
    print("\nMost significantly changed UPS proteins:")
    sig_proteins = results_df[_passes_fdr].sort_values('padj')
    _leaders = sig_proteins.head(5)
    for _name, _lfc, _fdr in zip(
        _leaders['protein'], _leaders['log2FoldChange'], _leaders['padj']
    ):
        direction = "↑" if _lfc > 0 else "↓"
        print(f"  {_name:15} {direction} log2FC={_lfc:>6.2f}, FDR={_fdr:.3e}")

## 9. Create Summary Report

In [None]:
# Assemble the headline numbers into `summary`, print them, and write the
# result tables to CSV when running outside Colab.
_top_hit = results_df_sorted.iloc[0] if len(results_df_sorted) > 0 else None

summary = {
    'claim': 'No significant UPS protein alterations',
    'verdict': verdict,
    'proteins_tested': len(results_df),
    'proteins_defined': len(ups_proteins_comprehensive),
    'coverage': f"{len(results_df)/len(ups_proteins_comprehensive)*100:.1f}%",
    'significant_fdr005': n_sig_adjusted,
    'percent_significant': f"{percent_sig:.1f}%",
    'large_effects': n_large_effect,
    # Strongest hit by adjusted p-value; None when there are no results
    'top_protein': None if _top_hit is None else _top_hit['protein'],
    'top_log2fc': None if _top_hit is None else _top_hit['log2FoldChange'],
    'top_padj': None if _top_hit is None else _top_hit['padj'],
}

print("\n" + "=" * 60)
print("ANALYSIS SUMMARY")
print("=" * 60)
for _field in summary:
    print(f"{_field:25} {summary[_field]}")

if IN_COLAB:
    # Nothing is written on Colab; tell the user how to export manually.
    print("\nRunning in Colab - results kept in memory")
    print("To download results, run:")
    print("  results_df.to_csv('ups_results.csv')")
    print("  files.download('ups_results.csv')")
else:
    # Save the full table next to the notebook
    output_file = 'claim1_ups_proteins_results.csv'
    results_df.to_csv(output_file, index=False)
    print(f"\nResults saved to: {output_file}")

    # Save significant proteins only, and only if there are any
    sig_proteins_df = results_df[results_df['padj'] < 0.05]
    if len(sig_proteins_df) > 0:
        sig_output = 'claim1_significant_ups.csv'
        sig_proteins_df.to_csv(sig_output, index=False)
        print(f"Significant proteins saved to: {sig_output}")

## Summary

This **Colab-optimized** PertPy-based analysis of UPS proteins provides:

1. **Comprehensive coverage**: A predefined list of 132 UPS proteins is tested (not cherry-picked); only those present in the dataset are analyzed
2. **Robust statistics**: PyDESeq2 or fallback methods with FDR correction
3. **Clear visualization**: Volcano plot showing all proteins
4. **Category breakdown**: Analysis by UPS component type
5. **Objective verdict**: Data-driven evaluation of the claim
6. **Cherry-picking comparison**: Shows how selection bias affects conclusions

### Key Advantages of This Analysis:
- **No external protein list needed**: All 132 proteins embedded directly
- **Works in Google Colab**: Just upload pool_processed_v2.h5ad
- **Avoids cherry-picking bias**: Tests all UPS components
- **Statistically rigorous**: Proper FDR correction applied

The analysis determines whether the claim of "no significant UPS alterations" is supported or refuted based on comprehensive, unbiased data.