## Installation & Environment Setup

In [None]:
# Create and activate the conda environment before starting the kernel.
# Run in terminal:
# conda env create -f environment.yml
# conda activate quantum_decoder

import os
import sys

# Make the project's `src/` directory importable. The kernel's working
# directory may be the project root (./src) or the notebooks/ folder (../src),
# so probe both; if neither exists, fall back to the original './src'.
_src_candidates = ('./src', '../src')
_src_dir = os.path.abspath(
    next((c for c in _src_candidates if os.path.isdir(c)), _src_candidates[0])
)
# Guarded insert: re-running this cell must not stack duplicate entries.
# An absolute path keeps the entry valid even if the working directory changes.
if _src_dir not in sys.path:
    sys.path.insert(0, _src_dir)

# Verify imports
try:
    import torch
    import prody as pr
    # Import rdBase directly instead of reaching it through `Chem.rdBase`.
    from rdkit import Chem, rdBase
    import numpy as np
    print(f"✓ PyTorch {torch.__version__}")
    print(f"✓ ProDy {pr.__version__}")
    print(f"✓ NumPy {np.__version__}")
    print(f"✓ RDKit {rdBase.rdkitVersion}")
    print("\nAll dependencies installed!")
except ImportError as e:
    # Best-effort check: report the first missing package and let the reader
    # fix the environment; later cells will fail until it is installed.
    print(f"✗ Missing dependency: {e}")

## Core Workflow Overview

In [None]:
# 1. DATA ACQUISITION
# Instantiate the project's PDB acquisition helper, ask it for 5 structures
# via query_pdb_advanced, and show whatever IDs it returns.
from data_acquisition import PDBDataAcquisition

print("[PHASE 1] Data Acquisition", "-" * 50, sep="\n")

pdb_acq = PDBDataAcquisition(output_dir="./data/pdb")
pdb_ids = pdb_acq.query_pdb_advanced(num_structures=5)

# One call emits the result line followed by the blank spacer line.
print(f"Retrieved PDB IDs: {pdb_ids}", end="\n\n")

In [None]:
# 2. NMA ANALYSIS
# Fetch a demo structure, run the project's ANM analyzer on it and report the
# first few mode frequencies plus the vibrational entropy it computes.
import os

from nma_analysis import ANMAnalyzer
import prody as pr

print("[PHASE 2] Normal Mode Analysis")
print("-" * 50)

# Download Ubiquitin for demo
demo_pdb_id = '1UBQ'
pdb_folder = './data/pdb'
# Make sure the download target exists so this cell does not depend on an
# earlier cell having created it.
os.makedirs(pdb_folder, exist_ok=True)
pdb_path = pr.fetchPDB(demo_pdb_id, folder=pdb_folder)
# fetchPDB reports a failed download by returning None instead of raising;
# stop here with a clear message rather than passing None to the analyzer.
if pdb_path is None:
    raise RuntimeError(
        f"Could not fetch PDB entry {demo_pdb_id} into {pdb_folder} "
        "(check network access)"
    )

# Run ANM
anm = ANMAnalyzer(pdb_path, cutoff=15.0)
frequencies, modes = anm.compute_modes(k=50)
s_vib = anm.compute_vibrational_entropy(k=50)

print(f"Protein: {demo_pdb_id}")
print(f"Frequencies (first 5 modes): {frequencies[:5]}")
print(f"Vibrational entropy (298K): {s_vib:.2f} J/(mol·K)")
print()

In [None]:
# 3. SPECTRAL GENERATION
# Feed the Phase 2 `frequencies` to the project's SpectralGenerator
# (generate_dos with broadening=5.0), then print the shape of the result and
# every entry returned by extract_spectral_features.
from spectral_generation import SpectralGenerator

print("[PHASE 3] Spectral Generation", "-" * 50, sep="\n")

sg = SpectralGenerator(freq_min=0, freq_max=500, n_points=1000)
vdos = sg.generate_dos(frequencies, broadening=5.0)
features = sg.extract_spectral_features(vdos)

print(f"VDOS shape: {vdos.shape}")
print("Spectral features:")
# Each value is formatted with three decimals, so values are expected to be
# numeric — TODO confirm against extract_spectral_features.
for key, value in features.items():
    print(f"  {key}: {value:.3f}")
print()

In [None]:
# 4. GRAPH CONSTRUCTION
# Parse the structure fetched in Phase 2 (`pdb_path`), take its C-alpha
# selection, and hand coordinates plus per-residue features to the project's
# GraphConstruction helpers.
from models.gnn import GraphConstruction
import prody as pr
import torch

print("[PHASE 4] Graph Neural Network Preparation")
print("-" * 50)

# Extract structure info
pdb = pr.parsePDB(pdb_path)
ca = pdb.select('ca')
# ProDy's select() returns None when nothing matches; fail with a clear
# message instead of an AttributeError on the next line.
if ca is None:
    raise ValueError(f"No C-alpha atoms found in {pdb_path}")
# Use the selection's own getSequence() method, the documented ProDy accessor.
sequence = ca.getSequence()

# Get coordinates and features
coords = torch.tensor(ca.getCoords(), dtype=torch.float32)
# Named `node_features` so the Phase 3 spectral `features` dict is not
# overwritten by an object of a different kind.
node_features = GraphConstruction.construct_residue_features(sequence)

# Build graph
graph_data = GraphConstruction.construct_ca_graph(
    coords, node_features, distance_cutoff=10.0
)

print(f"Sequence: {sequence}")
print(f"Graph nodes: {graph_data.x.shape[0]}")
print(f"Graph edges: {graph_data.edge_index.shape[1]}")
print()

In [None]:
# 5. MULTIMODAL MODEL
# Build the project's VibroStructuralModel and run one smoke-test forward pass
# on the Phase 4 graph together with random stand-in spectral/global inputs.
from models.multimodal import VibroStructuralModel
import torch

print("[PHASE 5] Multimodal Deep Learning Model")
print("-" * 50)

# Dimensions shared by the constructor, the dummy inputs and the printed
# summary; defined once so they cannot drift apart.
LATENT_DIM = 128
GNN_INPUT_DIM = 24
SPECTRUM_POINTS = 1000
NUM_GLOBAL_FEATURES = 3
FUSION_TYPE = 'bilinear'
SEED = 0

# Seed before building the model so both the initial weights and the random
# test inputs are the same on every Restart & Run All.
torch.manual_seed(SEED)

# Initialize model
model = VibroStructuralModel(
    latent_dim=LATENT_DIM,
    gnn_input_dim=GNN_INPUT_DIM,
    fusion_type=FUSION_TYPE,
    num_go_terms=10000
)
# Inference mode for the smoke test: with a batch of one, dropout/batch-norm
# layers (if the model has any) must not run in training mode.
# Assumes VibroStructuralModel is a torch.nn.Module — it is called as model(...).
model.eval()

print("Model architecture: Vibro-Structural")
print(f"  - GNN Encoder (Graph): input={GNN_INPUT_DIM} → output={LATENT_DIM}")
print(f"  - CNN Encoder (Spectra): input={SPECTRUM_POINTS} → output={LATENT_DIM}")
print(f"  - Fusion: {FUSION_TYPE.capitalize()}")
print("  - Tasks: Novozymes (regression), CAFA 5 (multi-label)")

# The graph's per-node feature width must match what the GNN was built for.
assert graph_data.x.shape[1] == GNN_INPUT_DIM, (
    f"graph node features have width {graph_data.x.shape[1]}, "
    f"but the model expects gnn_input_dim={GNN_INPUT_DIM}"
)

# Test forward pass
# Single-graph batch: every node is assigned to graph index 0.
graph_data.batch = torch.zeros(graph_data.x.shape[0], dtype=torch.long)
spectra = torch.randn(1, 1, SPECTRUM_POINTS)
global_features = torch.randn(1, NUM_GLOBAL_FEATURES)

with torch.no_grad():
    output = model(graph_data, spectra, global_features, task='novozymes')

print("\nTest forward pass: ✓")
print(f"Output shape (Novozymes): {output.shape}")
print()

## Key Concepts

### Vibrational Density of States (VDOS)
- Represents the spectrum of vibrational modes in a protein
- Captures dynamic properties that complement the static 3D structure
- Computed via Normal Mode Analysis (NMA) using the Anisotropic Network Model (ANM)

### Vibrational Entropy ($S_{vib}$)
- Thermodynamic measure of thermal mobility
- Predicts stability changes upon mutation
- Computed from mode frequencies using quantum harmonic oscillator model

### Multimodal Fusion
- **Structural Branch (GNN)**: Processes 3D graph topology
- **Spectral Branch (CNN)**: Processes 1D vibrational spectra
- **Fusion Layer**: Combines embeddings via bilinear transformation

### Competition Tasks
- **Novozymes**: Predict melting temperature ($T_m$) from sequence/structure
- **CAFA 5**: Predict Gene Ontology (GO) terms (multi-label classification)

## Next Steps

1. **Explore NMA Notebook** (`02_nma_prototype.ipynb`)
   - Detailed walkthrough of Normal Mode Analysis
   - Spectral feature extraction
   - Temperature-dependent entropy

2. **Run Novozymes Pipeline** (`03_novozymes_execution.ipynb`)
   - Download competition data
   - Preprocess mutations
   - Train ranking model

3. **Run CAFA 5 Pipeline** (`04_cafa5_execution.ipynb`)
   - Process AlphaFold structures
   - Generate spectral fingerprints
   - Multi-label classification

4. **Outreach & Publication**
   - Contact biophysics labs (Markelz, Nelson, Engel)
   - Publish technical blog post
   - Release on GitHub

In [None]:
# Closing banner: print the project layout and documentation pointers in one
# call. Joining the lines with "\n" yields the same text as one print per line.
# NOTE(review): the 03/04 notebook names below differ from the "Next Steps"
# section (03_novozymes_execution.ipynb, 04_cafa5_execution.ipynb) — confirm
# which spelling matches the files on disk.
rule = "=" * 60
summary_lines = [
    rule,
    "Quantum Data Decoder - Quick Start Complete!",
    rule,
    "\nProject Structure:",
    "  src/",
    "    ├── data_acquisition.py     # PDB, Kaggle, spectral data",
    "    ├── nma_analysis.py         # ANM calculations",
    "    ├── spectral_generation.py  # DOS synthesis",
    "    ├── models/",
    "    │   ├── gnn.py              # Graph Neural Network",
    "    │   ├── cnn.py              # 1D CNN for spectra",
    "    │   ├── multimodal.py       # Fusion architecture",
    "    │   └── losses.py           # Custom losses",
    "    └── utils.py                # Helpers & constants",
    "\n  notebooks/",
    "    ├── 01_quickstart.ipynb     # This notebook",
    "    ├── 02_nma_prototype.ipynb  # NMA analysis",
    "    ├── 03_novozymes.ipynb      # Stability prediction",
    "    └── 04_cafa5.ipynb          # Function prediction",
    "\nDocumentation:",
    "  - README.md: Project overview and setup",
    "  - environment.yml: Conda dependencies",
    rule,
]
print("\n".join(summary_lines))