## 1. Setup and Imports

In [None]:
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import logging

# Add src to path
sys.path.insert(0, './src')

# Import project modules
from nma_analysis import ANMAnalyzer
from spectral_generation import SpectralGenerator, DeltaSpectralFeatures
from data_acquisition import PDBDataAcquisition
from utils import Logger, set_seed, get_device

# Notebook-wide logger (INFO level) and the project's seeding helper
logger = Logger.setup('QDD-Notebook', level=logging.INFO)
set_seed(42)

# Plot defaults shared by every figure in this notebook
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

# Ensure the output folders exist (no error if they are already there)
for data_dir in ('./data/pdb', './data/spectral'):
    Path(data_dir).mkdir(parents=True, exist_ok=True)

logger.info("Setup complete!")

## 2. Download Test Structure (Ubiquitin)

In [None]:
import prody as pr

# Fetch the Ubiquitin entry (1UBQ) into the local PDB folder
logger.info("Downloading Ubiquitin (1UBQ) from PDB...")
pdb_path = pr.fetchPDB('1UBQ', folder='./data/pdb')
if pdb_path is None:
    # fetchPDB signals a failed download by returning None instead of
    # raising; stop here with a clear message rather than letting
    # parsePDB fail on a missing path.
    raise RuntimeError(
        "Could not download 1UBQ from the PDB; check network access "
        "or place the file in ./data/pdb manually."
    )
logger.info(f"Downloaded to: {pdb_path}")

# Parse the file and log basic size information
pdb = pr.parsePDB(pdb_path)
logger.info(f"Structure: {pdb.getTitle()}")
logger.info(f"Atoms: {pdb.numAtoms()}, Residues: {pdb.numResidues()}")

# Derive the sequence from the 'ca' selection
ca = pdb.select('ca')
if ca is None:
    # select() returns None when the selection matches no atoms
    raise ValueError(f"No atoms matched selection 'ca' in {pdb_path}")
sequence = pr.getSequence(ca)
logger.info(f"Sequence: {sequence}")

## 3. Normal Mode Analysis

In [None]:
# Build the project's ANM analyzer on the parsed structure
logger.info("Initializing ANM analyzer...")
anm = ANMAnalyzer(pdb, cutoff=15.0, distance_weighted=True)

# Request k=100 modes; the call returns frequencies and eigenvectors
logger.info("Computing normal modes...")
frequencies, eigenvectors = anm.compute_modes(k=100)

# Log the first ten frequencies using 1-based mode numbers
logger.info("Mode frequencies (first 10):")
for mode_number, freq in enumerate(frequencies[:10], start=1):
    logger.info(f"  Mode {mode_number}: {freq:.2f} cm^-1")

## 4. Compute VDOS and Spectral Features

In [None]:
# Build one VDOS curve per broadening value from the same frequencies
logger.info("Computing vibrational density of states (VDOS)...")

sg = SpectralGenerator(freq_min=0, freq_max=500, n_points=1000)

# Evaluated lazily in narrow -> medium -> broad order while unpacking
vdos_narrow, vdos_medium, vdos_broad = (
    sg.generate_dos(frequencies, broadening=width)
    for width in (3.0, 5.0, 10.0)
)

logger.info("VDOS generation complete!")

## 5. Vibrational Entropy Calculation

In [None]:
# Evaluate the analyzer's vibrational entropy at several temperatures
logger.info("Computing vibrational entropy...")

temperatures = [288, 298, 310, 325]  # K
entropies = {}

for temp_k in temperatures:
    # Store first, then log the stored value for this temperature
    entropies[temp_k] = anm.compute_vibrational_entropy(k=100, temperature=temp_k)
    logger.info(f"S_vib at {temp_k}K: {entropies[temp_k]:.2f} J/(mol*K)")

# Two-column table (temperature, entropy) reused by the plotting cell
entropy_df = pd.DataFrame({
    'Temperature (K)': list(entropies),
    'Vibrational Entropy (J/mol/K)': list(entropies.values()),
})

logger.info(f"\n{entropy_df}")

## 6. Spectral Feature Extraction

In [None]:
# Run the generator's feature extraction on each of the three VDOS curves
logger.info("Extracting spectral features...")

features_narrow, features_medium, features_broad = (
    sg.extract_spectral_features(curve)
    for curve in (vdos_narrow, vdos_medium, vdos_broad)
)

# Side-by-side table: one column per broadening setting
comparison_columns = {
    'Narrow (3 cm⁻¹)': features_narrow,
    'Medium (5 cm⁻¹)': features_medium,
    'Broad (10 cm⁻¹)': features_broad,
}
features_comparison = pd.DataFrame(comparison_columns)

logger.info("\nSpectral Features by Broadening:")
rounded_comparison = features_comparison.round(3)
print(rounded_comparison)

## 7. Visualizations

In [None]:
# Build a 2x2 summary figure: histogram, VDOS curves, entropy, feature heatmap
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax_hist, ax_vdos), (ax_entropy, ax_heat) = axes

title_style = {'fontsize': 13, 'fontweight': 'bold'}

# Panel 1: histogram of the computed mode frequencies
ax_hist.hist(frequencies, bins=20, color='steelblue', edgecolor='black', alpha=0.7)
ax_hist.set_xlabel('Frequency (cm⁻¹)', fontsize=12)
ax_hist.set_ylabel('Count', fontsize=12)
ax_hist.set_title('Distribution of Normal Mode Frequencies', **title_style)
ax_hist.grid(alpha=0.3)

# Panel 2: the three VDOS curves on the generator's frequency axis
freq_axis = sg.freq_axis
vdos_curves = (
    (vdos_narrow, 'Narrow (3 cm⁻¹)'),
    (vdos_medium, 'Medium (5 cm⁻¹)'),
    (vdos_broad, 'Broad (10 cm⁻¹)'),
)
for curve, curve_label in vdos_curves:
    ax_vdos.plot(freq_axis, curve, label=curve_label, linewidth=2)
ax_vdos.set_xlabel('Frequency (cm⁻¹)', fontsize=12)
ax_vdos.set_ylabel('Intensity (normalized)', fontsize=12)
ax_vdos.set_title('Vibrational Density of States (VDOS)', **title_style)
ax_vdos.legend(fontsize=10)
ax_vdos.grid(alpha=0.3)

# Panel 3: entropy values from entropy_df plotted against temperature
ax_entropy.plot(
    entropy_df['Temperature (K)'],
    entropy_df['Vibrational Entropy (J/mol/K)'],
    'o-',
    linewidth=2,
    markersize=8,
    color='darkgreen',
)
ax_entropy.set_xlabel('Temperature (K)', fontsize=12)
ax_entropy.set_ylabel('S_vib (J/(mol·K))', fontsize=12)
ax_entropy.set_title('Vibrational Entropy vs Temperature', **title_style)
ax_entropy.grid(alpha=0.3)

# Panel 4: annotated heatmap of the extracted features per broadening
features_df = pd.DataFrame({
    'Narrow': features_narrow,
    'Medium': features_medium,
    'Broad': features_broad,
})

sns.heatmap(
    features_df,
    annot=True,
    fmt='.2f',
    cmap='YlOrRd',
    ax=ax_heat,
    cbar_kws={'label': 'Value'},
)
ax_heat.set_title('Spectral Features by Broadening', **title_style)
ax_heat.set_ylabel('Feature', fontsize=12)

# Write the figure to disk before showing it
output_path = './data/spectral/nma_analysis_ubiquitin.png'
plt.tight_layout()
plt.savefig(output_path, dpi=150, bbox_inches='tight')
plt.show()

logger.info(f"Saved visualization: {output_path}")

## 8. Mode Analysis - Collectivity and Fluctuations

In [None]:
# Query per-mode collectivity and per-residue fluctuations from the analyzer
logger.info("Analyzing mode properties...")

# Collectivity of mode indices 0..9, logged with 1-based mode numbers
collectivities = []
for mode_idx in range(10):
    collectivities.append(anm.get_mode_collectivity(mode_idx=mode_idx))
    logger.info(f"Mode {mode_idx + 1}: Collectivity = {collectivities[-1]:.3f}")

# Summary statistics of the fluctuations computed with k=100
fluctuations = anm.get_residue_fluctuations(k=100)
logger.info("\nResidue fluctuations:")
for stat_label, reducer in (("Mean", np.mean), ("Min", np.min), ("Max", np.max)):
    logger.info(f"  {stat_label}: {reducer(fluctuations):.3f} Å²")

## 9. Summary and Next Steps

In [None]:
# Final recap of the run, framed by a 60-character rule
banner = "=" * 60
logger.info("\n" + banner)
logger.info("NMA Prototype Summary")
logger.info(banner)
logger.info("Structure: Ubiquitin (1UBQ)")
logger.info(f"Residues analyzed: {anm.n_atoms}")
# Report how many frequencies compute_modes actually returned instead of a
# hard-coded literal, so the summary cannot disagree with the computation.
logger.info(f"Modes computed: {len(frequencies)}")
logger.info(f"Frequency range: {frequencies.min():.2f} - {frequencies.max():.2f} cm⁻¹")
# 298 is one of the temperatures evaluated in the entropy cell
logger.info(f"Vibrational entropy (298K): {entropies[298]:.2f} J/(mol·K)")
logger.info("\nNext steps:")
logger.info("  1. Test on mutation analysis (compare WT vs mutant)")
logger.info("  2. Integrate with deep learning models (GNN + CNN)")
logger.info("  3. Process Kaggle competition data")
logger.info("  4. Train multimodal architecture")
logger.info(banner)