In [None]:
import sys
import os
from pathlib import Path

# Add project root to path.
# The root is taken to be the parent of the current working directory, so the
# `src.*` imports below only resolve when the notebook is started from a
# directory that sits directly under the project root.
project_root = Path().resolve().parent
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
project_root_str = str(project_root)
if project_root_str not in sys.path:
    sys.path.append(project_root_str)

import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Import our refactored modules
from src.utils import configure_gpu, print_memory_stats, collect_files
from src.data import (
    load_dicom, load_nifti, process_volume, 
    create_dataloaders, analyze_dataset_statistics_efficiently
)
from src.models import BaseAutoencoder, VAE
from src.training import (
    TrainingConfig, VAEConfig, 
    train_autoencoder, train_vae
)
from src.analysis import (
    plot_training_history, plot_vae_training_history,
    visualize_reconstruction_samples, visualize_vae_reconstructions,
    extract_latent_vectors, visualize_latent_space,
    evaluate_model_performance
)

# Configure plotting
# The matplotlib style is selected first and the seaborn palette second.
# NOTE(review): keep this order - applying the style after the palette would
# presumably reset the colour cycle that set_palette installs; confirm before
# reordering.
plt.style.use('default')
sns.set_palette("husl")

# Reached only when every import above succeeded; a failing import would have
# aborted the cell before these lines.
print("✅ All imports successful!")
print(f"Project root: {project_root}")


In [None]:
# Configure GPU automatically
# `configure_gpu` is a project helper from src.utils. Its return value is kept
# in `device` and reused by later cells (the models' `.to(device)` calls and
# the `device=str(device)` arguments of the training configs).
device = configure_gpu()
print(f"Using device: {device}")

# Print memory statistics
# NOTE(review): `print_memory_stats` is also a src.utils helper; what exactly
# it reports is not visible from this notebook - check src/utils.
print_memory_stats()


In [None]:
# Example: Collect medical image files
# Replace with your actual data directory
data_dir = "/path/to/your/medical/images"

# Collect files (supports DICOM and NIfTI)
# files = collect_files(data_dir, extensions=['.dcm', '.nii', '.nii.gz'])

# For demonstration, let's create a sample metadata CSV.
# The file names are placeholders: no image files are created or read here.
SEED = 42          # fixed seed so Restart & Run All reproduces the same table
N_SUBJECTS = 100   # first half is labelled 'Control', second half 'Patient'

# A local, seeded generator keeps the synthetic columns reproducible without
# touching NumPy's global random state.
rng = np.random.default_rng(SEED)

sample_data = {
    'file_path': [f'sample_{i}.nii.gz' for i in range(N_SUBJECTS)],
    'group': ['Control' if i < N_SUBJECTS // 2 else 'Patient' for i in range(N_SUBJECTS)],
    # Ages are drawn from a normal distribution with mean 65 and std 10.
    'age': rng.normal(65, 10, N_SUBJECTS),
    'gender': rng.choice(['M', 'F'], N_SUBJECTS)
}

df = pd.DataFrame(sample_data)
print(f"Created sample dataset with {len(df)} subjects")
print("\nDataset overview:")
print(df.head())
print("\nGroup distribution:")
print(df['group'].value_counts())


In [None]:
# Create autoencoder model.
# `input_shape` and `latent_dim` are module-level on purpose: the VAE cell
# further down reuses them so both models share the same dimensions.
# NOTE(review): the axis meaning of `input_shape` is not visible here; it
# matches the `target_shape` used in the data-loader example - confirm the
# expected ordering in src.models.
input_shape = (64, 128, 128)
latent_dim = 256

autoencoder = BaseAutoencoder(
    input_shape=input_shape,
    latent_dim=latent_dim
).to(device)

# Count every element of every parameter tensor once, outside the print call.
autoencoder_param_count = sum(p.numel() for p in autoencoder.parameters())

# Plain string: the header has no placeholders, so no f-prefix is needed.
print("Autoencoder created:")
print(f"  Input shape: {input_shape}")
print(f"  Latent dimension: {latent_dim}")
print(f"  Total parameters: {autoencoder_param_count:,}")


In [None]:
# Configure training
# The autoencoder hyper-parameters are gathered in one mapping and unpacked
# into TrainingConfig as keyword arguments.
ae_kwargs = dict(
    epochs=50,
    batch_size=4,
    learning_rate=0.001,
    optimizer='adam',
    scheduler='reduce_on_plateau',
    loss_function='mse',
    device=str(device),  # the config receives the device as a string
    use_amp=True,
    early_stopping_patience=10,
    save_every=10,
    checkpoint_dir='checkpoints',
    model_name='demo_autoencoder',
)
ae_config = TrainingConfig(**ae_kwargs)

# One print call, one line per field; values are read back from the config
# object rather than from the mapping above.
print(
    "Training configuration:",
    f"  Epochs: {ae_config.epochs}",
    f"  Learning rate: {ae_config.learning_rate}",
    f"  Optimizer: {ae_config.optimizer}",
    f"  Mixed precision: {ae_config.use_amp}",
    sep="\n",
)


In [None]:
# Create VAE model.
# Reuses `input_shape` and `latent_dim` defined in the autoencoder cell, so
# both models are built with the same dimensions.
vae = VAE(
    input_shape=input_shape,
    latent_dim=latent_dim
).to(device)

# Count every element of every parameter tensor once, outside the print call.
vae_param_count = sum(p.numel() for p in vae.parameters())

# Plain string: the header has no placeholders, so no f-prefix is needed.
print("VAE created:")
print(f"  Input shape: {input_shape}")
print(f"  Latent dimension: {latent_dim}")
print(f"  Total parameters: {vae_param_count:,}")


In [None]:
# Configure VAE training
# The VAE hyper-parameters are gathered in one mapping and unpacked into
# VAEConfig as keyword arguments.
vae_kwargs = dict(
    epochs=60,
    batch_size=4,
    learning_rate=0.001,
    optimizer='adam',
    scheduler='reduce_on_plateau',
    device=str(device),  # the config receives the device as a string
    use_amp=True,
    beta=1.0,
    beta_warmup_steps=10,
    free_bits=0.0,
    reconstruction_loss='mse',
    early_stopping_patience=15,
    save_every=10,
    checkpoint_dir='checkpoints',
    model_name='demo_vae',
)
vae_config = VAEConfig(**vae_kwargs)

# One print call, one line per field; values are read back from the config
# object rather than from the mapping above.
print(
    "VAE training configuration:",
    f"  Beta (KL weight): {vae_config.beta}",
    f"  Beta warmup steps: {vae_config.beta_warmup_steps}",
    f"  Free bits: {vae_config.free_bits}",
    f"  Reconstruction loss: {vae_config.reconstruction_loss}",
    sep="\n",
)


In [None]:
# Example: Training with data loaders (uncomment when you have real data)
#
# NOTE: the `df` built earlier in this notebook only holds placeholder file
# names (sample_<i>.nii.gz) and `data_dir` is a placeholder path; replace both
# with real values before uncommenting anything below.
# `train_loader` and `val_loader` are shared by both training calls, and
# `val_loader` is also needed by the visualization and latent-space cells.
# train_loader, val_loader = create_dataloaders(
#     df=df,
#     data_dir=data_dir,
#     batch_size=4,
#     train_split=0.8,
#     val_split=0.2,
#     target_shape=(64, 128, 128),
#     num_workers=2
# )

# # Train autoencoder
# # (`ae_history` is what the visualization cell passes to plot_training_history)
# ae_history = train_autoencoder(
#     model=autoencoder,
#     train_loader=train_loader,
#     val_loader=val_loader,
#     config=ae_config
# )

# # Train VAE
# # (`vae_history` is what the visualization cell passes to plot_vae_training_history)
# vae_history = train_vae(
#     model=vae,
#     train_loader=train_loader,
#     val_loader=val_loader,
#     config=vae_config
# )

# With everything above commented out, this cell only prints the two hints.
print("💡 Training code ready - uncomment when you have medical image data")
print("💡 The models will train on your DICOM/NIfTI files automatically")


In [None]:
# Example: Visualization functions (uncomment after training)
#
# Prerequisites: `ae_history`, `vae_history` and `val_loader` are only defined
# once the (commented-out) training cell has been enabled and run; uncommenting
# this cell on its own raises NameError.
# plot_training_history(ae_history, save_path='autoencoder_history.png')
# plot_vae_training_history(vae_history, save_path='vae_history.png')

# # Visualize reconstructions
# visualize_reconstruction_samples(
#     model=autoencoder,
#     dataloader=val_loader,
#     num_samples=3,
#     save_path='autoencoder_reconstructions.png'
# )

# visualize_vae_reconstructions(
#     model=vae,
#     dataloader=val_loader,
#     num_samples=3,
#     save_path='vae_reconstructions.png'
# )

# With everything above commented out, this cell only prints the two hints.
print("💡 Visualization functions ready for trained models")
print("💡 Will generate training curves, reconstruction comparisons, and more")


In [None]:
# Example: Latent space analysis (uncomment after VAE training)
#
# Prerequisites: `val_loader` from the (commented-out) training cell and a
# trained `vae`.
# NOTE(review): the two-value return (vectors, group labels) presumably depends
# on include_groups=True - confirm against src.analysis.
# latent_vectors, group_labels = extract_latent_vectors(
#     model=vae,
#     dataloader=val_loader,
#     include_groups=True
# )

# # Visualize latent space with t-SNE
# visualize_latent_space(
#     latent_vectors=latent_vectors,
#     labels=group_labels,
#     method='tsne',
#     save_path='latent_space_tsne.png'
# )

# # Visualize with PCA
# # (same vectors and labels as above; only `method` and `save_path` differ)
# visualize_latent_space(
#     latent_vectors=latent_vectors,
#     labels=group_labels,
#     method='pca',
#     save_path='latent_space_pca.png'
# )

# With everything above commented out, this cell only prints the two hints.
print("💡 Latent space analysis ready")
print("💡 Will generate t-SNE and PCA plots to visualize learned representations")


In [None]:
# Print copy-pasteable command-line examples for the training and evaluation
# scripts. Nothing is executed here; the text is only displayed.
# The string is not a raw string, so each `\\` is printed as a single
# backslash, i.e. a shell line continuation in the displayed commands.
print("""
Command-Line Usage Examples:

1. Train Autoencoder:
   python scripts/train_autoencoder.py \\
     --data_csv data.csv \\
     --data_dir /path/to/images \\
     --epochs 100 \\
     --batch_size 4 \\
     --latent_dim 256

2. Train VAE:
   python scripts/train_vae.py \\
     --data_csv data.csv \\
     --data_dir /path/to/images \\
     --epochs 150 \\
     --beta 1.0 \\
     --beta_warmup_steps 20

3. Evaluate Model:
   python scripts/evaluate_model.py \\
     --checkpoint_path model_best.pth \\
     --model_type autoencoder \\
     --data_csv data.csv \\
     --data_dir /path/to/images \\
     --visualize \\
     --latent_analysis
""")


In [None]:
# Load and display configuration files
import json

# Load default autoencoder config.
# The path is resolved against `project_root`, so it does not depend on the
# notebook's own working directory.
ae_config_path = project_root / 'configs' / 'autoencoder_default.json'

# JSON is UTF-8 by specification; pass the encoding explicitly so the file is
# decoded the same way on every platform instead of with the locale default.
with open(ae_config_path, 'r', encoding='utf-8') as f:
    ae_config_dict = json.load(f)

print("Default Autoencoder Configuration:")
print(json.dumps(ae_config_dict, indent=2))


In [None]:
# Load VAE config.
# Relies on `json` having been imported by the preceding cell and on
# `project_root` from the first cell.
vae_config_path = project_root / 'configs' / 'vae_default.json'

# JSON is UTF-8 by specification; pass the encoding explicitly so the file is
# decoded the same way on every platform instead of with the locale default.
with open(vae_config_path, 'r', encoding='utf-8') as f:
    vae_config_dict = json.load(f)

print("Default VAE Configuration:")
print(json.dumps(vae_config_dict, indent=2))
