In [6]:
!pip install torch pandas numpy matplotlib seaborn pydicom nibabel scikit-image

[33mDEPRECATION: Loading egg at /usr/local/lib/python3.12/dist-packages/dill-0.3.9-py3.12.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0m[33mDEPRECATION: Loading egg at /usr/local/lib/python3.12/dist-packages/opt_einsum-3.4.0-py3.12.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0m[33mDEPRECATION: Loading egg at /usr/local/lib/python3.12/dist-packages/nvfuser-0.2.23a0+6627725-py3.12-linux-x86_64.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m
[0m[33mDEPRECATION: Loading egg at /usr/local/lib/python3.12/dist-packag

In [7]:
import sys
import os
from pathlib import Path

# Add project root to path
# The project root is taken to be the parent of the current working directory.
project_root = Path().resolve().parent
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Import our refactored modules
from src.utils import configure_gpu, print_memory_stats, collect_files
from src.data import (
    load_dicom, load_nifti, process_volume, 
    create_dataloaders, analyze_dataset_statistics_efficiently
)
from src.models import BaseAutoencoder, VAE
from src.training import (
    TrainingConfig, VAEConfig, 
    train_autoencoder, train_vae
)
from src.analysis import (
    plot_training_history, plot_vae_training_history,
    visualize_reconstruction_samples, visualize_vae_reconstructions,
    extract_latent_vectors, visualize_latent_space,
    evaluate_model_performance
)

# Plot appearance: matplotlib's 'default' style with seaborn's "husl" palette
sns.set_palette("husl") if False else plt.style.use('default')
sns.set_palette("husl")

# Status report (one print call, one message per line)
print(
    "✅ All imports successful!",
    f"Project root: {project_root}",
    sep="\n",
)


✅ All imports successful!
Project root: /workspace/unsupervised-parkinsons-imaging


In [8]:
# Configure GPU automatically
# configure_gpu() comes from src.utils; its return value is the device the
# models below are moved to. In the recorded run it reported a CUDA device.
device = configure_gpu()
print(f"Using device: {device}")

# Print memory statistics
# print_memory_stats() comes from src.utils; in the recorded run it printed
# system and GPU memory usage.
print_memory_stats()


Using GPU: NVIDIA GeForce RTX 4090
CUDA device count: 1
CUDA device capability: (8, 9)
Using device: cuda
System Memory - Available: 59.32 GB, Used: 4.8%
GPU Memory - Allocated: 0.00 GB, Cached: 0.00 GB


In [9]:
def collect_files(base_dir):
    """Collect DICOM file paths from the known PPMI cohort folders.

    NOTE: this definition shadows the ``collect_files`` name imported from
    ``src.utils`` in the imports cell.

    Only the immediate sub-folders of ``base_dir`` listed in
    ``expected_folders`` are searched (recursively). Every file whose name
    ends in ".dcm" is either excluded (its path below ``base_dir`` contains
    "br_raw") or included together with the label of its cohort folder.

    Folders and files are visited in sorted order, so the result is
    deterministic; ``os.listdir`` / ``os.walk`` alone return entries in
    arbitrary order. The "br_raw" test is applied to the path relative to
    ``base_dir`` so that the location of the dataset itself (e.g. a parent
    directory containing "br_raw") cannot exclude every file.

    Args:
        base_dir: Directory containing the ``PPMI_Images_*`` folders.

    Returns:
        Tuple ``(included_files, excluded_files)`` where ``included_files`` is
        a list of ``(full_path, label)`` tuples and ``excluded_files`` is a
        list of full paths.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``base_dir`` cannot be
            listed (for example, it does not exist).
    """
    expected_folders = {
        "PPMI_Images_PD": "PD",
        "PPMI_Images_SWEDD": "SWEDD",
        "PPMI_Images_Cont": "Control"
    }

    included_files = []
    excluded_files = []

    for folder in sorted(os.listdir(base_dir)):
        if folder not in expected_folders:
            continue
        folder_path = os.path.join(base_dir, folder)
        if not os.path.isdir(folder_path):
            continue

        label = expected_folders[folder]
        for root, dirs, files in os.walk(folder_path):
            # Sorting in place makes os.walk descend in a stable order.
            dirs.sort()
            # Path of this directory below base_dir; used for the exclusion
            # test so base_dir's own name never matters.
            rel_root = os.path.relpath(root, base_dir)
            for file in sorted(files):
                if not file.endswith(".dcm"):
                    continue
                full_path = os.path.join(root, file)
                if "br_raw" in os.path.join(rel_root, file):
                    excluded_files.append(full_path)
                else:
                    included_files.append((full_path, label))
    return included_files, excluded_files

def generate_dataframe(included_files):
    """Build a two-column DataFrame from ``(file_path, label)`` pairs.

    Args:
        included_files: Sequence of ``(file_path, label)`` tuples.

    Returns:
        ``pandas.DataFrame`` with the columns ``file_path`` and ``label``.
    """
    column_names = ["file_path", "label"]
    frame = pd.DataFrame(data=included_files, columns=column_names)
    return frame

# --- Set your actual base directory ---
# The default is the original location; set the PPMI_DATA_DIR environment
# variable to point the notebook at another copy of the data without editing
# this cell.
data_dir = os.environ.get("PPMI_DATA_DIR", "/workdir/Images")  # adjust to your actual path after unzipping

# `excluded` holds the paths that were skipped because they contain "br_raw".
included, excluded = collect_files(data_dir)
df = generate_dataframe(included)

print(f"✅ Collected {len(df)} valid DICOM files")
print(df.head())

✅ Collected 2976 valid DICOM files
                                           file_path    label
0  /workdir/Images/PPMI_Images_Cont/100004/Recons...  Control
1  /workdir/Images/PPMI_Images_Cont/100890/Recons...  Control
2  /workdir/Images/PPMI_Images_Cont/100956/Recons...  Control
3  /workdir/Images/PPMI_Images_Cont/101039/Recons...  Control
4  /workdir/Images/PPMI_Images_Cont/101195/Recons...  Control


In [11]:
# Build the plain autoencoder and move it to the configured device
input_shape = (64, 128, 128)  # only reported below; not passed to the constructor
latent_dim = 256

autoencoder = BaseAutoencoder(latent_dim=latent_dim)
autoencoder = autoencoder.to(device)

# Sum of element counts over every parameter tensor of the model
ae_param_count = sum(param.numel() for param in autoencoder.parameters())

print(f"Autoencoder created:")
print(f"  Input shape: {input_shape}")
print(f"  Latent dimension: {latent_dim}")
print(f"  Total parameters: {ae_param_count:,}")


Autoencoder created:
  Input shape: (64, 128, 128)
  Latent dimension: 256
  Total parameters: 77,171,969


In [12]:
# Autoencoder training settings, collected in one mapping and handed to
# TrainingConfig as keyword arguments.
ae_settings = {
    "epochs": 50,
    "batch_size": 4,
    "learning_rate": 0.001,
    "optimizer": 'adam',
    "scheduler": 'reduce_on_plateau',
    "loss_function": 'mse',
    "device": str(device),
    "use_amp": True,
    "early_stopping_patience": 10,
    "save_every": 10,
    "checkpoint_dir": 'checkpoints',
    "model_name": 'demo_autoencoder',
}
ae_config = TrainingConfig(**ae_settings)

# Echo the most relevant settings back from the config object
print("Training configuration:")
print(f"  Epochs: {ae_config.epochs}")
print(f"  Learning rate: {ae_config.learning_rate}")
print(f"  Optimizer: {ae_config.optimizer}")
print(f"  Mixed precision: {ae_config.use_amp}")


Training configuration:
  Epochs: 50
  Learning rate: 0.001
  Optimizer: adam
  Mixed precision: True


In [19]:
# Build the VAE with the same latent size as the autoencoder and move it to
# the configured device
vae = VAE(latent_dim=latent_dim)
vae = vae.to(device)

# Sum of element counts over every parameter tensor of the model
vae_param_count = sum(param.numel() for param in vae.parameters())

print(f"VAE created:")
print(f"  Input shape: {input_shape}")
print(f"  Latent dimension: {latent_dim}")
print(f"  Total parameters: {vae_param_count:,}")


VAE created:
  Input shape: (64, 128, 128)
  Latent dimension: 256
  Total parameters: 77,303,297


In [20]:
# VAE training settings, collected in one mapping and handed to VAEConfig as
# keyword arguments.
vae_settings = {
    "epochs": 60,
    "batch_size": 4,
    "learning_rate": 0.001,
    "optimizer": 'adam',
    "scheduler": 'reduce_on_plateau',
    "device": str(device),
    "use_amp": True,
    "beta": 1.0,
    "beta_warmup_steps": 10,
    "free_bits": 0.0,
    "early_stopping_patience": 15,
    "save_every": 10,
    "checkpoint_dir": 'checkpoints',
    "model_name": 'demo_vae',
}
vae_config = VAEConfig(**vae_settings)

# Echo the VAE-specific settings back from the config object
print("VAE training configuration:")
print(f"  Beta (KL weight): {vae_config.beta}")
print(f"  Beta warmup steps: {vae_config.beta_warmup_steps}")
print(f"  Free bits: {vae_config.free_bits}")


VAE training configuration:
  Beta (KL weight): 1.0
  Beta warmup steps: 10
  Free bits: 0.0


In [26]:
# Training with data loaders
# The batch size is read from the autoencoder training config (4 in this
# notebook, the same value vae_config uses) instead of repeating the literal,
# so the loaders and the config cannot silently drift apart.
# NOTE(review): the recorded autoencoder run failed in a 3-D convolution with
# "expected input[1, 4, 64, 128, 128] to have 1 channels". That looks like
# batches shaped (batch, D, H, W) with no channel axis — confirm that the
# dataset behind create_dataloaders yields (batch, 1, D, H, W).
train_loader, val_loader = create_dataloaders(
    df=df,
    batch_size=ae_config.batch_size,
    train_split=0.8,
)


In [27]:
# Run autoencoder training; the returned history is kept for the plotting
# examples further down.
# NOTE(review): the recorded run of this cell stopped with a RuntimeError
# ("expected input[1, 4, 64, 128, 128] to have 1 channels, but got 4") —
# presumably the batches lack a channel axis; verify the loader output shape.
ae_training_inputs = {
    "model": autoencoder,
    "train_loader": train_loader,
    "val_loader": val_loader,
    "config": ae_config,
}
ae_history = train_autoencoder(**ae_training_inputs)


Starting autoencoder training for 50 epochs
Device: cuda
Model parameters: 77,171,969


Epoch 1/50 [Train]:   0%|                                                                       | 0/595 [00:00<?, ?it/s]


RuntimeError: Given groups=1, weight of size [32, 1, 3, 3, 3], expected input[1, 4, 64, 128, 128] to have 1 channels, but got 4 channels instead

In [None]:
# Run VAE training on the same loaders; the returned history is kept for the
# plotting examples further down.
vae_training_inputs = {
    "model": vae,
    "train_loader": train_loader,
    "val_loader": val_loader,
    "config": vae_config,
}
vae_history = train_vae(**vae_training_inputs)


In [None]:
# Example visualization calls. They need `ae_history` / `vae_history` and the
# trained models, so uncomment them only after both training cells have run.
# plot_training_history(ae_history, save_path='autoencoder_history.png')
# plot_vae_training_history(vae_history, save_path='vae_history.png')

# # Visualize reconstructions
# visualize_reconstruction_samples(
#     model=autoencoder,
#     dataloader=val_loader,
#     num_samples=3,
#     save_path='autoencoder_reconstructions.png'
# )

# visualize_vae_reconstructions(
#     model=vae,
#     dataloader=val_loader,
#     num_samples=3,
#     save_path='vae_reconstructions.png'
# )

# Placeholder messages while the calls above stay commented out
print(
    "💡 Visualization functions ready for trained models",
    "💡 Will generate training curves, reconstruction comparisons, and more",
    sep="\n",
)


In [None]:
# Example latent-space analysis calls. They need the trained `vae`, so
# uncomment them only after the VAE training cell has run.
# latent_vectors, group_labels = extract_latent_vectors(
#     model=vae,
#     dataloader=val_loader,
#     include_groups=True
# )

# # Visualize latent space with t-SNE
# visualize_latent_space(
#     latent_vectors=latent_vectors,
#     labels=group_labels,
#     method='tsne',
#     save_path='latent_space_tsne.png'
# )

# # Visualize with PCA
# visualize_latent_space(
#     latent_vectors=latent_vectors,
#     labels=group_labels,
#     method='pca',
#     save_path='latent_space_pca.png'
# )

# Placeholder messages while the calls above stay commented out
print(
    "💡 Latent space analysis ready",
    "💡 Will generate t-SNE and PCA plots to visualize learned representations",
    sep="\n",
)


In [None]:
# Reference text for the command-line scripts; the doubled backslashes print
# as single shell line continuations.
cli_usage_examples = """
Command-Line Usage Examples:

1. Train Autoencoder:
   python scripts/train_autoencoder.py \\
     --data_csv data.csv \\
     --data_dir /path/to/images \\
     --epochs 100 \\
     --batch_size 4 \\
     --latent_dim 256

2. Train VAE:
   python scripts/train_vae.py \\
     --data_csv data.csv \\
     --data_dir /path/to/images \\
     --epochs 150 \\
     --beta 1.0 \\
     --beta_warmup_steps 20

3. Evaluate Model:
   python scripts/evaluate_model.py \\
     --checkpoint_path model_best.pth \\
     --model_type autoencoder \\
     --data_csv data.csv \\
     --data_dir /path/to/images \\
     --visualize \\
     --latent_analysis
"""
print(cli_usage_examples)


In [None]:
# Load and display configuration files
import json

# Load default autoencoder config
ae_config_path = project_root / 'configs' / 'autoencoder_default.json'
# Pass the encoding explicitly: without it open() falls back to the platform's
# locale encoding, which can mis-decode a UTF-8 JSON file.
with open(ae_config_path, 'r', encoding='utf-8') as f:
    ae_config_dict = json.load(f)

print("Default Autoencoder Configuration:")
print(json.dumps(ae_config_dict, indent=2))


In [None]:
# Load VAE config
vae_config_path = project_root / 'configs' / 'vae_default.json'
# Pass the encoding explicitly: without it open() falls back to the platform's
# locale encoding, which can mis-decode a UTF-8 JSON file.
with open(vae_config_path, 'r', encoding='utf-8') as f:
    vae_config_dict = json.load(f)

print("Default VAE Configuration:")
print(json.dumps(vae_config_dict, indent=2))
