# SVS Visualization for Lung Cancer Pathology Images

This notebook demonstrates how to visualize SVS (Aperio ScanScope Virtual Slide) files, a format commonly used to store whole-slide images in digital pathology.

Author: [Your Name]  
Date: [Current Date]

## 1. Setup and Imports

In [None]:
# Standard imports
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import cv2
from tqdm.notebook import tqdm
import random
import warnings
warnings.filterwarnings('ignore')

# Import OpenSlide for SVS file handling
import openslide
from openslide import OpenSlide
from PIL import Image

# Import SVS visualization module
from svs_visualization import (
    load_svs_slide, get_slide_info, visualize_slide_thumbnail,
    visualize_slide_region, visualize_slide_grid, detect_tissue_regions,
    visualize_tissue_detection, extract_tissue_tiles, visualize_extracted_tiles,
    visualize_magnification_levels, visualize_slide_with_annotations,
    process_slide_batch
)

# Plot defaults for every figure in this notebook: ggplot theme, 12x8 canvas
plt.style.use('ggplot')
plt.rcParams.update({'figure.figsize': (12, 8)})

## 2. Load and Explore SVS Files

In [None]:
# Set path to SVS files
svs_dir = "../Data/type/Pathology Images/images"

# Always define svs_files, even when the directory is missing, so that later
# cells testing `if svs_files:` fall through to their "No SVS files found"
# branch instead of raising NameError.
svs_files = []

# isdir (rather than exists) so that a regular file at this path is reported
# as a missing directory instead of making os.listdir fail below.
if not os.path.isdir(svs_dir):
    print(f"Error: SVS directory not found: {svs_dir}")
else:
    print(f"SVS directory found: {svs_dir}")

    # os.listdir returns entries in arbitrary order; sorting makes the
    # "first" file (used as the sample slide later on) reproducible.
    svs_files = sorted(f for f in os.listdir(svs_dir) if f.endswith('.svs'))
    print(f"Found {len(svs_files)} SVS files")

    # Display first few files
    if svs_files:
        print("\nFirst 5 SVS files:")
        for i, f in enumerate(svs_files[:5], start=1):
            print(f"  {i}. {f}")

In [None]:
# Load metadata to get information about the slides
pathology_csv = "../Data/type/Pathology Images/pathology_images.csv"

# pathology_df is always defined after this cell: a DataFrame on success,
# None when the CSV is missing or cannot be read.
pathology_df = None

# Step 1: load the CSV. Only a failure here resets pathology_df to None.
try:
    import pandas as pd
    if os.path.exists(pathology_csv):
        pathology_df = pd.read_csv(pathology_csv)
        print(f"Loaded pathology metadata with {len(pathology_df)} entries")
    else:
        print(f"Pathology metadata file not found: {pathology_csv}")
except Exception as e:
    print(f"Error loading pathology metadata: {e}")
    pathology_df = None

# Step 2: best-effort summary. This is kept separate from loading so that a
# reporting problem (e.g. `display` unavailable outside IPython) does not
# discard metadata that was read successfully.
if pathology_df is not None:
    try:
        # Display first few rows
        display(pathology_df.head())

        # Check if image_filename column exists
        if 'image_filename' in pathology_df.columns:
            # Drop missing values so NaN is not counted as a filename
            metadata_files = set(pathology_df['image_filename'].dropna())

            # svs_files is produced by the directory-listing cell; fall back
            # to an empty listing if that cell did not define it.
            svs_files_set = set(globals().get('svs_files', []))
            overlap = metadata_files & svs_files_set

            print(f"\nMetadata contains {len(metadata_files)} unique image filenames")
            print(f"Directory contains {len(svs_files_set)} SVS files")
            print(f"Overlap between metadata and directory: {len(overlap)} files")
    except Exception as e:
        print(f"Error summarizing pathology metadata: {e}")

## 3. Visualize a Single SVS File

In [None]:
# Use the first listed SVS file as the worked example for this section
if not svs_files:
    print("No SVS files found")
else:
    sample_svs_path = os.path.join(svs_dir, svs_files[0])
    print(f"Selected sample SVS file: {sample_svs_path}")

    # The loader's result is checked for None before any further use
    slide = load_svs_slide(sample_svs_path)
    if slide is None:
        print("Error: Could not load sample SVS file")
    else:
        info = get_slide_info(slide)

        # Geometry summary: one line per (label, info key) pair
        print("\nSlide Information:")
        for label, field in (
            ("Dimensions", 'dimensions'),
            ("Level Count", 'level_count'),
            ("Level Dimensions", 'level_dimensions'),
            ("Level Downsamples", 'level_downsamples'),
        ):
            print(f"  {label}: {info[field]}")

        # Only the properties that are actually present get printed
        print("\nSelect Properties:")
        for key in (
            'openslide.vendor',
            'openslide.objective-power',
            'openslide.mpp-x',
            'openslide.mpp-y',
        ):
            if key not in info['properties']:
                continue
            print(f"  {key}: {info['properties'][key]}")

In [None]:
# Plot the sample slide's thumbnail, passing a save_path for the figure.
# Skipped when the slide-loading cell did not produce a usable `slide`.
if locals().get('slide') is not None:
    thumbnail = visualize_slide_thumbnail(
        slide,
        title=f"Slide Thumbnail: {os.path.basename(sample_svs_path)}",
        figsize=(12, 12),
        save_path="slide_thumbnail.png",
    )

In [None]:
# Plot the sample slide's magnification levels, passing a save_path for the figure.
# Skipped when the slide-loading cell did not produce a usable `slide`.
if locals().get('slide') is not None:
    visualize_magnification_levels(
        slide,
        title=f"Magnification Levels: {os.path.basename(sample_svs_path)}",
        figsize=(15, 10),
        save_path="magnification_levels.png",
    )

## 4. Lung Nodule Detection and Visualization

In this section, we'll detect and visualize potential lung nodules in the SVS images. Lung nodules are small masses of tissue in the lung that may be cancerous or benign; the regions flagged here are candidates only, not confirmed findings.

In [None]:
# Import the new lung nodule detection functions
from svs_visualization import (
    detect_lung_nodules, visualize_lung_nodules,
    extract_nodule_images, visualize_nodule_grid
)

In [None]:
# Run nodule detection on the sample slide, report the count, and plot the result.
# Skipped when the slide-loading cell did not produce a usable `slide`.
if locals().get('slide') is not None:
    # The three returned values are kept at notebook level; the extraction
    # cell reads `nodules` and `scale_factors`.
    nodules, thumbnail, scale_factors = detect_lung_nodules(
        slide, min_size=1000, max_size=50000
    )

    print(f"Detected {len(nodules)} potential lung nodules")

    visualize_lung_nodules(
        slide,
        nodules,
        thumbnail,
        title=f"Lung Nodule Detection: {os.path.basename(sample_svs_path)}",
        figsize=(15, 10),
        save_path="lung_nodule_detection.png",
    )

In [None]:
# Cut the detected nodules out of the sample slide and show them in a 3x3 grid.
# Requires a loaded `slide` plus the `nodules` / `scale_factors` produced by
# the detection cell; otherwise the cell does nothing.
if locals().get('slide') is not None:
    if 'nodules' in locals() and 'scale_factors' in locals():
        nodule_images = extract_nodule_images(
            slide, nodules, scale_factors, level=0, padding=50
        )

        visualize_nodule_grid(
            nodule_images,
            grid_size=(3, 3),
            title=f"Lung Nodules: {os.path.basename(sample_svs_path)}",
            figsize=(15, 15),
            save_path="lung_nodules.png",
        )

In [None]:
# Compare nodule characteristics by cancer type.
#
# For up to two values of 'major_category', up to two slides that are present
# in svs_dir are passed to detect_lung_nodules; the per-slide nodule counts are
# then summarised and drawn as a bar chart saved under `output_dir`.
#
# The loop deliberately uses its own names (cmp_slide, cmp_nodules,
# cmp_thumbnail): rebinding the notebook-level `slide`, `nodules`, `thumbnail`
# and `scale_factors` here would leave `slide` pointing at a closed slide for
# the cells that follow.
if 'pathology_df' in locals() and pathology_df is not None and 'major_category' in pathology_df.columns:
    # Get unique cancer types
    cancer_types = pathology_df['major_category'].dropna().unique()

    # Create output directory
    output_dir = "nodule_comparison"
    os.makedirs(output_dir, exist_ok=True)

    # Maps cancer type -> summary statistics of its per-slide nodule counts
    nodule_stats = {}

    if 'image_filename' in pathology_df.columns:
        # Limit to 2 cancer types for demonstration
        types_to_process = list(cancer_types)[:2]
        # Set built once so each membership test below is O(1)
        available_files = set(svs_files)
    else:
        print("'image_filename' column not found in pathology data")
        types_to_process = []
        available_files = set()

    for cancer_type in types_to_process:
        # Slides of this cancer type that exist in the directory (up to 2)
        type_df = pathology_df[pathology_df['major_category'] == cancer_type]
        sample_files = [
            f for f in type_df['image_filename'].tolist() if f in available_files
        ][:2]

        if not sample_files:
            print(f"No slides found for cancer type: {cancer_type}")
            continue

        print(f"\nProcessing {len(sample_files)} slides for cancer type: {cancer_type}")

        # Path separators in the category name would point the output file
        # into a directory that does not exist, so neutralise them.
        safe_type = str(cancer_type).replace('/', '_').replace(os.sep, '_')

        nodule_counts = []
        for i, filename in enumerate(sample_files, start=1):
            cmp_slide = None
            try:
                cmp_slide = load_svs_slide(os.path.join(svs_dir, filename))
                if cmp_slide is None:
                    continue

                cmp_nodules, cmp_thumbnail, _ = detect_lung_nodules(cmp_slide)
                nodule_counts.append(len(cmp_nodules))

                visualize_lung_nodules(
                    cmp_slide, cmp_nodules, cmp_thumbnail,
                    title=f"{cancer_type}: {filename}",
                    save_path=os.path.join(output_dir, f"{safe_type}_{i}_nodules.png"),
                )
            except Exception as e:
                # Best effort: one bad slide must not abort the comparison
                print(f"Error processing slide {filename}: {e}")
            finally:
                # Release the handle even when detection or plotting failed
                if cmp_slide is not None:
                    cmp_slide.close()

        # Calculate statistics
        if nodule_counts:
            nodule_stats[cancer_type] = {
                'count': len(nodule_counts),
                'mean': np.mean(nodule_counts),
                'std': np.std(nodule_counts),
                'min': np.min(nodule_counts),
                'max': np.max(nodule_counts),
            }

    # Display nodule statistics
    if nodule_stats:
        print("\nNodule Statistics by Cancer Type:")
        for cancer_type, stats in nodule_stats.items():
            print(f"\n{cancer_type}:")
            print(f"  Slides analyzed: {stats['count']}")
            print(f"  Mean nodules per slide: {stats['mean']:.2f}")
            print(f"  Standard deviation: {stats['std']:.2f}")
            print(f"  Range: {stats['min']} - {stats['max']}")

        # Bar chart of mean nodule count per type, with std as error bars.
        # A separate name is used so `cancer_types` keeps holding the full
        # list of categories from the metadata.
        chart_labels = list(nodule_stats)
        means = [stats['mean'] for stats in nodule_stats.values()]
        stds = [stats['std'] for stats in nodule_stats.values()]

        plt.figure(figsize=(12, 6))
        plt.bar(chart_labels, means, yerr=stds, capsize=10)
        plt.title('Average Number of Nodules by Cancer Type', fontsize=16)
        plt.xlabel('Cancer Type', fontsize=14)
        plt.ylabel('Average Number of Nodules', fontsize=14)
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, 'nodule_comparison.png'))
        plt.show()

    print(f"\nNodule comparison completed. Visualizations saved to '{output_dir}' directory.")
else:
    print("Cannot compare nodules by cancer type: pathology data not available or 'major_category' column missing")

## 5. Visualize Multiple SVS Files

In [None]:
# Batch-process the first few slides found in svs_dir
if not svs_files:
    print("No SVS files found")
else:
    # Demonstration cap: only the first 3 listed slides are processed
    max_slides = 3
    selected_slides = svs_files[:max_slides]

    # Full path for each selected file name
    selected_slide_paths = list(
        map(lambda name: os.path.join(svs_dir, name), selected_slides)
    )

    process_slide_batch(
        selected_slide_paths,
        output_dir="slide_visualizations",
        max_slides=max_slides,
    )

    print(f"\nProcessed {len(selected_slides)} slides. Visualizations saved to 'slide_visualizations' directory.")

## 6. Visualize Slides with Annotations

In [None]:
# Overlay three demonstration boxes on the current `slide`.
# Skipped when no usable `slide` is defined.
if locals().get('slide') is not None:
    width, height = slide.dimensions

    # Each annotation is (x, y, box width, box height, label, color).
    # The boxes are 1/8 of the slide in each dimension and start at 1/4, 1/2
    # and 3/4 of the slide's width and height.
    annotations = [
        (numer * width // denom, numer * height // denom,
         width // 8, height // 8, label, color)
        for numer, denom, label, color in (
            (1, 4, "Region 1", "red"),
            (1, 2, "Region 2", "blue"),
            (3, 4, "Region 3", "green"),
        )
    ]

    visualize_slide_with_annotations(
        slide,
        annotations,
        title=f"Slide with Annotations: {os.path.basename(sample_svs_path)}",
        figsize=(12, 12),
        save_path="slide_with_annotations.png",
    )

## 7. Visualize Slides by Cancer Type

In [None]:
# Group slides by cancer type and visualize examples.
#
# For every value of 'major_category', up to three slides that are present in
# svs_dir get a thumbnail plot and a 2x2 grid of extracted tissue tiles, saved
# under one sub-directory per cancer type.
#
# The loop deliberately uses its own names (type_slide, tissue_thumbnail,
# tissue_scale_factors) so that the notebook-level `slide`, `thumbnail` and
# `scale_factors` are not rebound to a slide that gets closed here.
if 'pathology_df' in locals() and pathology_df is not None and 'major_category' in pathology_df.columns:
    # Get unique cancer types
    cancer_types = pathology_df['major_category'].dropna().unique()

    print(f"Found {len(cancer_types)} cancer types")

    # Create output directory
    output_dir = "cancer_type_visualizations"
    os.makedirs(output_dir, exist_ok=True)

    if 'image_filename' in pathology_df.columns:
        types_to_process = list(cancer_types)
        # Set built once so each membership test below is O(1)
        available_files = set(svs_files)
    else:
        # Without this column there is no way to map categories to files
        print("'image_filename' column not found in pathology data")
        types_to_process = []
        available_files = set()

    for cancer_type in types_to_process:
        # Slides of this cancer type that exist in the directory (up to 3)
        type_df = pathology_df[pathology_df['major_category'] == cancer_type]
        sample_files = [
            f for f in type_df['image_filename'].tolist() if f in available_files
        ][:3]

        if not sample_files:
            print(f"No slides found for cancer type: {cancer_type}")
            continue

        print(f"\nProcessing {len(sample_files)} slides for cancer type: {cancer_type}")

        # str() keeps this working when the category values are not strings
        type_dir = os.path.join(output_dir, str(cancer_type).replace(' ', '_'))
        os.makedirs(type_dir, exist_ok=True)

        for i, filename in enumerate(sample_files, start=1):
            type_slide = None
            try:
                type_slide = load_svs_slide(os.path.join(svs_dir, filename))
                if type_slide is None:
                    continue

                # Visualize slide thumbnail
                visualize_slide_thumbnail(
                    type_slide,
                    title=f"{cancer_type}: {filename}",
                    save_path=os.path.join(type_dir, f"thumbnail_{i}.png"),
                )

                # Detect tissue, then extract and plot up to 4 tissue tiles
                tissue_mask, tissue_thumbnail, tissue_scale_factors = detect_tissue_regions(type_slide)
                tiles = extract_tissue_tiles(
                    type_slide, tissue_mask, tissue_scale_factors, max_tiles=4
                )
                visualize_extracted_tiles(
                    tiles, grid_size=(2, 2),
                    title=f"{cancer_type}: {filename} - Tissue Regions",
                    save_path=os.path.join(type_dir, f"tiles_{i}.png"),
                )
            except Exception as e:
                # Best effort: one bad slide must not abort the whole run
                print(f"Error processing slide {filename}: {e}")
            finally:
                # Release the handle even when a step above failed
                if type_slide is not None:
                    type_slide.close()

    print(f"\nProcessed slides by cancer type. Visualizations saved to '{output_dir}' directory.")
else:
    print("Cannot group slides by cancer type: metadata not available or 'major_category' column missing")

## 8. Conclusion

In this notebook, we've demonstrated how to visualize SVS files for lung cancer pathology images. The key steps included:

1. **Loading and exploring SVS files**:
   - Loading SVS files using OpenSlide
   - Extracting slide information and properties
   - Visualizing slide thumbnails and magnification levels

2. **Tissue detection and region extraction**:
   - Detecting tissue regions using thresholding
   - Extracting tiles from tissue regions
   - Visualizing extracted tiles

3. **Visualizing multiple slides**:
   - Processing batches of slides
   - Generating comprehensive visualizations

4. **Cancer type visualization**:
   - Grouping slides by cancer type
   - Visualizing examples from each cancer type

These visualizations provide valuable insights into the structure and content of the lung cancer pathology images, which will help in designing effective preprocessing and modeling strategies.